/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* FMD Transport Subsystem
*
* A transport module uses some underlying mechanism to transport events.
* This mechanism may use any underlying link-layer protocol and may support
* additional link-layer packets unrelated to FMA. Some appropriate link-
* layer mechanism to create the underlying connection is expected to be
* called prior to calling fmd_xprt_open() itself. Alternatively, a transport
* may be created in the suspended state by specifying the FMD_XPRT_SUSPENDED
* flag as part of the call to fmd_xprt_open(), and then may be resumed later.
* The underlying transport mechanism is *required* to provide ordering: that
* is, the sequences of bytes written across the transport must be read by
* the remote peer in the order that they are written, even across separate
* calls to fmdo_send(). As an example, the Internet TCP protocol would be
* a valid transport as it guarantees ordering, whereas the Internet UDP
* protocol would not because UDP datagrams may be delivered in any order
* as a result of delays introduced when datagrams pass through routers.
*
* Similar to sending events, a transport module receives events that are from
* its peer remote endpoint using some transport-specific mechanism that is
* unknown to FMD. As each event is received, the transport module is
* responsible for constructing a valid nvlist_t object from the data and then
* calling fmd_xprt_post() to post the event to the containing FMD's dispatch
* queue, making it available to all local modules that are not transport
* modules that have subscribed to the event.
*
* The following state machine is used for each transport. The initial state
* is either SYN, ACK, or RUN, depending on the flags specified to xprt_create.
*
*           FMD_XPRT_ACCEPT  !FMD_XPRT_ACCEPT
*                  |                 |
*    waiting    +--v--+           +--v--+   waiting
*    for syn    | SYN |--+     --+| ACK |   for ack
*    event      +-----+   \   /   +-----+   event
*                  |       \ /       |
* drop all      +--v--+     X     +--v--+   send subscriptions,
* events        | ERR |<---+ +--->| SUB |   recv subscriptions,
*               +-----+           +-----+   wait for run event
*                  ^                 |
*                  |     +-----+     |
*                  +-----| RUN |<----+
*                        +--^--+
*                           |
*                    FMD_XPRT_RDONLY
*
* When fmd_xprt_open() is called without FMD_XPRT_ACCEPT, the Common Transport
* Layer enqueues a "syn" event for the module in its event queue and sets the
* state to ACK. In state ACK, we are waiting for the transport to get an
* "ack" event and call fmd_xprt_post() on this event. Other events will be
* discarded. If an "ack" is received, we transition to state SUB. If a
* configurable timeout occurs or if the "ack" is invalid (e.g. invalid version
* exchange), we transition to state ERR. Once in state ERR, no further
* operations are valid except fmd_xprt_close(), and fmd_xprt_error() will
* return a non-zero value to the caller indicating the transport has failed.
*
* When fmd_xprt_open() is called with FMD_XPRT_ACCEPT, the Common Transport
* Layer assumes this transport is being used to accept a virtual connection
* from a remote peer that is sending a "syn", and sets the initial state to
* SYN. In this state, the transport waits for a "syn" event, validates it,
* and then transitions to state SUB if it is valid or state ERR if it is not.
*
* Once in state SUB, the transport module is expected to receive a sequence of
* zero or more "subscribe" events from the remote peer, followed by a "run"
* event. Once in state RUN, the transport is active and any events can be
* sent or received. The transport module is free to call fmd_xprt_close()
* from any state. The fmd_xprt_error() function will return zero if the
* transport is not in the ERR state, or non-zero if it is in the ERR state.
*
* Once the state machine reaches RUN, other FMA protocol events can be sent
* and received across the transport in addition to the various control events.
*
* Table of Common Transport Layer Control Events
* ==============================================
*
* FMA Class                     Payload
* ---------                     -------
* resource.fm.xprt.uuclose      string (uuid of case)
* resource.fm.xprt.uuresolved   string (uuid of case)
* resource.fm.xprt.updated      string (uuid of case)
* resource.fm.xprt.subscribe    string (class pattern)
* resource.fm.xprt.unsubscribe  string (class pattern)
* resource.fm.xprt.unsuback     string (class pattern)
* resource.fm.xprt.syn          version information
* resource.fm.xprt.ack          version information
* resource.fm.xprt.run          version information
*
* Control events are used to add and delete proxy subscriptions on the remote
* transport peer module, and to set up connections. When a "syn" event is
* sent, FMD will include in the payload the highest version of the FMA event
* protocol that is supported by the sender. When a "syn" event is received,
* the receiving FMD will use the minimum of this version and its version of
* the protocol, and reply with this new minimum version in the "ack" event.
* The receiver will then use this new minimum for subsequent event semantics.
*/
#include <strings.h>
#include <limits.h>
#include <fmd_alloc.h>
#include <fmd_error.h>
#include <fmd_conf.h>
#include <fmd_subr.h>
#include <fmd_string.h>
#include <fmd_protocol.h>
#include <fmd_thread.h>
#include <fmd_eventq.h>
#include <fmd_dispq.h>
#include <fmd_ctl.h>
#include <fmd_log.h>
#include <fmd_ustat.h>
#include <fmd_case.h>
#include <fmd_api.h>
#include <fmd_fmri.h>
#include <fmd_asru.h>
#include <fmd_xprt.h>
#include <fmd.h>
/*
* The states shown above in the transport state machine diagram are encoded
* using arrays of class patterns and a corresponding action function. These
* arrays are then passed to fmd_xprt_transition() to change transport states.
*/
{ "resource.fm.xprt.syn", fmd_xprt_event_syn },
{ "*", fmd_xprt_event_error },
};
{ "resource.fm.xprt.ack", fmd_xprt_event_ack },
{ "*", fmd_xprt_event_error },
};
{ "*", fmd_xprt_event_drop },
};
{ "resource.fm.xprt.subscribe", fmd_xprt_event_sub },
{ "resource.fm.xprt.run", fmd_xprt_event_run },
{ "resource.fm.xprt.*", fmd_xprt_event_error },
{ "*", fmd_xprt_event_drop },
};
{ "resource.fm.xprt.subscribe", fmd_xprt_event_sub },
{ "resource.fm.xprt.unsubscribe", fmd_xprt_event_unsub },
{ "resource.fm.xprt.unsuback", fmd_xprt_event_unsuback },
{ "resource.fm.xprt.uuclose", fmd_xprt_event_uuclose },
{ "resource.fm.xprt.uuresolved", fmd_xprt_event_uuresolved },
{ "resource.fm.xprt.updated", fmd_xprt_event_updated },
{ "resource.fm.xprt.*", fmd_xprt_event_error },
};
/*
* Template for per-transport statistics installed by fmd on behalf of each
* transport. These are used to initialize the per-transport xi_stats. For
* each statistic, the name is prepended with "fmd.xprt.%u", where %u is the
* transport ID (xi_id) and then are inserted into the per-module stats hash.
* The values in this array must match fmd_xprt_stat_t from <fmd_xprt.h>.
*/
{
},
};
static void
{
}
static void
{
uint_t i;
for (i = 0; i < xch->xch_hashlen; i++) {
}
}
}
/*
* Insert the specified class into the specified class hash, and return the
* reference count. A return value of one indicates this is the first insert.
* If an eventq is associated with the hash, insert a dispq subscription for it.
*/
static uint_t
{
}
}
/*
* Delete the specified class from the specified class hash, and return the
* reference count. A return value of zero indicates the class was deleted.
* If an eventq is associated with the hash, delete the dispq subscription.
*/
static uint_t
{
break;
else
}
return (-1U); /* explicitly permit an invalid delete */
return (0);
}
/*
* Queue subscribe events for the specified transport corresponding to all of
* the active module subscriptions. This is an extremely heavyweight operation
* that we expect to take place rarely (i.e. when loading a transport module
* or when it establishes a connection). We lock all of the known modules to
* prevent them from adding or deleting subscriptions, then snapshot their
* subscriptions, and then unlock all of the modules. We hold the modhash
* lock for the duration of this operation to prevent new modules from loading.
*/
static void
{
uint_t i, j;
for (i = 0; i < mhp->mh_hashlen; i++) {
}
for (i = 0; i < mhp->mh_hashlen; i++) {
}
}
for (i = 0; i < mhp->mh_hashlen; i++) {
}
}
static void
{
fmd_event_t *e;
char *s;
/*
* If we've reached the SUB state, take out the big hammer and snapshot
* all of the subscriptions of all of the loaded modules. Then queue a
* run event for our remote peer indicating that it can enter RUN.
*/
if (state == _fmd_xprt_state_sub) {
/*
* For read-write transports, we always want to set up remote
* subscriptions to the builtin list.* events, regardless of
* whether any agents have subscribed to them.
*/
}
}
}
static void
{
}
static int
{
return (1);
}
return (1);
}
return (0);
}
void
{
fmd_event_t *e;
char *class;
return; /* transitioned to error state */
/*
* If the transport module didn't specify an authority, extract the
* one that is passed along with the xprt.syn event and use that.
*/
}
}
void
{
return; /* transitioned to error state */
/*
* If the transport module didn't specify an authority, extract the
* one that is passed along with the xprt.syn event and use that.
*/
}
}
/*
* Upon transition to RUN, we take every solved case and resend a list.suspect
* event for it to our remote peer. If a case transitions from solved to a
* future state (CLOSE_WAIT, CLOSED, or REPAIRED) while we are iterating over
* the case hash, we will get it as part of examining the resource cache, next.
*/
static void
{
fmd_event_t *e;
char *class;
return;
}
/*
* Similar to the above function, but for use with readonly transport. Puts
* the event on the module's queue such that its fmdo_recv function can pick
* it up and send it if appropriate.
*/
static void
{
fmd_event_t *e;
char *class;
return;
}
void
{
}
}
void
{
char *class;
return; /* transitioned to error state */
return; /* malformed protocol event */
}
void
{
fmd_event_t *e;
char *class;
return; /* transitioned to error state */
return; /* malformed protocol event */
}
void
{
char *class;
return; /* transitioned to error state */
return; /* malformed protocol event */
}
/*
* on diagnosing side, receive a uuclose from the proxy.
*/
void
{
char *uuid;
return; /* transitioned to error state */
/*
* update resource cache status and transition case
*/
}
}
/*
* on diagnosing side, receive a uuresolved from the proxy.
*/
void
{
char *uuid;
return; /* transitioned to error state */
}
}
/*
*/
void
{
char *uuid;
return; /* transitioned to error state */
/*
* Only update status with new repairs if "no remote repair"
* is not set. Do the case_update anyway though (as this will
* refresh the status on the proxy side).
*/
nelem != 0) {
(void) nvlist_lookup_uint8_array(nvl,
&nelem);
proxy_asrup, NULL);
}
}
}
}
void
{
}
void
{
}
{
fmd_event_t *e;
char *s;
/*
* Grab fmd.d_xprt_lock to block fmd_xprt_suspend_all() and then create
* a transport ID and make it visible in fmd.d_xprt_ids. If transports
* were previously suspended, set the FMD_XPRT_DSUSPENDED flag on us to
* ensure that this transport will not run until fmd_xprt_resume_all().
*/
if (fmd.d_xprt_suspend != 0)
/*
* If the module has not yet finished _fmd_init(), set the ISUSPENDED
* bit so that fmdo_send() is not called until _fmd_init() completes.
*/
/*
* Initialize the transport statistics that we keep on behalf of fmd.
* These are set up using a template defined at the top of this file.
* We rename each statistic with a prefix ensuring its uniqueness.
*/
for (i = 0; i < statc; i++) {
}
/*
* Create the outbound eventq for this transport and link to its stats.
* If any suspend bits were set above, suspend the eventq immediately.
*/
/*
* Create our subscription hashes: local subscriptions go to xi_queue,
* remote subscriptions are tracked only for protocol requests, and
* unsubscriptions awaiting an unsuback event are tracked in xi_usub.
*/
/*
* Determine our initial state based upon the creation flags. If we're
* read-only, go directly to RUN. If we're accepting a new connection,
* wait for a SYN. Otherwise send a SYN and wait for an ACK.
*/
/*
* Send the list.suspects across here for readonly transports.
* For read-write transport they will be sent on transition to
* RUN state in fmd_xprt_event_run().
*/
} else if (flags & FMD_XPRT_ACCEPT)
else
/*
* If client.xprtlog is set to TRUE, create a debugging log for the
* transport.
*/
if (i) {
}
/*
* If this is a read-only transport, return without creating a send
* queue thread and setting up any connection events in our queue.
*/
goto out;
/*
* Once the transport is fully initialized, create a send queue thread
* and start any connect events flowing to complete our initialization.
*/
(void) fmd_set_errno(EFMD_XPRT_THR);
return (NULL);
}
/*
* If the transport is not being opened to accept an inbound connect,
* start an outbound connection by enqueuing a SYN event for our peer.
*/
if (!(flags & FMD_XPRT_ACCEPT)) {
}
out:
return ((fmd_xprt_t *)xip);
}
void
{
uint_t i, n;
/*
* Remove the transport from global visibility, cancel its send-side
* thread, join with it, and then remove the transport from module
* visibility. Once all this is done, destroy and free the transport.
*/
}
/*
* Release every case handle in the module that was cached by this
* transport. This will result in these cases disappearing from the
* local case hash so that fmd_case_uuclose() and fmd_case_repaired()
* etc can no longer be used.
*/
}
/*
* Destroy every class in the various subscription hashes and remove
* any corresponding subscriptions from the event dispatch queue.
*/
/*
* Uniquify the stat names exactly as was done in fmd_xprt_create()
* before calling fmd_ustat_insert(), otherwise fmd_ustat_delete()
* won't find the entries in the hash table.
*/
n = sizeof (_fmd_xprt_stat_tmpl) / sizeof (fmd_stat_t);
for (i = 0; i < n; i++) {
}
}
void
{
}
void
{
}
void
{
int err;
if (FMD_EVENT_TTL(ep) == 0) {
continue;
}
if (err == FMD_SEND_RETRY) {
}
}
}
}
/*
* This function creates a local suspect list. This is used when a suspect list
* is created directly by an external source like fminject.
*/
static void
{
return;
}
/*
* copy diag_code if present
*/
}
/*
* copy suspects
*/
&nelem);
for (i = 0; i < nelem; i++) {
int err;
/*
* If no fru specified, get it from topo
*/
/*
* If no asru specified, get it from topo
*/
/*
* If no location specified, get it from topo
*/
&loc) != 0) {
(void) nvlist_add_string(flt_copy,
&err) == 0)
(void) nvlist_add_string(flt_copy,
if (loc)
}
}
/*
* copy diag_time if present
*/
/*
* copy DE fmri if present
*/
}
/*
* copy injected if present
*/
}
/*
* This function is called to create a proxy case on receipt of a list.suspect
* from the diagnosing side of the transport.
*/
static void
{
char *class;
int err;
int got_proxy_asru = 0;
int got_hc_rsrc = 0;
int got_hc_asru = 0;
int got_present_rsrc = 0;
char *scheme;
int need_update = 0;
return;
return;
&nelem);
/*
* In order to implement FMD_XPRT_HCONLY and FMD_XPRT_HC_PRESENT_ONLY
* etc we first scan the suspects to see if
* - there was an asru in the received fault
* - there was an hc-scheme resource in the received fault
* - any hc-scheme resource in the received fault is present in the
* local topology
* - any hc-scheme resource in the received fault has an asru in the
* local topology
*/
if (nelem > 0) {
for (i = 0; i < nelem; i++) {
diag_asru[i] = 1;
continue;
/*
* If there is an hc-scheme asru, use that to find the
* real asru. Otherwise if there is an hc-scheme
* resource, work out the old asru from that.
* This order is to allow a two stage evaluation
* of the asru where a fault in the diagnosing side
* is in a component not visible to the proxy side,
* but prevents a component that is visible from
* working. So the diagnosing side sets the asru to
* the latter component (in hc-scheme as the diagnosing
* side doesn't know about the proxy side's virtual
* schemes), and then the proxy side can convert that
* to a suitable virtual scheme asru.
*/
&scheme) == 0 &&
got_hc_asru = 1;
continue;
got_present_rsrc = 1;
&err) == 0) {
proxy_asru[i] =
got_proxy_asru = 1;
}
} else if (nvlist_lookup_nvlist(nvlp[i],
&scheme) == 0 &&
got_hc_rsrc = 1;
continue;
got_present_rsrc = 1;
&err) == 0) {
proxy_asru[i] =
got_proxy_asru = 1;
}
}
}
}
/*
* If we're set up only to report hc-scheme faults, and
* there aren't any, then just drop the event.
*/
if (got_hc_rsrc == 0 && got_hc_asru == 0 &&
if (nelem > 0) {
}
return;
}
/*
* If we're set up only to report locally present hc-scheme
* faults, and there aren't any, then just drop the event.
*/
if (got_present_rsrc == 0 &&
if (nelem > 0) {
for (i = 0; i < nelem; i++)
nvlist_free(asrua[i]);
}
return;
}
/*
* If fmd_case_recreate() returns NULL, UUID is already known.
*/
if (nelem > 0) {
for (i = 0; i < nelem; i++)
nvlist_free(asrua[i]);
}
return;
}
for (i = 0; i < nelem; i++) {
if (proxy_asru[i] != FMD_PROXY_ASRU_NOT_NEEDED) {
/*
* The original asru was hc-scheme, so use that as the resource.
*/
if (proxy_asru[i] == FMD_PROXY_ASRU_FROM_ASRU) {
(void) nvlist_remove(flt_copy,
(void) nvlist_lookup_nvlist(flt_copy,
FM_FAULT_ASRU, &asru);
(void) nvlist_add_nvlist(flt_copy,
}
asrua[i]);
nvlist_free(asrua[i]);
} else if (got_hc_asru == 0 &&
/*
* If we have an asru from diag side, but it's not
* in hc scheme, then we can't be sure what it
* represents, so mark as no retire.
*/
(void) nvlist_add_boolean_value(flt_copy,
}
}
/*
* copy diag_time
*/
/*
* copy DE fmri
*/
}
/*
* copy injected if present
*/
/*
* Transition to solved. This will log the suspect list and create
* the resource cache entries.
*/
/*
* Update status if it is not simply "all faulty" (can happen if
* list.suspects are being re-sent when the transport has reconnected).
*/
&nelem);
for (i = 0; i < nelem; i++) {
need_update = 1;
}
if (need_update) {
cip->ci_diag_asru);
}
/*
* If an asru was found on the proxy side, send an update back to the
* diagnosing side.
*/
if (got_proxy_asru)
if (nelem > 0)
}
void
{
fmd_event_t *e;
uint_t n;
/*
* Grab the transport lock and set the busy flag to indicate we are
* busy receiving an event. If [DI]SUSPEND is pending, wait until fmd
* resumes the transport before continuing on with the receive.
*/
return; /* fmd_destroy() is in progress */
}
}
goto done;
}
if (isireport) {
char *pri;
ishvireport = 1;
} else {
ishvireport = 0;
}
}
/*
* The logonly flag should only be set for ereports.
*/
"logonly flag is not valid for class %s",
goto done;
}
/*
* If a time-to-live value is present in the event and is zero, drop
* the event and bump xs_timeouts. Otherwise decrement the TTL value.
*/
if (ttl == 0) {
"timeout: event received with ttl=0\n",
goto done;
}
}
/*
* If we are using the native system clock, the underlying transport
* code can provide a tighter event time bound by telling us when the
* event was enqueued. If we're using simulated clocks, this time
* has no meaning to us, so just reset the value to use HRT_NOW.
*/
hrt = FMD_HRT_NOW;
/*
* If an event's class is in the FMD_CTL_CLASS family, then create a
* control event. If a FMD_EVN_TOD member is found, create a protocol
* event using this time. Otherwise create a protocol event using hrt.
*/
if (isproto == FMD_B_FALSE)
else {
}
/*
* If the debug log is enabled, create a temporary event, log it to the
* debug log, and then reset the underlying state of the event.
*/
}
}
/*
* Iterate over the rules for the current state trying to match the
* event class to one of our special rules. If a rule is matched, the
* event is consumed and not dispatched to other modules. If the rule
* set ends without matching an event, we fall through to dispatching.
*/
fmd_event_hold(e);
fmd_event_rele(e);
goto done;
}
}
/*
* Record ereports and ireports in the log. This code will
* be replaced later with a per-transport intent log instead.
*/
issysevent == B_TRUE) {
if (isereport == FMD_B_TRUE) {
} else {
if (ishvireport || issysevent) {
} else {
}
}
(void) pthread_rwlock_rdlock(lockp);
(void) pthread_rwlock_unlock(lockp);
}
/*
* If a list.suspect event is received, create a case for the specified
* UUID in the case hash, with the transport module as its owner.
*/
else
fmd_event_hold(e);
fmd_event_rele(e);
goto done;
}
/*
* If a list.updated or list.repaired event is received, update the
* resource cache status and the local case.
*/
}
}
fmd_event_hold(e);
fmd_event_rele(e);
goto done;
}
/*
* If a list.isolated event is received, update resource cache status
*/
}
fmd_event_hold(e);
fmd_event_rele(e);
goto done;
}
/*
* If a list.resolved event is received, resolve the local case.
*/
}
fmd_event_hold(e);
fmd_event_rele(e);
goto done;
}
/*
* Don't proxy ereports on an EXTERNAL transport - we won't
* know how to diagnose them with the wrong topology. Note
* that we have to hold and then release the event in
* order for it to be freed.
*/
fmd_event_hold(e);
fmd_event_rele(e);
} else if (isproto == FMD_B_TRUE)
else
done:
}
void
{
fmd_event_t *e;
char *s;
return; /* read-only transports do not proxy uuclose */
}
/*
* On proxy side, send back uuresolved request to diagnosing side
*/
void
{
fmd_event_t *e;
char *s;
return; /* read-only transports do not proxy uuresolved */
}
/*
*/
void
{
fmd_event_t *e;
char *s;
return; /* read-only transports do not support remote repairs */
}
/*
* Insert the specified class into our remote subscription hash. If the class
* is already present, bump the reference count; otherwise add it to the hash
* and then enqueue an event for our remote peer to proxy our subscription.
*/
void
{
fmd_event_t *e;
char *s;
return; /* read-only transports do not proxy subscriptions */
return; /* transport is not yet an active subscriber */
if (refs > 1)
return; /* we've already asked our peer for this subscription */
}
/*
* Delete the specified class from the remote subscription hash. If the
* reference count drops to zero, ask our remote peer to unsubscribe by proxy.
*/
void
{
fmd_event_t *e;
char *s;
return; /* read-only transports do not proxy subscriptions */
return; /* transport is not yet an active subscriber */
/*
* If the subscription reference count drops to zero in xi_rsub, insert
* an entry into the xi_usub hash indicating we await an unsuback event.
*/
if (refs != 0)
return; /* other subscriptions for this class still active */
}
static void
{
}
}
void
{
}
static void
{
}
}
void
{
}
/*ARGSUSED*/
static void
{
}
}
/*
* Take one reference on the global transport suspend count,
* fmd.d_xprt_suspend. The count nests: the test below sees the value from
* before the increment, so only the call that raises the count from zero
* continues past the early return; any call made while the count is already
* non-zero just records itself and returns.
*
* NOTE(review): no statements are visible after the early return, so what the
* first suspender does next cannot be confirmed from here.
*/
void
fmd_xprt_suspend_all(void)
{
/* Post-increment: compare the old count, then bump it unconditionally. */
if (fmd.d_xprt_suspend++ != 0) {
return; /* already suspended */
}
}
/*ARGSUSED*/
static void
{
}
}
/*
* Drop one reference on the global transport suspend count,
* fmd.d_xprt_suspend. A call made while the count is already zero has no
* matching suspend and is reported through fmd_panic(). Otherwise the count
* is decremented, and only the call that brings it back to zero continues
* past the early return.
*
* NOTE(review): no statements are visible after the early return, so what the
* final resumer does next cannot be confirmed from here.
*/
void
fmd_xprt_resume_all(void)
{
/* A resume with nothing suspended is a caller bug. */
if (fmd.d_xprt_suspend == 0)
fmd_panic("fmd_xprt_suspend/resume_all mismatch\n");
/* Pre-decrement: other suspenders remain while the new count is non-zero. */
if (--fmd.d_xprt_suspend != 0) {
return; /* not ready to be resumed */
}
}