/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <dirent.h>
#include <limits.h>
#include <unistd.h>
#include <alloca.h>
#include <stddef.h>
#include <fmd_alloc.h>
#include <fmd_string.h>
#include <fmd_error.h>
#include <fmd_subr.h>
#include <fmd_protocol.h>
#include <fmd_event.h>
#include <fmd_conf.h>
#include <fmd_fmri.h>
#include <fmd_dispq.h>
#include <fmd_case.h>
#include <fmd_module.h>
#include <fmd_asru.h>
#include <fmd.h>
static const char *const _fmd_asru_events[] = {
};
/*
 * Short two-character state names.  Per the inline note, the entries are
 * listed in the same order as those of _fmd_asru_events above.
 * NOTE(review): presumably the letter case encodes the unusable ('u'/'U')
 * and faulty ('f'/'F') bits - confirm against the users of this table.
 */
static const char *const _fmd_asru_snames[] = {
"uf", "uF", "Uf", "UF" /* same order as above */
};
static uint_t
{
}
static boolean_t
{
}
static fmd_asru_t *
{
char *s;
if (fmri)
strcmp(s, FM_FMRI_SCHEME_FMD) == 0)
return (ap);
}
static void
{
}
static void
{
}
static fmd_asru_t *
{
return (ap);
}
/*
* Lookup an asru in the hash by name and place a hold on it. If the asru is
* not found, no entry is created and NULL is returned. This internal function
* is for callers who have the ah_lock held and is used by lookup_name below.
*/
{
uint_t h;
break;
}
(void) fmd_asru_hold(ap);
else
(void) fmd_set_errno(EFMD_ASRU_NOENT);
return (ap);
}
/*
 * NOTE(review): presumably the "not set" value for an hc_only argument,
 * i.e. the presence check is not restricted to hc-scheme fmris - confirm
 * against the callers.
 */
#define HC_ONLY_FALSE 0
static int
{
char *s;
/*
* Check if there is evidence that this object is no longer present.
* In general the presence check is most meaningful for resources and
* frus, as those are the things that are physically present or not
* present - an asru can be spread over a number of frus some of which
* are present and some not, so fmd_fmri_present() is not generally
* meaningful. However retain a check for asru first for compatibility.
* If we have checked all three and we still get -1 then nothing knows
* whether it's present or not, so err on the safe side and treat it
* as still present.
*
* Note that if hc_only is set, then we only check status using fmris
* that are in hc-scheme.
*/
return (fmd_asru_fake_not_present);
strcmp(s, FM_FMRI_SCHEME_HC) == 0))) {
if (ps == -1) {
} else {
/* see if we can improve on UNKNOWN */
if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
}
}
}
FM_FMRI_SCHEME, &s) == 0 &&
strcmp(s, FM_FMRI_SCHEME_HC) == 0))) {
if (ps == -1) {
} else {
/* see if we can improve on UNKNOWN */
if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
}
}
}
if (ps == -1)
return (ps);
}
static void
char *name)
{
ahp->ah_al_count++;
}
static void
char *name)
{
}
static void
{
}
static void
char *name)
{
}
static void
char *name)
{
}
static void
{
}
static fmd_asru_link_t *
{
return (alp);
}
/*ARGSUSED*/
static void
{
else
}
static int
{
return (EFMD_ASRU_FMRI);
return (EFMD_ASRU_FMRI);
}
return (0);
}
static fmd_asru_link_t *
const char *al_uuid)
{
got_asru = 1;
got_fru = 1;
got_rsrc = 1;
label = "";
/*
* Grab the rwlock as a writer; then create and insert the asru with
* ahp->ah_lock held and hash it in. We'll then drop the rwlock and
* proceed to initializing the asru.
*/
/*
* Create and initialise the per-fault "link" structure.
*/
if (got_asru)
/*
* If this is the first fault for this asru, then create the per-asru
* structure and link into the hash.
*/
NULL);
} else
/*
* Put the link structure on the list associated with the per-asru
* structure. Then put the link structure on the various hashes.
*/
return (alp);
}
static void
{
int ps;
char *class;
int err;
/*
* Extract the most recent values of 'faulty' from the event log.
*/
&faulty) != 0) {
return;
}
return;
}
&unusable);
&repaired);
&replaced);
&acquitted);
&resolved);
/*
* Attempt to recreate the case in CLOSED, REPAIRED or RESOLVED state.
* If the case is already present, fmd_case_recreate() will return it.
* If not, we'll create a new orphaned case. Either way, we use the
* ASRU event to insert a suspect into the partially-restored case.
*/
else
}
/*
* For faults with a resource, re-evaluate the asru from the resource.
*/
}
/*
* Now create the resource cache entries.
*/
/*
* Check to see if the resource is still present in the system.
*/
if (ps == FMD_OBJ_STATE_REPLACED) {
} else if (ps == FMD_OBJ_STATE_STILL_PRESENT ||
ps == FMD_OBJ_STATE_UNKNOWN) {
&asru) == 0) {
int us;
switch (fmd_fmri_service_state(asru)) {
break;
case FMD_SERVICE_STATE_OK:
break;
case -1:
/* not supported by scheme */
if (us > 0)
else if (us == 0)
break;
}
}
}
if (faulty) {
}
if (unusable) {
}
if (replaced)
else if (repaired)
else if (acquitted)
else
}
static void
{
if (err != 0)
else
}
/*
* Open a saved log file and restore it into the ASRU hash. If we can't even
* open the log, rename the log file to <uuid>- to indicate it is corrupt. If
* fmd_log_replay() fails, we either delete the file (if it has reached the
* upper limit on cache age) or rename it for debugging if it was corrupted.
*/
static void
{
uint_t n;
return;
}
n = ahp->ah_al_count;
if (ahp->ah_al_count == n)
}
void
{
int zero;
return;
}
continue; /* skip "." and ".." */
if (zero)
}
}
/*
* If the resource is present and faulty but not unusable, replay the fault
* event that caused it to be marked faulty. This will cause the agent
* subscribing to this fault class to again disable the resource.
*/
/*ARGSUSED*/
static void
{
fmd_event_t *e;
char *class;
}
}
void
{
}
/*
* Check if the resource is still present. If not, and if the rsrc.age time
* has expired, then do an implicit repair on the resource.
*/
/*ARGSUSED*/
static void
{
int ps;
int err;
return;
/*
* Checking for aged resources only happens on the diagnosing side,
* not on a proxy.
*/
return;
if (ps == FMD_OBJ_STATE_REPLACED) {
} else if (ps == FMD_OBJ_STATE_NOT_PRESENT) {
return;
}
}
}
/*ARGSUSED*/
void
{
/*
* Case must be in resolved state for this to be called. So modified
* time on resource cache entry should be the time the resolve occurred.
* Return 0 if not yet hit rsrc.aged.
*/
return;
*(int *)arg = 0;
}
/*ARGSUSED*/
void
{
/*
* Find most recent modified time of a set of resource cache entries.
*/
return;
}
void
{
}
{
ahp->ah_al_count = 0;
return (ahp);
}
void
{
uint_t i;
for (i = 0; i < ahp->ah_hashlen; i++) {
}
}
}
/*
* Take a snapshot of the ASRU database by placing an additional hold on each
* member in an auxiliary array, and then call 'func' for each ASRU.
*/
void
{
for (i = 0; i < ahp->ah_hashlen; i++) {
}
for (i = 0; i < apc; i++) {
}
}
void
{
for (i = 0; i < ahp->ah_hashlen; i++) {
}
for (i = 0; i < alpc; i++) {
}
}
static void
{
uint_t h;
/* LINTED pointer alignment */
if (fmd_asru_strcmp(ahp,
/* LINTED pointer alignment */
alpc++;
/* LINTED pointer alignment */
if (fmd_asru_strcmp(ahp,
/* LINTED pointer alignment */
for (i = 0; i < alpc; i++) {
}
}
void
{
}
void
{
}
void
{
}
void
{
}
void
{
}
/*
* Lookup an asru in the hash by name and place a hold on it. If the asru is
* not found, no entry is created and NULL is returned.
*/
{
return (ap);
}
/*
* Create a resource cache entry using the fault event "nvl" for one of the
* suspects from the case "cp".
*
* The fault event can have the following components : FM_FAULT_ASRU,
* FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine
* when calling fmd_nvl_create_fault(). In the general case, these are all
* optional and an entry will always be added into the cache even if one or all
* of these fields is missing.
*
* However, for hardware faults the recommended practice is that the fault
* event should always have the FM_FAULT_RESOURCE field present and that this
* should be represented in hc-scheme.
*
* Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
* where known, though at some future stage fmd might be able to fill these
* in automatically from the topology.
*/
{
char *parsed_uuid;
int uuidlen;
/*
* Generate a UUID for the ASRU. libuuid cleverly gives us no
* interface for specifying or learning the buffer size. Sigh.
* The spec says 36 bytes but we use a tunable just to be safe.
*/
/*
* Now create the resource cache entries.
*/
return (alp);
}
/*
* Release the reference count on an asru obtained using fmd_asru_hash_lookup.
* We take 'ahp' for symmetry and in case we need to use it in future work.
*/
/*ARGSUSED*/
void
{
else
}
static void
{
uint_t h;
/* LINTED pointer alignment */
} else
}
}
static void
{
name);
} else
}
void
{
char *label;
uint_t h;
/*
* first delete hash entries for each suspect
*/
&label) != 0)
label = "";
}
/*
* then delete associated case hash entries
*/
ahp->ah_al_count--;
/*
* decrement case ref.
*/
/*
* If we found a matching ASRU, unlink its log file and
* then release the hash entry. Note that it may still
* be referenced if another thread is manipulating it;
* this is ok because once we unlink, the log file will
* not be restored, and the log data will be freed when
* all of the referencing threads release their
* respective references.
*/
"failed to unlink asru %s", path);
/*
* Now unlink from the global per-resource cache
* and if this is the last link then remove that from
* its own hash too.
*/
uint_t h;
} else
}
}
} else
}
}
typedef struct {
static void
{
alp->al_asru_fmri &&
farcp->farc_reason)) {
else
}
}
}
static void
{
int flags;
/*
* Check if all entries associated with this asru are acquitted and
* if so acquit containees. Don't try to repair containees on proxy
* side unless we have local asru.
*/
}
}
}
void
{
int cleared;
/*
* don't allow remote repair over readonly transport
*/
return;
/*
* don't allow repair etc by asru on proxy unless asru is local
*/
return;
/*
* For acquit, need to check both name and uuid if specified
*/
return;
/*
* For replaced, verify it has been replaced if we have serial number.
* If not set *farap->fara_rval to FARA_ERR_RSRCNOTR.
*/
return;
}
/*
* if called from fmd_adm_*() and we really did clear the bit then
* we need to do a case update to see if the associated case can be
* repaired. No need to do this if called from fmd_case_*() (ie
* when arg is NULL) as the case will be explicitly repaired anyway.
*/
/*
* *farap->fara_rval defaults to FARA_ERR_RSRCNOTF (not found).
* If we find a valid cache entry which we repair then we
* set it to FARA_OK. However we don't want to do this if
* we have already set it to FARA_ERR_RSRCNOTR (not replaced)
* in a previous iteration (see above). So only set it to
* FARA_OK if the current value is still FARA_ERR_RSRCNOTF.
*/
if (cleared) {
else
}
}
}
/*
* Discard the case associated with this alp if it is in resolved state.
* Called on "fmadm flush".
*/
/*ARGSUSED*/
void
{
int check_if_aged = 0;
*rval = 0;
}
/*
* This is only called for proxied faults. Set various flags so we can
* find the nature of the transport from the resource cache code.
*/
/*ARGSUSED*/
void
{
return;
/*
* Note that this is a proxy fault and save whether transport is
* RDONLY or EXTERNAL.
*/
if (entryp->fasp_proxy_external) {
}
if (entryp->fasp_proxy_rdonly)
/*
* Save whether asru is accessible in local domain
*/
}
(*entryp->fasp_countp)++;
}
/*ARGSUSED*/
void
{
}
/*
* This function is used for fault proxying. It updates the resource status in
* the resource cache based on information that has come from the other side of
* the transport. This can be called on either the proxy side or the
* diagnosing side.
*/
void
{
return;
/*
* For proxy, if there is no asru on the proxy side, but there is on
* the diag side, then take the diag side asru status.
* For diag, if there is an asru on the proxy side, then take the proxy
* side asru status.
*/
if (entryp->faus_is_proxy ?
if (status & FM_SUSPECT_DEGRADED)
else
if (status & FM_SUSPECT_UNUSABLE)
else
}
/*
* Update the faulty status too.
*/
if (!(status & FM_SUSPECT_FAULTY))
else if (entryp->faus_is_proxy)
/*
* for proxy only, update the present status too.
*/
if (entryp->faus_is_proxy) {
if (!(status & FM_SUSPECT_NOT_PRESENT)) {
} else {
}
}
(*entryp->faus_countp)++;
}
/*
* This function is called on the diagnosing side when fault proxying is
* in use and the proxy has sent a uuclose. It updates the status of the
* resource cache entries.
*/
void
{
return;
(*entryp->facs_countp)++;
}
static void
{
fmd_event_t *e;
char *class;
/*
* Don't log to disk on proxy side
*/
return;
return; /* can't log events if we can't open the log */
fmd_event_hold(e);
fmd_event_rele(e);
/*
* For now, we close the log file after every update to conserve file
* descriptors and daemon overhead. If this becomes a performance
* issue this code can change to keep a fixed-size LRU cache of logs.
*/
}
int
{
return (0);
}
return (1);
}
int
{
}
return (0);
}
if (sflag == FMD_ASRU_UNUSABLE)
else if (sflag == FMD_ASRU_FAULTY) {
/*
* only clear the faulty bit if all links are clear
*/
if (!(flags & FMD_ASRU_FAULTY))
}
return (1);
}
/*ARGSUSED*/
void
{
}
/*
* Report the current known state of the link entry (ie this particular fault
* affecting this particular ASRU).
*/
int
{
/*
* For fault proxying with an EXTERNAL transport, believe the presence
* state as sent by the diagnosing side. Otherwise find the presence
* state here. Note that if fault proxying with an INTERNAL transport
* we can only trust the presence state where we are using hc-scheme
* fmris which should be consistent across domains in the same system -
* other schemes can refer to different devices in different domains.
*/
if (ps == FMD_OBJ_STATE_NOT_PRESENT)
return (st | FMD_ASRU_UNUSABLE);
if (ps == FMD_OBJ_STATE_REPLACED) {
return (st | FMD_ASRU_UNUSABLE);
}
}
else
/*
* For fault proxying, unless we have a local ASRU, then believe the
* service state sent by the diagnosing side. Otherwise find the service
* state here. Try fmd_fmri_service_state() first, but if that's not
* supported by the scheme then fall back to fmd_fmri_unusable().
*/
/* not supported by scheme - try fmd_fmri_unusable */
if (us > 0)
st |= FMD_ASRU_UNUSABLE;
else if (us == 0)
st &= ~FMD_ASRU_UNUSABLE;
} else {
if (us == FMD_SERVICE_STATE_UNUSABLE) {
st &= ~FMD_ASRU_DEGRADED;
st |= FMD_ASRU_UNUSABLE;
} else if (us == FMD_SERVICE_STATE_OK) {
} else if (us == FMD_SERVICE_STATE_ISOLATE_PENDING) {
} else if (us == FMD_SERVICE_STATE_DEGRADED) {
st &= ~FMD_ASRU_UNUSABLE;
st |= FMD_ASRU_DEGRADED;
}
}
}
return (st);
}
/*
* Report the current known state of the ASRU by refreshing its unusable status
* based upon the routines provided by the scheme module. If the unusable bit
* is different, we do *not* generate a state change here because that change
* may be unrelated to fmd activities and therefore we have no case or event.
* The absence of the transition is harmless as this function is only provided
* for RPC observability and fmd's clients are only concerned with ASRU_FAULTY.
*/
int
{
char *s;
/* do not report non-fmd non-present resources */
/*
* As with fmd_asru_al_getstate(), we can only trust the
* local presence state on a proxy if the transport is
* internal and the scheme is hc. Otherwise we believe the
* state as sent by the diagnosing side.
*/
&s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0))) {
if (fmd_asru_fake_not_present >=
return (0);
}
return (0);
}
/*
* As with fmd_asru_al_getstate(), we can only trust the local unusable
* state on a proxy if there is a local ASRU.
*/
if (us > 0)
st |= FMD_ASRU_UNUSABLE;
else if (us == 0)
st &= ~FMD_ASRU_UNUSABLE;
}
return (st);
}