smf_response.c revision f6e214c7418f43af38bd8c3a557e3d0a1d311cfa
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* SMF software-response subsidiary
*/
#include <strings.h>
#include <libscf.h>
#include <fm/fmd_fmri.h>
#include "smf.h"
/*
* Statistics for this subsidiary. Each initializer below supplies the
* statistic name, its type (every entry here is FMD_TYPE_UINT64) and a
* short description string.
*/
static struct {
} swrp_smf_stats = {
{ "swrp_smf_repairs", FMD_TYPE_UINT64,
"repair events received for propogation to SMF" },
{ "swrp_smf_clears", FMD_TYPE_UINT64,
"notifications from SMF of exiting maint state" },
{ "swrp_smf_closed", FMD_TYPE_UINT64,
"cases closed" },
{ "swrp_smf_wrongclass", FMD_TYPE_UINT64,
"unexpected event class received" },
{ "swrp_smf_badlist", FMD_TYPE_UINT64,
"list event with invalid structure" },
{ "swrp_smf_badresource", FMD_TYPE_UINT64,
"list.repaired with smf fault but bad svc fmri" },
{ "swrp_smf_badclrevent", FMD_TYPE_UINT64,
"maint clear event from SMF malformed" },
{ "swrp_smf_noloop", FMD_TYPE_UINT64,
"avoidance of smf->fmd->smf repairs propogations" },
{ "swrp_smf_suppressed", FMD_TYPE_UINT64,
"not propogated to smf because no longer in maint" },
{ "swrp_smf_cachefull", FMD_TYPE_UINT64,
"uuid cache full" },
};
#define CACHE_NENT_INC 16
#define CACHE_NENT_MAX 128
/*
* One uuid cache entry: the UUID of an SMF maintenance defect case
* together with the full svc FMRI string recorded for that case.
*/
struct smf_uuid_cache_ent {
char uuid[37]; /* case UUID string; presumably 36 chars + NUL - confirm */
char fmristr[90]; /* full svc FMRI string associated with the case */
};
#define CACHE_VERSION 1
struct smf_uuid_cache {
};
static struct smf_uuid_cache *uuid_cache;
#define UUID_CACHE_BUFNAME "uuid_cache"
static void
{
struct smf_uuid_cache *newcache;
uint32_t n;
sizeof (struct smf_uuid_cache_ent);
if (uuid_cache != NULL) {
}
}
static void
{
}
/*
* Garbage-collect the uuid cache. Any cases that are already resolved
* we do not need an entry for. If a case is not resolved but the
* service involved in that case is no longer in maintenance state
* then we've lost sync somehow, so repair the asru (which will
* also resolve the case).
*/
static void
{
struct smf_uuid_cache_ent *entp;
char *svcname;
int err, i;
for (i = 0; i < uuid_cache->nentries; i++) {
continue;
} else {
&err) != 0) {
continue;
}
}
}
}
if (thp)
}
static void
{
if (sz == 0)
return;
/*
* Garbage collect now, not just for tidiness but also to help
* fmd and smf state stay in sync at module startup.
*/
}
/*
* Add the UUID of an SMF maintenance defect case to our cache and
* record the associated full svc FMRI string for the case.
*/
static void
{
int gced = 0;
int i;
if (uuid_cache == NULL)
/*
* If we somehow already have an entry for this uuid then
* return leaving it undisturbed.
*/
for (i = 0; i < uuid_cache->nentries; i++) {
return;
}
scan:
for (i = 0; i < uuid_cache->nentries; i++) {
break;
}
}
/*
* Before growing the cache we try again after first
* garbage-collecting the existing cache for any cases
* that are confirmed as resolved.
*/
if (!gced) {
gced = 1;
goto scan;
}
if (oldn < CACHE_NENT_MAX) {
} else {
return;
}
}
}
/*
* Mark cache entries as resolved: any entry that matches on uuid
* (if not NULL) or fmristr (if not NULL) is marked as resolved. Return 1 iff
* an entry that matched on uuid was already marked, otherwise 0 (entry
* matched on either, matched on uuid but not marked, not found).
*/
static int
{
int dirty = 0;
int rv = 0;
int i;
if (uuid_cache == NULL)
return (0);
for (i = 0; i < uuid_cache->nentries; i++) {
continue;
rv = 1;
dirty++;
dirty++;
}
}
if (dirty)
return (rv);
}
/*
* We will receive list events for cases we are not interested in. Test
* that this list has exactly one suspect and that it matches the maintenance
* defect. Return the defect to the caller in the second argument,
* and the defect resource element in the third arg.
*/
static int
{
return (0);
}
if (nfaults != 1 ||
return (0);
return (0);
}
return (1);
}
/*
* Received newly-diagnosed list.suspect events that are for the
* maintenance defect we diagnose. Close the case (the resource was already
* isolated by SMF) after caching the case UUID.
*/
/*ARGSUSED*/
static void
{
return;
}
return;
return;
}
}
}
/*ARGSUSED*/
static void
{
char *fromstate;
char *fmristr;
return;
}
return;
}
/*
* Filter those not describing a transition out of maintenance.
*/
return;
return;
}
return;
}
/*
* Mark any UUID for a case against this service as resolved
* in our cache. When we fmd_repair_asru below fmd will emit
* a list.repaired as a result, and our handling of that event
* must not propagate the repair towards SMF (since the repair
* was initiated via SMF itself and not via fmadm).
*/
}
/*ARGSUSED*/
static void
{
char *fmristr, *shrtfmristr;
char *uuid;
int already;
return;
}
return;
}
return;
return;
}
/*
* If the cache already had a marked entry for this UUID then
* this is a list.repaired arising from an SMF-initiated maintenance
* clear (propagated with fmd_repair_asru above which then results
* in a list.repaired) and so we should not propagate the repair
* back towards SMF. But do still force the case to RESOLVED state in
* case fmd is unable to confirm the service is no longer in maintenance
* state (it may have failed again) so that a new case can be opened.
*/
if (already) {
return;
}
/*
* Only propagate to SMF if we can see that the service is still
* in maintenance state. We're not synchronized with SMF
* and this state could change at any time, but if we can
* see it's not in maintenance state then things are obviously
* moving (e.g., external svcadm active) so we don't poke
* at SMF otherwise we confuse things or duplicate operations.
*/
if (shrtfmristr != NULL) {
(void) smf_restore_instance(shrtfmristr);
} else {
}
} else {
}
}
const struct sw_disp swrp_smf_disp[] = {
};
/*ARGSUSED*/
int
{
/*
* We need to subscribe to all SMF transition class events because
* we need to look inside the payload to see which events indicate
* a transition out of maintenance state.
*/
/*
* Subscribe to the defect class diagnosed for maintenance events.
* The module will then receive list.suspect events including
* these defects, and in our dispatch table above we list routing
* for list.suspect.
*/
/*
* Hand our dispatch table back to the caller through dpp and
* report successful initialization.
*/
*dpp = &swrp_smf_disp[0];
return (SW_SUB_INIT_SUCCESS);
}
/*ARGSUSED*/
void
{
}
/*
* Descriptor for this subsidiary. It is named "smf repair", uses case
* type SW_CASE_NONE, and supplies swrp_smf_init and swrp_smf_fini as its
* init and fini entry points. No timeout, case-close or case-verify
* handlers are provided (those slots are NULL).
*/
const struct sw_subinfo smf_response_info = {
"smf repair", /* swsub_name */
SW_CASE_NONE, /* swsub_casetype */
swrp_smf_init, /* swsub_init */
swrp_smf_fini, /* swsub_fini */
NULL, /* swsub_timeout */
NULL, /* swsub_case_close */
NULL, /* swsub_case_vrfy */
};