fmd.c revision 44743693dce3212f5edba623e0cb0327bd4337a3
#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Version string for the daemon.  The directive above must sit on its own
 * line: a preprocessor directive runs to the end of the line, so it cannot
 * share a line with the declaration that follows it.
 */
const char _fmd_version[] = "1.2";
/* daemon version string */
/*
 * Note: the configuration file path is ordered from most common to most
 * host-specific because new conf files are merged into and override previous
 * ones.  The module paths are in the opposite order, from most specific to
 * most common, because once a module is loaded fmd will not try to load over
 * the same name.
 */
{
"help",
"display debugging modes and exit",
FMD_DBG_HELP },
{
"disp",
"debug dispatch queue processing",
FMD_DBG_DISP },
{
"xprt",
"debug transport-specific routines",
FMD_DBG_XPRT },
{
"evt",
"debug event subsystem routines",
FMD_DBG_EVT },
{
"log",
"debug log subsystem routines",
FMD_DBG_LOG },
{
"tmr",
"debug timer subsystem routines",
FMD_DBG_TMR },
{
"all",
"enable all available debug modes",
FMD_DBG_ALL },
{
"ckpt.dirmode", &
fmd_conf_int32,
"0700" },
/* ckpt directory perm mode */{
"ckpt.restore", &
fmd_conf_bool,
"true" },
/* restore checkpoints? */{
"ckpt.zero", &
fmd_conf_bool,
"false" },
/* zero checkpoints on start? */{
"client.buflim", &
fmd_conf_size,
"10m" },
/* client buffer space limit */{
"client.error", &
fmd_cerror_ops,
"unload" },
/* client error policy */{
"client.memlim", &
fmd_conf_size,
"10m" },
/* client allocation limit */{
"client.evqlim", &
fmd_conf_uint32,
"256" },
/* client event queue limit */{
"client.xprtlim", &
fmd_conf_uint32,
"256" },
/* client transport limit */{
"client.xprtqlim", &
fmd_conf_uint32,
"1024" },
/* client transport queue limit */{
"core", &
fmd_conf_bool,
"false" },
/* force core dump on quit */{
"fakenotpresent", &
fmd_conf_bool,
"false" },
/* simulate rsrc not present */{
"fg", &
fmd_conf_bool,
"false" },
/* run daemon in foreground */{
"gc_interval", &
fmd_conf_time,
"1d" },
/* garbage collection intvl */{
"log.minfree", &
fmd_conf_size,
"2m" },
/* min log fsys free space */{
"log.waitrotate", &
fmd_conf_time,
"200ms" },
/* log rotation retry delay */{
"rpc.adm.prog", &
fmd_conf_uint32,
"100169" },
/* FMD_ADM rpc program num */{
"rpc.api.prog", &
fmd_conf_uint32,
"100170" },
/* FMD_API rpc program num */{
"rpc.rcvsize", &
fmd_conf_size,
"128k" },
/* rpc receive buffer size */{
"rsrc.age", &
fmd_conf_time,
"30d" },
/* max age of old rsrc log */{
"rsrc.zero", &
fmd_conf_bool,
"false" },
/* zero rsrc cache on start? */{
"self.name", &
fmd_conf_string,
"fmd-self-diagnosis" },
/* self-diag module */{
"self.dict", &
fmd_conf_list,
"FMD.dict" },
/* self-diag dictionary list */{
"trace.frames", &
fmd_conf_uint32,
"16" },
/* max trace rec stack frames */
/*
 * Statistics maintained by fmd itself on behalf of various global subsystems.
 * NOTE: FMD_TYPE_STRING statistics should not be used here.  If they are
 * required in the future, the FMD_ADM_MODGSTAT service routine must change.
 */
{
"errlog.replayed",
FMD_TYPE_UINT64,
"total events replayed from errlog" },
{
"errlog.partials",
FMD_TYPE_UINT64,
"events partially committed in errlog" },
{
"errlog.enospc",
FMD_TYPE_UINT64,
"events not appended to errlog (ENOSPC)" },
{
"fltlog.enospc",
FMD_TYPE_UINT64,
"events not appended to fltlog (ENOSPC)" },
{
"log.enospc",
FMD_TYPE_UINT64,
"events not appended to other logs (ENOSPC)" },
{
"topo.drgen",
FMD_TYPE_UINT64,
"current topology DR generation number" },
(
unsigned char **)&
bufp) != -
1) {
/*
 * A small number of properties must be set manually before we open
 * the root configuration file.  These include any settings for our
 * memory allocator and path expansion token values, because these
 * values are needed by the routines in fmd_conf.c itself.  After
 * the root configuration file is processed, we reset these properties
 * based upon the latest values from the configuration file.
 */
"failed to load required configuration properties\n");
/*
 * Manually specified rootdirs override config files, so only update
 * d_rootdir based on the config files we parsed if no 'root' was set.
 */
/*
 * Once the base conf file properties are loaded, lookup the values
 * of $conf_path and $conf_file and merge in any other conf files.
 */
/*
 * Update the value of fmd.d_fg based on "fg".  We cache this property
 * because it must be accessed deep within fmd at fmd_verror() time.
 * Update any other properties that must be cached for performance.
 */
/*
 * Initialize our custom libnvpair allocator and create an nvlist for
 * authority elements corresponding to this instance of the daemon.
 */
/*
 * The fmd_module_t for the root module must be created manually.  Most
 * of it remains unused and zero, except for the few things we fill in.
 */
/*
 * In addition to inserting the _fmd_stats collection of program-wide
 * statistics, we also insert a statistic named after each of our
 * errors and update these counts in fmd_verror() (see fmd_subr.c).
 */
/*
 * Unload the self-diagnosis module first.  This ensures that it does
 * not get confused as we start unloading other modules, etc.  We must
 * hold the dispq lock as a writer while doing so since it uses d_self.
 */
/*
 * Unload modules in reverse order *except* for the root module, which
 * is first in the list.  This allows it to keep its thread and trace.
 */
/*
 * Close both log files now that modules are no longer active.  We must
 * set these pointers to NULL in case any subsequent errors occur.
 */
/*
 * Now destroy the resource cache: each ASRU contains a case reference,
 * which may in turn contain a pointer to a referenced owning module.
 */
/*
 * Now that all data structures that refer to modules are torn down,
 * no modules should be remaining on the module list except for d_rmod.
 * If we trip one of these assertions, we're missing a rele somewhere.
 */
/*
 * Now destroy the root module.  We clear its thread key first so any
 * calls to fmd_trace() inside of the module code will be ignored.
 */
/*
 * Now destroy the remaining global data structures.  If 'core' was
 * set to true, force a core dump so we can check for memory leaks.
 */
fmd_panic(
"forcing core dump at user request\n");
/*
 * Events are committed to the errlog after cases are checkpointed.  If fmd
 * crashes before an event is ever associated with a module, this function will
 * be called to replay it to all subscribers.  If fmd crashes in between the
 * subscriber checkpointing and committing the event in the error log, the
 * module will have seen the event and we don't want to replay it.  So we look
 * for the event in all modules and transition it to the proper state.  If
 * it is found, we commit it to the error log and do not replay it.  The
 * in-memory case search used by fmd_module_contains() et al isn't particularly
 * efficient, but it is faster than doing read i/o's on every case event to
 * check their status or write i/o's on every event to replay to update states.
 * We can improve the efficiency of this lookup algorithm later if necessary.
 */
/*
 * Custom door server create callback.  Any fmd services that use doors will
 * require those threads to have their fmd-specific TSD initialized, etc.
 */
fmd_panic(
"failed to create server for door %p", (
void *)
dip);
/*
 * This signal handler is installed for the client.thrsig signal to be used to
 * force an auxiliary thread to wake up from a system call and return EINTR in
 * response to a module's use of fmd_thr_signal().  We also trace the event.
 */
/*
 * Cache all the current debug property settings in d_fmd_debug,
 * d_fmd_dbout, d_hdl_debug, and d_hdl_dbout.  If a given debug mask
 * is non-zero and the corresponding dbout mask is zero, set dbout
 * to a sensible default value based on whether we have daemonized.
 */
/*
 * Initialize remaining major program data structures such as the
 * clock, dispatch queues, log files, module hash collections, etc.
 * This work is done here rather than in fmd_create() to permit the -o
 * command-line option to modify properties after fmd_create() is done.
 */
/*
 * The clock must be initialized before fmd_topo_init() because
 * fmd_topo_update() calls fmd_time_gethrtime().
 */
/*
 * The root module's mod_queue is created with limit zero, making it
 * act like /dev/null; anything inserted here is simply ignored.
 */
/*
 * Once our subsystems that use signals have been set up, install the
 * signal handler for the fmd_thr_signal() API.  Verify that the signal
 * being used for this purpose doesn't conflict with something else.
 */
/*
 * Before loading modules, create an empty control event which will act
 * as a global barrier for module event processing.  Each module we
 * load successfully will insert it at the head of its event queue,
 * and then pause inside of fmd_ctl_rele() after dequeuing the event.
 * This module barrier is required for two reasons:
 *
 * (a) During module loading, the restoration of case checkpoints may
 *     result in a list.* event being recreated for which the intended
 *     subscriber has not yet loaded depending on the load order.  Such
 *     events could then result in spurious "no subscriber" errors.
 *
 * (b) During errlog replay, a sequence of errors from a long time ago
 *     may be replayed, and the module may attempt to install relative
 *     timers associated with one or more of these events.  If errlog
 *     replay were "racing" with active module threads, an event E1
 *     that resulted in a relative timer T at time E1 + N nsec could
 *     fire prior to an event E2 being enqueued, even if the relative
 *     time ordering was E1 < E2 < E1 + N, causing mis-diagnosis.
 */
/*
 * Once all data structures are initialized, we load all of our modules
 * in order according to class in order to load up any subscriptions.
 * Once built-in modules are loaded, we detach from our waiting parent.
 */
dp->
d_running =
1;
/* we are now officially an active fmd */
/*
 * Now that we're running, if a pipe fd was specified, write an exit
 * status to it to indicate that our parent process can safely detach.
 * Then proceed to loading the remaining non-built-in modules.
 */
/*
 * Before loading all modules, repopulate the ASRU cache from its
 * persistent repository on disk.  Then during module loading, the
 * restoration of checkpoint files will reparent any active cases.
 */
/*
 * With all modules loaded, replay fault events from the ASRU cache for
 * any ASRUs that must be retired, replay error events from the errlog
 * that did not finish processing the last time fmd ran, and then release
 * the global module barrier by executing a final rele on d_mod_event.
 */
/*
 * Finally, awaken any threads associated with receiving events from
 * open transports and tell them to proceed with fmd_xprt_recv().
 */