1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * This file and its contents are supplied under the terms of the
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Common Development and Distribution License ("CDDL"), version 1.0.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * You may only use this file in accordance with the terms of version
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * 1.0 of the CDDL.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * A full copy of the text of the CDDL should have accompanied this
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * source. A copy of the CDDL is also available via the Internet at
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Copyright (c) 2013, Joyent, Inc. All rights reserved.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Disk Lights Agent (FMA)
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * This Fault Management Daemon (fmd) module periodically scans the topology
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * tree, enumerates all disks with associated fault indicators, and then
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * synchronises the fault status of resources in the FMA Resource Cache with
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * the indicators. In short: it turns the fault light on for befallen disks.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Presently, we recognise associated fault indicators for disks by looking
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * for the following structure in the topology tree:
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * +---- /disk=0 <---------------- our Disk
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * +---- /bay=N?indicator=fail <---- the Fault Light
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * \---- /bay=N?indicator=ident
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * That is: a DISK node will have a parent BAY; that BAY will itself have
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * child Facility nodes, one of which will be called "fail". If any of the
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * above does not hold, we simply do nothing for this disk.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulowstatic void disklights_topo(fmd_hdl_t *, topo_hdl_t *);
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulowstatic void disklights_recv(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow const char *);
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulowstatic void disklights_timeout(fmd_hdl_t *, id_t, void *);
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * POLL_INTERVAL is the period after which we perform an unsolicited poll
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * to ensure we remain in sync with reality.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow#define DL_PROP_POLL_INTERVAL "poll-interval"
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * COALESCE_INTERVAL is how long we wait after we are triggered by either a
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * topology change or a relevant list.* event, in order to allow a series
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * of events to coalesce.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow#define DL_PROP_COALESCE_INTERVAL "coalesce-interval"
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow { DL_PROP_POLL_INTERVAL, FMD_TYPE_TIME, "5min" },
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow { DL_PROP_COALESCE_INTERVAL, FMD_TYPE_TIME, "3s" },
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow "Disk Lights Agent",
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Fetch the Facility Node properties (name, type) from the FMRI
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * for this node, or return -1 if we can't.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulowget_facility_props(topo_hdl_t *hdl, tnode_t *node, char **facname,
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow if (topo_node_resource(node, &fmri, &e) != 0)
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow if (nvlist_lookup_nvlist(fmri, FM_FMRI_FACILITY, &fnvl) != 0)
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow if (nvlist_lookup_string(fnvl, FM_FMRI_FACILITY_NAME, &nn) != 0)
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow if (nvlist_lookup_string(fnvl, FM_FMRI_FACILITY_TYPE, &tt) != 0)
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulowdl_fault_walk_inner(topo_hdl_t *thp, tnode_t *node, void *arg)
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * We're only interested in BAY children that are valid Facility Nodes.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow if (topo_node_flags(node) != TOPO_NODE_FACILITY ||
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow get_facility_props(thp, node, &facname, &factype) != 0) {
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Attempt to set the LED mode appropriately. If this fails, give up
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * and move on.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow (void) topo_prop_set_uint32(node, TOPO_PGROUP_FACILITY, TOPO_LED_MODE,
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulowdl_fault_walk_outer(topo_hdl_t *thp, tnode_t *node, void *arg)
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * We are only looking for DISK nodes in the topology that have a parent
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow if (strcmp(DISK, topo_node_name(node)) != 0 ||
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow (pnode = topo_node_parent(node)) == NULL ||
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Check to see if the Resource this FMRI describes is Faulty:
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow if (topo_node_resource(node, &fmri, &err) != 0)
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow has_fault = fmd_nvl_fmri_has_fault(dl->dl_fmd, fmri,
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Walk the children of this BAY and flush out our fault status if
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * we find an appropriate indicator node.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow fwi.fwi_mode = has_fault ? TOPO_LED_STATE_ON : TOPO_LED_STATE_OFF;
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow (void) topo_node_child_walk(thp, pnode, dl_fault_walk_inner, &fwi,
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Walk all of the topology nodes looking for DISKs that match the structure
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * described in the overview. Once we find them, check their fault status
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * and update their fault indicator accordingly.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow thp = fmd_hdl_topo_hold(dl->dl_fmd, TOPO_VERSION);
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC, dl_fault_walk_outer,
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow fmd_hdl_error(dl->dl_fmd, "failed to get topology: %s\n",
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow if (topo_walk_step(twp, TOPO_WALK_CHILD) == TOPO_WALK_ERR) {
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow fmd_hdl_error(dl->dl_fmd, "failed to walk topology: %s\n",
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * If we're already on the short-poll coalesce timer, then return
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * immediately.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Replace existing poll timer with coalesce timer:
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow fmd_timer_remove(dl->dl_fmd, dl->dl_timer);
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow dl->dl_timer = fmd_timer_install(dl->dl_fmd, NULL, NULL,
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulowdisklights_timeout(fmd_hdl_t *hdl, id_t id, void *data)
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow disk_lights_t *dl = fmd_hdl_getspecific(hdl);
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Install the long-interval timer for the next poll.
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow dl->dl_timer = fmd_timer_install(hdl, NULL, NULL, dl->dl_poll_interval);
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulowdisklights_topo(fmd_hdl_t *hdl, topo_hdl_t *thp)
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow disk_lights_t *dl = fmd_hdl_getspecific(hdl);
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulowdisklights_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow disk_lights_t *dl = fmd_hdl_getspecific(hdl);
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow dl = fmd_hdl_zalloc(hdl, sizeof (*dl), FMD_SLEEP);
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Load Configuration:
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow dl->dl_poll_interval = fmd_prop_get_int64(hdl, DL_PROP_POLL_INTERVAL);
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow dl->dl_coalesce_interval = fmd_prop_get_int64(hdl,
1410cb930a3e26032c59c6835837a28c47366b3cJoshua M. Clulow * Schedule the initial enumeration: