disk_transport.c revision 24db46411fd54f70c35b94bb952eb7ba040e43b4
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * CDDL HEADER START
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * The contents of this file are subject to the terms of the
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * Common Development and Distribution License (the "License").
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * You may not use this file except in compliance with the License.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * See the License for the specific language governing permissions
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * and limitations under the License.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * When distributing Covered Code, include this CDDL HEADER in each
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * If applicable, add the following below this CDDL HEADER, with the
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * fields enclosed by brackets "[]" replaced with your own identifying
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * information: Portions Copyright [yyyy] [name of copyright owner]
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * CDDL HEADER END
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * Use is subject to license terms.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock#pragma ident "%Z%%M% %I% %E% SMI"
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * Disk error transport module
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * This transport module is responsible for translating between disk errors
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * and FMA ereports. It is a read-only transport module, and checks for the
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * following failures:
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * - overtemp
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * - predictive failure
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * - self-test failure
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * These failures are detected via the TOPO_METH_DISK_STATUS method, which
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * leverages libdiskstatus to do the actual analysis. This transport module is
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * in charge of the following tasks:
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * - discovering available devices
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * - periodically checking devices
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * - managing device addition/removal
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrockstatic struct dt_stat {
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock { "dropped", FMD_TYPE_UINT64, "number of dropped ereports" }
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrocktypedef struct disk_monitor {
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrockdt_post_ereport(fmd_hdl_t *hdl, fmd_xprt_t *xprt, const char *protocol,
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock const char *faultname, uint64_t ena, nvlist_t *detector, nvlist_t *payload)
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock (void) snprintf(fullclass, sizeof (fullclass), "%s.io.%s.disk.%s",
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock e |= nvlist_add_uint8(nvl, FM_VERSION, FM_EREPORT_VERSION);
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock e |= nvlist_add_nvlist(nvl, FM_EREPORT_DETECTOR, detector);
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock if (e == 0) {
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * Check a single topo node for failure. This simply invokes the disk status
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * method, and generates any ereports as necessary.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrockdt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock fmd_hdl_error(dmp->dm_hdl, "failed to get fmri: %s\n",
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock if (topo_hdl_nvalloc(thp, &in, NV_UNIQUE_NAME) != 0) {
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock if (topo_fmri_nvl2str(thp, fmri, &fmristr, &err) == 0 &&
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock (void) nvlist_add_string(in, "path", dmp->dm_sim_file);
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * Try to invoke the method. If this fails (most likely because the
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * method is not supported), then ignore this node.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock if (topo_method_invoke(node, TOPO_METH_DISK_STATUS,
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock TOPO_METH_DISK_STATUS_VERSION, in, &result, &err) != 0) {
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * Add any faults.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 &&
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock nvlist_lookup_string(result, "protocol", &protocol) == 0) {
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock while ((elem = nvlist_next_nvpair(faults, elem)) != NULL) {
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol,
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * Periodic timeout. Iterates over all hc:// topo nodes, calling
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * dt_analyze_disk() for each one.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock/*ARGSUSED*/
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC, dt_analyze_disk,
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock if (topo_walk_step(twp, TOPO_WALK_CHILD) == TOPO_WALK_ERR) {
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL, dmp->dm_interval);
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * Called when the topology may have changed. We want to examine all disks in
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * case a new one has been inserted, but we don't want to overwhelm the system
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * in the event of a flurry of topology changes, as most likely only a small
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * number of disks are changing. To avoid this, we set the timer for a small
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * but non-trivial interval (by default 1 minute), and ignore intervening
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * changes during this period. This still gives us a reasonable response time
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * to newly inserted devices without overwhelming the system if lots of hotplug
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * activity is going on.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock/*ARGSUSED*/
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock dmp = fmd_hdl_zalloc(hdl, sizeof (disk_monitor_t), FMD_SLEEP);
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock dmp->dm_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock dmp->dm_interval = fmd_prop_get_int64(hdl, "interval");
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * Determine if we have the simulate property set. This property allows
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * the developer to substitute a faulty device based on all or part of
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * an FMRI string. For example, one could do:
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * setprop simulate "sata-port=4/disk=4 /path/to/sim.so"
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * When the transport module encounters an FMRI containing the given
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * string, it will open the simulator file instead of the
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * corresponding device. This can be any file, but is intended to be a
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * libdiskstatus simulator shared object, capable of faking up SCSI
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * responses.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * The property consists of two strings, an FMRI fragment and an
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * absolute path, separated by whitespace.
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock const char *sep;
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock dmp->dm_sim_file = fmd_hdl_strdup(hdl, sep, FMD_SLEEP);
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * Call our initial timer routine. This will do an initial check of all
24db46411fd54f70c35b94bb952eb7ba040e43b4eschrock * the disks, and then start the periodic timeout.