disk_transport.c revision 184cd04c26b064536977dfbb913a1240eaf6f708
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Disk error transport module
*
* This transport module is responsible for translating between disk errors
* and FMA ereports. It is a read-only transport module, and checks for the
* following failures:
*
* - overtemp
* - predictive failure
* - self-test failure
*
* These failures are detected via the TOPO_METH_DISK_STATUS method, which
* leverages libdiskstatus to do the actual analysis. This transport module is
* in charge of the following tasks:
*
* - discovering available devices
* - periodically checking devices
* - managing device addition/removal
*/
#include <ctype.h>
#include <fm/fmd_api.h>
#include <fm/libdiskstatus.h>
#include <fm/libtopo.h>
#include <fm/topo_hc.h>
#include <fm/topo_mod.h>
#include <limits.h>
#include <string.h>
#include <sys/fm/io/scsi.h>
#include <sys/fm/protocol.h>
static struct dt_stat {
fmd_stat_t dropped;
} dt_stats = {
{ "dropped", FMD_TYPE_UINT64, "number of dropped ereports" }
};
typedef struct disk_monitor {
fmd_hdl_t *dm_hdl;
fmd_xprt_t *dm_xprt;
id_t dm_timer;
hrtime_t dm_interval;
char *dm_sim_search;
char *dm_sim_file;
boolean_t dm_timer_istopo;
} disk_monitor_t;
static void
dt_post_ereport(fmd_hdl_t *hdl, fmd_xprt_t *xprt, const char *protocol,
const char *faultname, uint64_t ena, nvlist_t *detector, nvlist_t *payload)
{
nvlist_t *nvl;
int e = 0;
char fullclass[PATH_MAX];
(void) snprintf(fullclass, sizeof (fullclass), "%s.io.%s.disk.%s",
FM_EREPORT_CLASS, protocol, faultname);
if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) == 0) {
e |= nvlist_add_string(nvl, FM_CLASS, fullclass);
e |= nvlist_add_uint8(nvl, FM_VERSION, FM_EREPORT_VERSION);
e |= nvlist_add_uint64(nvl, FM_EREPORT_ENA, ena);
e |= nvlist_add_nvlist(nvl, FM_EREPORT_DETECTOR, detector);
e |= nvlist_merge(nvl, payload, 0);
if (e == 0) {
fmd_xprt_post(hdl, xprt, nvl, 0);
} else {
nvlist_free(nvl);
dt_stats.dropped.fmds_value.ui64++;
}
} else {
dt_stats.dropped.fmds_value.ui64++;
}
}
/*
* Check a single topo node for failure. This simply invokes the disk status
* method, and generates any ereports as necessary.
*/
static int
dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
{
nvlist_t *result;
nvlist_t *fmri, *faults;
char *protocol;
int err;
disk_monitor_t *dmp = arg;
uint64_t ena;
nvpair_t *elem;
boolean_t fault;
nvlist_t *details;
char *fmristr;
nvlist_t *in = NULL;
if (topo_node_resource(node, &fmri, &err) != 0) {
fmd_hdl_error(dmp->dm_hdl, "failed to get fmri: %s\n",
topo_strerror(err));
return (TOPO_WALK_ERR);
}
if (topo_hdl_nvalloc(thp, &in, NV_UNIQUE_NAME) != 0) {
nvlist_free(fmri);
return (TOPO_WALK_ERR);
}
if (dmp->dm_sim_search) {
fmristr = NULL;
if (topo_fmri_nvl2str(thp, fmri, &fmristr, &err) == 0 &&
strstr(fmristr, dmp->dm_sim_search) != 0)
(void) nvlist_add_string(in, "path", dmp->dm_sim_file);
topo_hdl_strfree(thp, fmristr);
}
/*
* Try to invoke the method. If this fails (most likely because the
* method is not supported), then ignore this node.
*/
if (topo_method_invoke(node, TOPO_METH_DISK_STATUS,
TOPO_METH_DISK_STATUS_VERSION, in, &result, &err) != 0) {
nvlist_free(fmri);
nvlist_free(in);
return (TOPO_WALK_NEXT);
}
nvlist_free(in);
ena = fmd_event_ena_create(dmp->dm_hdl);
/*
* Add any faults.
*/
if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 &&
nvlist_lookup_string(result, "protocol", &protocol) == 0) {
elem = NULL;
while ((elem = nvlist_next_nvpair(faults, elem)) != NULL) {
if (nvpair_type(elem) != DATA_TYPE_BOOLEAN_VALUE)
continue;
(void) nvpair_value_boolean_value(elem, &fault);
if (!fault ||
nvlist_lookup_nvlist(result, nvpair_name(elem),
&details) != 0)
continue;
dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol,
nvpair_name(elem), ena, fmri, details);
}
}
nvlist_free(result);
nvlist_free(fmri);
return (TOPO_WALK_NEXT);
}
/*
* Periodic timeout. Iterates over all hc:// topo nodes, calling
* dt_analyze_disk() for each one.
*/
/*ARGSUSED*/
static void
dt_timeout(fmd_hdl_t *hdl, id_t id, void *data)
{
topo_hdl_t *thp;
topo_walk_t *twp;
int err;
disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
dmp->dm_hdl = hdl;
thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC, dt_analyze_disk,
dmp, &err)) == NULL) {
fmd_hdl_topo_rele(hdl, thp);
fmd_hdl_error(hdl, "failed to get topology: %s\n",
topo_strerror(err));
return;
}
if (topo_walk_step(twp, TOPO_WALK_CHILD) == TOPO_WALK_ERR) {
topo_walk_fini(twp);
fmd_hdl_topo_rele(hdl, thp);
fmd_hdl_error(hdl, "failed to walk topology\n");
return;
}
topo_walk_fini(twp);
fmd_hdl_topo_rele(hdl, thp);
dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL, dmp->dm_interval);
dmp->dm_timer_istopo = B_FALSE;
}
/*
* Called when the topology may have changed. We want to examine all disks in
* case a new one has been inserted, but we don't want to overwhelm the system
* in the event of a flurry of topology changes, as most likely only a small
* number of disks are changing. To avoid this, we set the timer for a small
* but non-trivial interval (by default 1 minute), and ignore intervening
* changes during this period. This still gives us a reasonable response time
* to newly inserted devices without overwhelming the system if lots of hotplug
* activity is going on.
*/
/*ARGSUSED*/
static void
dt_topo_change(fmd_hdl_t *hdl, topo_hdl_t *thp)
{
disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
if (dmp->dm_timer_istopo)
return;
fmd_timer_remove(hdl, dmp->dm_timer);
dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL,
fmd_prop_get_int64(hdl, "min-interval"));
dmp->dm_timer_istopo = B_TRUE;
}
static const fmd_prop_t fmd_props[] = {
{ "interval", FMD_TYPE_TIME, "1h" },
{ "min-interval", FMD_TYPE_TIME, "1min" },
{ "simulate", FMD_TYPE_STRING, "" },
{ NULL, 0, NULL }
};
static const fmd_hdl_ops_t fmd_ops = {
NULL, /* fmdo_recv */
dt_timeout, /* fmdo_timeout */
NULL, /* fmdo_close */
NULL, /* fmdo_stats */
NULL, /* fmdo_gc */
NULL, /* fmdo_send */
dt_topo_change, /* fmdo_topo_change */
};
static const fmd_hdl_info_t fmd_info = {
"Disk Transport Agent", "1.0", &fmd_ops, fmd_props
};
void
_fmd_init(fmd_hdl_t *hdl)
{
disk_monitor_t *dmp;
char *simulate;
if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
return;
(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
sizeof (dt_stats) / sizeof (fmd_stat_t),
(fmd_stat_t *)&dt_stats);
dmp = fmd_hdl_zalloc(hdl, sizeof (disk_monitor_t), FMD_SLEEP);
fmd_hdl_setspecific(hdl, dmp);
dmp->dm_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
dmp->dm_interval = fmd_prop_get_int64(hdl, "interval");
/*
* Determine if we have the simulate property set. This property allows
* the developer to substitute a faulty device based off all or part of
* an FMRI string. For example, one could do:
*
* setprop simulate "bay=4/disk=4 /path/to/sim.so"
*
* When the transport module encounters an FMRI containing the given
* string, then it will open the simulator file instead of the
* corresponding device. This can be any file, but is intended to be a
* libdiskstatus simulator shared object, capable of faking up SCSI
* responses.
*
* The property consists of two strings, an FMRI fragment and an
* absolute path, separated by whitespace.
*/
simulate = fmd_prop_get_string(hdl, "simulate");
if (simulate[0] != '\0') {
const char *sep;
size_t len;
for (sep = simulate; *sep != '\0'; sep++) {
if (isspace(*sep))
break;
}
if (*sep != '\0') {
len = sep - simulate;
dmp->dm_sim_search = fmd_hdl_alloc(hdl,
len + 1, FMD_SLEEP);
(void) memcpy(dmp->dm_sim_search, simulate, len);
dmp->dm_sim_search[len] = '\0';
}
for (; *sep != '\0'; sep++) {
if (!isspace(*sep))
break;
}
if (*sep != '\0') {
dmp->dm_sim_file = fmd_hdl_strdup(hdl, sep, FMD_SLEEP);
} else if (dmp->dm_sim_search) {
fmd_hdl_strfree(hdl, dmp->dm_sim_search);
dmp->dm_sim_search = NULL;
}
}
fmd_prop_free_string(hdl, simulate);
/*
* Call our initial timer routine. This will do an initial check of all
* the disks, and then start the periodic timeout.
*/
dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL, 0);
}
void
_fmd_fini(fmd_hdl_t *hdl)
{
disk_monitor_t *dmp;
dmp = fmd_hdl_getspecific(hdl);
if (dmp) {
fmd_xprt_close(hdl, dmp->dm_xprt);
fmd_hdl_strfree(hdl, dmp->dm_sim_search);
fmd_hdl_strfree(hdl, dmp->dm_sim_file);
fmd_hdl_free(hdl, dmp, sizeof (*dmp));
}
}