zfs_de.c revision c5904d138f3bdf0762dbf452a43d5a5c387ea6a8
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <strings.h>
#include <libuutil.h>
#include <libzfs.h>

#include <fm/fmd_api.h>
#include <sys/fs/zfs.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
/*
 * Our serd engines are named 'zfs_<pool_guid>_<vdev_guid>_{checksum,io}'.  This
 * #define reserves enough space for two 64-bit hex values plus the length of
 * the longest string.
 */
#define	MAX_SERDLEN	(16 * 2 + sizeof ("zfs___checksum"))

typedef struct zfs_case_data {
	uint64_t	zc_version;
	uint64_t	zc_ena;
	uint64_t	zc_pool_guid;
	uint64_t	zc_vdev_guid;
	int		zc_has_timer;
	int		zc_pool_state;
	char		zc_serd_checksum[MAX_SERDLEN];
	char		zc_serd_io[MAX_SERDLEN];
	int		zc_has_serd_timer;
} zfs_case_data_t;

typedef struct zfs_case {
	boolean_t	zc_present;
	zfs_case_data_t	zc_data;
	fmd_case_t	*zc_case;
	uu_list_node_t	zc_node;
	id_t		zc_timer;
	id_t		zc_serd_timer;
} zfs_case_t;
#define CASE_DATA "data"
#define CASE_DATA_VERSION_INITIAL 1
#define CASE_DATA_VERSION_SERD 2
uu_list_pool_t *zfs_case_pool;
uu_list_t *zfs_cases;

static hrtime_t zfs_case_timeout;
static hrtime_t zfs_serd_timeout;
/*
* Write out the persistent representation of an active case.
*/
static void
zfs_case_serialize(fmd_hdl_t *hdl, zfs_case_t *zcp)
{
/*
* Always update cases to the latest version, even if they were the
* previous version when unserialized.
*/
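	/*
	 * A minimal sketch of the write-out, assuming the standard
	 * fmd_buf_write() module API and the zc_data layout defined above:
	 */
	zcp->zc_data.zc_version = CASE_DATA_VERSION_SERD;
	fmd_buf_write(hdl, zcp->zc_case, CASE_DATA, &zcp->zc_data,
	    sizeof (zcp->zc_data));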
}
/*
* Read back the persistent representation of an active case.
*/
static zfs_case_t *
zfs_case_unserialize(fmd_hdl_t *hdl, fmd_case_t *cp)
{
	zfs_case_t *zcp;

	zcp = fmd_hdl_zalloc(hdl, sizeof (zfs_case_t), FMD_SLEEP);
	zcp->zc_case = cp;
	fmd_buf_read(hdl, cp, CASE_DATA, &zcp->zc_data,
	    sizeof (zcp->zc_data));

	/* Reject case data written by an unknown (newer) version. */
	if (zcp->zc_data.zc_version > CASE_DATA_VERSION_SERD) {
		fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t));
		return (NULL);
	}
/*
* fmd_buf_read() will have already zeroed out the remainder of the
* buffer, so we don't have to do anything special if the version
* doesn't include the SERD engine name.
*/
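	/*
	 * Sketch: attach the in-core state to the fmd case and track it on
	 * the module-wide case list (libuutil list handling assumed).
	 */
	uu_list_node_init(zcp, &zcp->zc_node, zfs_case_pool);
	(void) uu_list_insert_before(zfs_cases, NULL, zcp);
	fmd_case_setspecific(hdl, cp, zcp);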
return (zcp);
}
/*
* Iterate over any active cases. If any cases are associated with a pool or
* vdev which is no longer present on the system, close the associated case.
*/
static void
zfs_mark_vdev(uint64_t pool_guid, nvlist_t *vd)
{
	uint64_t vdev_guid;
	uint_t c, children;
	nvlist_t **child;
	zfs_case_t *zcp;
	int ret;

	ret = nvlist_lookup_uint64(vd, ZPOOL_CONFIG_GUID, &vdev_guid);
	assert(ret == 0);

	/*
	 * Mark any cases associated with this (pool, vdev) pair.
	 */
	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
	    zcp = uu_list_next(zfs_cases, zcp)) {
		if (zcp->zc_data.zc_pool_guid == pool_guid &&
		    zcp->zc_data.zc_vdev_guid == vdev_guid)
			zcp->zc_present = B_TRUE;
	}

	/*
	 * Iterate over all children.
	 */
	if (nvlist_lookup_nvlist_array(vd, ZPOOL_CONFIG_CHILDREN, &child,
	    &children) == 0) {
		for (c = 0; c < children; c++)
			zfs_mark_vdev(pool_guid, child[c]);
	}
}
/*ARGSUSED*/
static int
zfs_mark_pool(zpool_handle_t *zhp, void *unused)
{
	zfs_case_t *zcp;
	uint64_t pool_guid = zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL);
	nvlist_t *config, *vd;
	int ret;

	/*
	 * Mark any cases associated with just this pool.
	 */
	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
	    zcp = uu_list_next(zfs_cases, zcp)) {
		if (zcp->zc_data.zc_pool_guid == pool_guid &&
		    zcp->zc_data.zc_vdev_guid == 0)
			zcp->zc_present = B_TRUE;
	}

	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
		zpool_close(zhp);
		return (-1);
	}

	ret = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vd);
	assert(ret == 0);
	zfs_mark_vdev(pool_guid, vd);
	zpool_close(zhp);
	return (0);
}
static void
zfs_purge_cases(fmd_hdl_t *hdl)
{
	zfs_case_t *zcp;
	uu_list_walk_t *walk;
	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
/*
* There is no way to open a pool by GUID, or lookup a vdev by GUID. No
 * matter what we do, we're going to have to stomach an O(vdevs * cases)
* algorithm. In reality, both quantities are likely so small that
* neither will matter. Given that iterating over pools is more
* expensive than iterating over the in-memory case list, we opt for a
* 'present' flag in each case that starts off cleared. We then iterate
* over all pools, marking those that are still present, and removing
* those that aren't found.
*
* Note that we could also construct an FMRI and rely on
* fmd_nvl_fmri_present(), but this would end up doing the same search.
*/
/*
 * Mark the cases as not present.
*/
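	/*
	 * A minimal sketch, assuming the module-wide 'zfs_cases' list
	 * declared above:
	 */
	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
	    zcp = uu_list_next(zfs_cases, zcp))
		zcp->zc_present = B_FALSE;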
/*
* Iterate over all pools and mark the pools and vdevs found. If this
* fails (most probably because we're out of memory), then don't close
 * any of the cases, since we cannot be sure the results are accurate.
*/
	if (zpool_iter(zhdl, zfs_mark_pool, NULL) != 0)
		return;
/*
* Remove those cases which were not found.
*/
	walk = uu_list_walk_start(zfs_cases, UU_WALK_ROBUST);
	while ((zcp = uu_list_walk_next(walk)) != NULL) {
		if (!zcp->zc_present)
			fmd_case_close(hdl, zcp->zc_case);
	}
	uu_list_walk_end(walk);
}
/*
 * Construct the name of a serd engine given the pool/vdev GUID and the type
 * (io or checksum).
 */
static void
zfs_serd_name(char *buf, uint64_t pool_guid, uint64_t vdev_guid,
    const char *type)
{
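	/*
	 * Sketch of the naming scheme described above
	 * ('zfs_<pool_guid>_<vdev_guid>_<type>'):
	 */
	(void) snprintf(buf, MAX_SERDLEN, "zfs_%llx_%llx_%s",
	    (u_longlong_t)pool_guid, (u_longlong_t)vdev_guid, type);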
}
/*
* Solve a given ZFS case. This first checks to make sure the diagnosis is
* still valid, as well as cleaning up any pending timer associated with the
* case.
*/
static void
zfs_case_solve(fmd_hdl_t *hdl, zfs_case_t *zcp, const char *faultname,
    boolean_t checkunusable)
{
	nvlist_t *detector, *fault;
	boolean_t serialize;

	/*
	 * Construct the detector from the case data.  The detector is in the
	 * ZFS scheme, and is either the pool or the vdev, depending on whether
	 * this is a vdev or pool fault.
	 */
	if (nvlist_alloc(&detector, NV_UNIQUE_NAME, 0) != 0)
		return;
	if (nvlist_add_string(detector, FM_FMRI_SCHEME,
	    FM_FMRI_SCHEME_ZFS) != 0 ||
	    nvlist_add_uint64(detector, FM_FMRI_ZFS_POOL,
	    zcp->zc_data.zc_pool_guid) != 0 ||
	    (zcp->zc_data.zc_vdev_guid != 0 &&
	    nvlist_add_uint64(detector, FM_FMRI_ZFS_VDEV,
	    zcp->zc_data.zc_vdev_guid) != 0)) {
		nvlist_free(detector);
		return;
	}

	/*
	 * We also want to make sure that the detector (pool or vdev) properly
	 * reflects the diagnosed state, when the fault corresponds to internal
	 * ZFS state (i.e. not checksum or I/O error-induced).  Otherwise, a
	 * device which was previously unavailable (e.g. because its driver
	 * wasn't available) and is now healthy will be mis-diagnosed.
	 */
	if (checkunusable && !fmd_nvl_fmri_unusable(hdl, detector)) {
		nvlist_free(detector);
		return;
	}

	fault = fmd_nvl_create_fault(hdl, faultname, 100, detector, NULL,
	    detector);
	fmd_case_add_suspect(hdl, zcp->zc_case, fault);
	fmd_case_solve(hdl, zcp->zc_case);

	serialize = B_FALSE;
	if (zcp->zc_data.zc_has_timer) {
		fmd_timer_remove(hdl, zcp->zc_timer);
		zcp->zc_data.zc_has_timer = 0;
		serialize = B_TRUE;
	}
	if (zcp->zc_data.zc_has_serd_timer) {
		fmd_timer_remove(hdl, zcp->zc_serd_timer);
		zcp->zc_data.zc_has_serd_timer = 0;
		serialize = B_TRUE;
	}
	if (serialize)
		zfs_case_serialize(hdl, zcp);

	nvlist_free(detector);
}
/*
* Main fmd entry point.
*/
/*ARGSUSED*/
static void
zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
{
	zfs_case_t *zcp, *dcp;
	int32_t pool_state;
	uint64_t ena, pool_guid, vdev_guid;
	boolean_t isresource;
	nvlist_t *detector;
	const char *serd;

	isresource = fmd_nvl_class_match(hdl, nvl, "resource.fs.zfs.*");
if (isresource) {
/*
* For resources, we don't have a normal payload.
		 */
		if (nvlist_lookup_uint64(nvl,
		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
			pool_state = SPA_LOAD_OPEN;
		else
			pool_state = SPA_LOAD_NONE;
		detector = NULL;
	} else {
		(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
		    &detector);
		(void) nvlist_lookup_int32(nvl,
		    FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, &pool_state);
}
/*
* We also ignore all ereports generated during an import of a pool,
 * since the only possible fault (fault.fs.zfs.pool) would result in import
 * failure,
* and hence no persistent fault. Some day we may want to do something
* with these ereports, so we continue generating them internally.
*/
if (pool_state == SPA_LOAD_IMPORT)
return;
/*
* Device I/O errors are ignored during pool open.
*/
	if (pool_state == SPA_LOAD_OPEN &&
	    (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.checksum") ||
	    fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.io")))
		return;
/*
* Determine if this ereport corresponds to an open case. Cases are
* indexed by ENA, since ZFS does all the work of chaining together
* related ereports.
*
* We also detect if an ereport corresponds to an open case by context,
* such as:
*
* - An error occurred during an open of a pool with an existing
* case.
*
* - An error occurred for a device which already has an open
* case.
*/
	(void) nvlist_lookup_uint64(nvl,
	    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, &pool_guid);
	if (nvlist_lookup_uint64(nvl,
	    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
		vdev_guid = 0;
	if (nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &ena) != 0)
		ena = 0;
	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
	    zcp = uu_list_next(zfs_cases, zcp)) {
		/*
		 * Matches a known ENA.
		 */
		if (zcp->zc_data.zc_ena == ena)
			break;

		/*
		 * Matches a case involving load errors for this same pool.
		 */
		if (zcp->zc_data.zc_pool_guid == pool_guid &&
		    zcp->zc_data.zc_pool_state == SPA_LOAD_OPEN &&
		    pool_state == SPA_LOAD_OPEN)
			break;

		/*
		 * Device errors for the same device.
		 */
		if (vdev_guid != 0 &&
		    zcp->zc_data.zc_vdev_guid == vdev_guid)
			break;
	}
	if (zcp == NULL) {
		fmd_case_t *cs;
		zfs_case_data_t data = { 0 };
/*
* If this is one of our 'fake' resource ereports, and there is
* no case open, simply discard it.
*/
if (isresource)
return;
/*
* Open a new case.
*/
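		/* Sketch: open a new fmd case with no specific data yet. */
		cs = fmd_case_open(hdl, NULL);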
/*
* Initialize the case buffer. To commonize code, we actually
* create the buffer with existing data, and then call
* zfs_case_unserialize() to instantiate the in-core structure.
*/
		fmd_buf_create(hdl, cs, CASE_DATA,
		    sizeof (zfs_case_data_t));
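		/*
		 * Sketch (assumes the zc_data fields defined above): seed the
		 * buffer with this ereport's identity, then instantiate the
		 * in-core case from it.
		 */
		data.zc_version = CASE_DATA_VERSION_SERD;
		data.zc_ena = ena;
		data.zc_pool_guid = pool_guid;
		data.zc_vdev_guid = vdev_guid;
		data.zc_pool_state = (int)pool_state;
		fmd_buf_write(hdl, cs, CASE_DATA, &data, sizeof (data));

		zcp = zfs_case_unserialize(hdl, cs);
		assert(zcp != NULL);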
}
	if (isresource) {
		if (fmd_nvl_class_match(hdl, nvl,
		    "resource.fs.zfs.autoreplace")) {
/*
* The 'resource.fs.zfs.autoreplace' event indicates
* that the pool was loaded with the 'autoreplace'
* property set. In this case, any pending device
* failures should be ignored, as the asynchronous
* autoreplace handling will take care of them.
			 */
			fmd_case_close(hdl, zcp->zc_case);
		} else if (fmd_nvl_class_match(hdl, nvl,
		    "resource.fs.zfs.removed")) {
/*
* The 'resource.fs.zfs.removed' event indicates that
* device removal was detected, and the device was
* closed asynchronously. If this is the case, we
* assume that any recent I/O errors were due to the
* device removal, not any fault of the device itself.
* We reset the SERD engine, and cancel any pending
* timers.
*/
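			/*
			 * Sketch of the reset, assuming the fmd SERD API and
			 * the engine names stored in the case data; the
			 * pending SERD timer is cancelled just below.
			 */
			if (zcp->zc_data.zc_serd_io[0] != '\0')
				fmd_serd_reset(hdl, zcp->zc_data.zc_serd_io);
			if (zcp->zc_data.zc_serd_checksum[0] != '\0')
				fmd_serd_reset(hdl,
				    zcp->zc_data.zc_serd_checksum);
			if (zcp->zc_data.zc_has_serd_timer) {
				fmd_timer_remove(hdl, zcp->zc_serd_timer);
				zcp->zc_data.zc_has_serd_timer = 0;
				zfs_case_serialize(hdl, zcp);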
}
}
return;
}
/*
* Associate the ereport with this case.
*/
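	/* Sketch, using the standard fmd case API: */
	fmd_case_add_ereport(hdl, zcp->zc_case, ep);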
/*
* Don't do anything else if this case is already solved.
*/
	if (fmd_case_solved(hdl, zcp->zc_case))
		return;
/*
* Determine if we should solve the case and generate a fault. We solve
* a case if:
*
* a. A pool failed to open (ereport.fs.zfs.pool)
 * 	b. A device failed to open while a pool was up and running.
*
* We may see a series of ereports associated with a pool open, all
* chained together by the same ENA. If the pool open succeeds, then
* we'll see no further ereports. To detect when a pool open has
* succeeded, we associate a timer with the event. When it expires, we
* close the case.
*/
	if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.zpool")) {
		/*
		 * Pool level fault.  Before solving the case, go through and
		 * close any open device cases that may be pending.
		 */
		for (dcp = uu_list_first(zfs_cases); dcp != NULL;
		    dcp = uu_list_next(zfs_cases, dcp)) {
			if (dcp->zc_data.zc_pool_guid ==
			    zcp->zc_data.zc_pool_guid &&
			    dcp->zc_data.zc_vdev_guid != 0)
				fmd_case_close(hdl, dcp->zc_case);
		}

		zfs_case_solve(hdl, zcp, "fault.fs.zfs.pool", B_TRUE);
	} else if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.vdev.*")) {
		/*
		 * Device fault.  If this occurred during pool open, then defer
		 * reporting the fault.  If the pool itself could not be
		 * opened, we only report the pool fault, not every device
		 * fault that may have caused the problem.  If we do not see a
		 * pool fault within the timeout period, then we'll solve the
		 * device case.
		 */
		if (pool_state == SPA_LOAD_OPEN) {
			if (!zcp->zc_data.zc_has_timer) {
				zcp->zc_timer = fmd_timer_install(hdl, zcp,
				    NULL, zfs_case_timeout);
				zcp->zc_data.zc_has_timer = 1;
				zfs_case_serialize(hdl, zcp);
			}
		} else {
			zfs_case_solve(hdl, zcp, "fault.fs.zfs.device",
			    B_TRUE);
		}
	} else if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.io") ||
	    fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.checksum") ||
	    fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.io_failure")) {
		char *failmode = NULL;

		serd = NULL;

		/*
		 * If this is a checksum or I/O error, then toss it into the
		 * appropriate SERD engine and check to see if it has fired.
		 * Ideally, we want to do something more sophisticated
		 * (persistent errors for a single data block, etc.).  For now,
		 * a single SERD engine is sufficient.
		 */
		if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.io")) {
			if (zcp->zc_data.zc_serd_io[0] == '\0') {
				zfs_serd_name(zcp->zc_data.zc_serd_io,
				    pool_guid, vdev_guid, "io");
				fmd_serd_create(hdl, zcp->zc_data.zc_serd_io,
				    fmd_prop_get_int32(hdl, "io_N"),
				    fmd_prop_get_int64(hdl, "io_T"));
				zfs_case_serialize(hdl, zcp);
			}
			serd = zcp->zc_data.zc_serd_io;
		} else if (fmd_nvl_class_match(hdl, nvl,
		    "ereport.fs.zfs.checksum")) {
			if (zcp->zc_data.zc_serd_checksum[0] == '\0') {
				zfs_serd_name(zcp->zc_data.zc_serd_checksum,
				    pool_guid, vdev_guid, "checksum");
				fmd_serd_create(hdl,
				    zcp->zc_data.zc_serd_checksum,
				    fmd_prop_get_int32(hdl, "checksum_N"),
				    fmd_prop_get_int64(hdl, "checksum_T"));
				zfs_case_serialize(hdl, zcp);
			}
			serd = zcp->zc_data.zc_serd_checksum;
		} else if ((nvlist_lookup_string(nvl,
		    FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE, &failmode) == 0) &&
		    failmode != NULL) {
			if (strncmp(failmode, FM_EREPORT_FAILMODE_CONTINUE,
			    strlen(FM_EREPORT_FAILMODE_CONTINUE)) == 0) {
				zfs_case_solve(hdl, zcp,
				    "fault.fs.zfs.io_failure_continue",
				    B_FALSE);
			} else if (strncmp(failmode, FM_EREPORT_FAILMODE_WAIT,
			    strlen(FM_EREPORT_FAILMODE_WAIT)) == 0) {
				zfs_case_solve(hdl, zcp,
				    "fault.fs.zfs.io_failure_wait", B_FALSE);
			}
		}

		/*
		 * Because I/O errors may be due to device removal, we postpone
		 * any diagnosis until we're sure that we aren't about to
		 * receive a 'resource.fs.zfs.removed' event.
		 */
		if (serd != NULL && fmd_serd_record(hdl, serd, ep) &&
		    !zcp->zc_data.zc_has_serd_timer) {
			zcp->zc_serd_timer = fmd_timer_install(hdl, zcp,
			    NULL, zfs_serd_timeout);
			zcp->zc_data.zc_has_serd_timer = 1;
			zfs_case_serialize(hdl, zcp);
		}
	}
}
/*
* Timeout indicates one of two scenarios:
*
* - A device could not be opened while opening a pool, but the pool
* itself was opened successfully.
*
* - We diagnosed an I/O error, and it was not due to device removal (which
* would cause the timeout to be cancelled).
*/
/* ARGSUSED */
static void
zfs_fm_timeout(fmd_hdl_t *hdl, id_t id, void *data)
{
	zfs_case_t *zcp = data;
	const char *faultname;

	if (id == zcp->zc_timer) {
		zcp->zc_data.zc_has_timer = 0;
		zfs_case_solve(hdl, zcp, "fault.fs.zfs.device", B_TRUE);
	}

	if (id == zcp->zc_serd_timer) {
		zcp->zc_data.zc_has_serd_timer = 0;
		if (zcp->zc_data.zc_serd_io[0] != '\0' &&
		    fmd_serd_fired(hdl, zcp->zc_data.zc_serd_io)) {
			faultname = "fault.fs.zfs.vdev.io";
		} else {
			faultname = "fault.fs.zfs.vdev.checksum";
		}
		zfs_case_solve(hdl, zcp, faultname, B_FALSE);
	}
}
static void
zfs_fm_close(fmd_hdl_t *hdl, fmd_case_t *cs)
{
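	/*
	 * Sketch of the teardown, assuming the SERD engines and timers
	 * tracked in the case data above:
	 */
	zfs_case_t *zcp = fmd_case_getspecific(hdl, cs);

	if (zcp->zc_data.zc_serd_checksum[0] != '\0')
		fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_checksum);
	if (zcp->zc_data.zc_serd_io[0] != '\0')
		fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_io);
	if (zcp->zc_data.zc_has_timer)
		fmd_timer_remove(hdl, zcp->zc_timer);
	if (zcp->zc_data.zc_has_serd_timer)
		fmd_timer_remove(hdl, zcp->zc_serd_timer);

	uu_list_remove(zfs_cases, zcp);
	fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t));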
}
/*
* We use the fmd gc entry point to look for old cases that no longer apply.
* This allows us to keep our set of case data small in a long running system.
*/
static void
zfs_fm_gc(fmd_hdl_t *hdl)
{
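	/* Purge any cases whose pool or vdev is no longer present. */
	zfs_purge_cases(hdl);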
}
static const fmd_hdl_ops_t fmd_ops = {
zfs_fm_recv, /* fmdo_recv */
zfs_fm_timeout, /* fmdo_timeout */
zfs_fm_close, /* fmdo_close */
NULL, /* fmdo_stats */
zfs_fm_gc, /* fmdo_gc */
};
static const fmd_prop_t fmd_props[] = {
	{ "case_timeout", FMD_TYPE_UINT32, "5" },
	{ "checksum_N", FMD_TYPE_UINT32, "10" },
	{ "checksum_T", FMD_TYPE_TIME, "10min" },
	{ "io_N", FMD_TYPE_UINT32, "10" },
	{ "io_T", FMD_TYPE_TIME, "10min" },
	{ "serd_timeout", FMD_TYPE_UINT32, "5" },
	{ NULL, 0, NULL }
};
static const fmd_hdl_info_t fmd_info = {
	"ZFS Diagnosis Engine", "1.0", &fmd_ops, fmd_props
};
void
_fmd_init(fmd_hdl_t *hdl)
{
	fmd_case_t *cp;
	libzfs_handle_t *zhdl;

	if ((zhdl = libzfs_init()) == NULL)
		return;

	if ((zfs_case_pool = uu_list_pool_create("zfs_case_pool",
	    sizeof (zfs_case_t), offsetof(zfs_case_t, zc_node),
	    NULL, 0)) == NULL) {
		libzfs_fini(zhdl);
		return;
	}

	if ((zfs_cases = uu_list_create(zfs_case_pool, NULL, 0)) == NULL) {
		uu_list_pool_destroy(zfs_case_pool);
		libzfs_fini(zhdl);
		return;
	}

	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
		uu_list_destroy(zfs_cases);
		uu_list_pool_destroy(zfs_case_pool);
		libzfs_fini(zhdl);
		return;
	}

	fmd_hdl_setspecific(hdl, zhdl);
/*
* Iterate over all active cases and unserialize the associated buffers,
* adding them to our list of open cases.
*/
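	/* Sketch, assuming the standard fmd case-iteration API: */
	for (cp = fmd_case_next(hdl, NULL); cp != NULL;
	    cp = fmd_case_next(hdl, cp))
		(void) zfs_case_unserialize(hdl, cp);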
/*
* Clear out any old cases that are no longer valid.
*/
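	zfs_purge_cases(hdl);

	/*
	 * Sketch: cache the timeouts in nanoseconds.  The property names
	 * ("case_timeout", "serd_timeout") are assumptions.
	 */
	zfs_case_timeout = fmd_prop_get_int32(hdl, "case_timeout") * NANOSEC;
	zfs_serd_timeout = fmd_prop_get_int32(hdl, "serd_timeout") * NANOSEC;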
}
void
_fmd_fini(fmd_hdl_t *hdl)
{
	zfs_case_t *zcp;
	uu_list_walk_t *walk;
/*
* Remove all active cases.
*/
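	/* Sketch, using a robust libuutil walk so removal is safe: */
	walk = uu_list_walk_start(zfs_cases, UU_WALK_ROBUST);
	while ((zcp = uu_list_walk_next(walk)) != NULL) {
		uu_list_remove(zfs_cases, zcp);
		fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t));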
}
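	uu_list_walk_end(walk);

	/* Sketch: tear down the case list and the cached libzfs handle. */
	uu_list_destroy(zfs_cases);
	uu_list_pool_destroy(zfs_case_pool);
	libzfs_fini(fmd_hdl_getspecific(hdl));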
}