/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
*/
#include <strings.h>
#include <sys/libdevid.h>
#include <fcntl.h>
#include <libdevinfo.h>
#include <unistd.h>
#include <wait.h>
#include <sys/stat.h>
#include <spawn.h>
#include "fmd_zfs.h"
#define ZR_NOT_FOUND 0 /* keep looking */
#define ZR_VDEV_FOUND 1 /* we found the vdev in the tree */
#define ZR_REPLACING_PARENT 2 /* vdev is a child of a "replacing" vdev */
/*
 * This returns ZR_REPLACING_PARENT if the vdev in question has a
 * "replacing" vdev as an ancestor. As there is no way to work "upwards"
 * through an nvlist, we must recurse downwards to determine if the child
 * of any "replacing" vdev contains the specified vdev.
*/
static int
vdev_replacing_parent(zpool_handle_t *zhp, char *vdev, nvlist_t *tree)
{
nvlist_t **child;
uint_t children, c;
if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0) {
char *devname =
zpool_vdev_name(NULL, zhp, tree, B_FALSE, B_FALSE);
boolean_t match = (strcmp(devname, vdev) == 0);
free(devname);
/*
* If the vdev names match, we found it. If we return
* ZR_VDEV_FOUND, we'll unwind the stack.
*/
return (match ? ZR_VDEV_FOUND : ZR_NOT_FOUND);
}
/*
* This is an inner vdev, not a leaf vdev. We need to search
* all of its children for the leaf we're interested in.
*/
for (c = 0; c < children; c++) {
char *type;
int result;
/* ignore config errors */
if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE,
&type) != 0)
continue;
/* see if the disk vdev is a child of this vdev */
result = vdev_replacing_parent(zhp, vdev, child[c]);
/*
		 * If we found the vdev, then determine what to return. If
		 * this node is a "replacing" vdev, then the leaf is a child
		 * of a replacing vdev. Otherwise, return the result we got
		 * from searching below us.
*/
if (result != ZR_NOT_FOUND)
return (strcmp(type, VDEV_TYPE_REPLACING) == 0 ?
ZR_REPLACING_PARENT : result);
}
/* The vdev couldn't be found at this level of the tree */
return (ZR_NOT_FOUND);
}
/*
 * Given a vdev, attempt to replace it with each available hot spare in
 * turn until one of the replacements succeeds.
*/
/*ARGSUSED*/
int
fmd_zfs_replace_with_spare(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
vdev_type_t vdev_type, nvlist_t *sibling)
{
nvlist_t *config, *nvroot, *replacement;
nvlist_t **spares;
uint_t s, nspares;
char *dev_name;
char *type;
/*
	 * Don't try to spare a vdev which is already spared or being
	 * replaced, or which is a cache device or an available spare.
*/
switch (vdev_type) {
case VDEV_IS_AVAIL_SPARE:
case VDEV_IS_CACHE:
case VDEV_IS_SPARED_DATA:
case VDEV_IS_SPARED_LOG:
case VDEV_IS_REPLACING:
return (FMD_ZFS_ITER_CONTINUE);
	default:
		break;
	}
if (vdev == NULL ||
nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) != 0 ||
(strcmp(type, VDEV_TYPE_DISK) != 0 &&
strcmp(type, VDEV_TYPE_FILE) != 0))
return (FMD_ZFS_ITER_CONTINUE);
config = zpool_get_config(zhp, NULL);
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) != 0)
return (FMD_ZFS_ITER_CONTINUE);
/*
* Find out if there are any hot spares available in the pool.
*/
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
&spares, &nspares) != 0)
return (FMD_ZFS_ITER_CONTINUE);
dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE, B_FALSE);
/*
* If the failing device is in the process of replacing a previously
* failed disk, then we will already have allocated a spare for that
* previous disk, and we don't want to allocate another one here. So
	 * check whether the vdev has an ancestor of type "replacing" and,
	 * if so, return without allocating a spare.
*/
if (vdev_replacing_parent(zhp, dev_name, nvroot) ==
ZR_REPLACING_PARENT) {
free(dev_name);
return (FMD_ZFS_ITER_CONTINUE);
}
if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0) {
free(dev_name);
return (FMD_ZFS_ITER_CONTINUE);
}
(void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
VDEV_TYPE_ROOT);
/*
	 * Try each hot spare in turn, stopping as soon as one is
	 * successfully attached as the replacement.
*/
for (s = 0; s < nspares; s++) {
char *spare_name;
if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
&spare_name) != 0)
continue;
(void) nvlist_add_nvlist_array(replacement,
ZPOOL_CONFIG_CHILDREN, &spares[s], 1);
if (zpool_vdev_attach(zhp, dev_name, spare_name,
replacement, B_TRUE) == 0)
break;
}
free(dev_name);
nvlist_free(replacement);
return (FMD_ZFS_ITER_CONTINUE);
}
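
/*
 * Iterator callback: fault a leaf (disk or file) vdev, provided its current
 * state is at or above VDEV_STATE_UNUSABLE. The 'arg' value is passed
 * through to zpool_vdev_fault().
 */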
/*ARGSUSED*/
int
fmd_zfs_fault_vdev(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
vdev_type_t vdev_type, nvlist_t *sibling)
{
uint64_t vdev_guid;
char *type;
vdev_stat_t *vs;
uint_t c;
if (vdev == NULL ||
nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) != 0 ||
(strcmp(type, VDEV_TYPE_DISK) != 0 &&
strcmp(type, VDEV_TYPE_FILE) != 0))
return (FMD_ZFS_ITER_CONTINUE);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0 && vs->vs_state >= VDEV_STATE_UNUSABLE)
(void) zpool_vdev_fault(zhp, vdev_guid, arg);
return (FMD_ZFS_ITER_CONTINUE);
}
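
/*
 * Iterator callback: degrade a leaf (disk or file) vdev, provided its
 * current state is at or above VDEV_STATE_DEGRADED. The 'arg' value is
 * passed through to zpool_vdev_degrade().
 */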
/*ARGSUSED*/
int
fmd_zfs_degrade_vdev(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
vdev_type_t vdev_type, nvlist_t *sibling)
{
uint64_t vdev_guid;
char *type;
vdev_stat_t *vs;
uint_t c;
if (vdev == NULL ||
nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) != 0 ||
(strcmp(type, VDEV_TYPE_DISK) != 0 &&
strcmp(type, VDEV_TYPE_FILE) != 0))
return (FMD_ZFS_ITER_CONTINUE);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0 && vs->vs_state >= VDEV_STATE_DEGRADED)
(void) zpool_vdev_degrade(zhp, vdev_guid, arg);
return (FMD_ZFS_ITER_CONTINUE);
}
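
/*
 * Iterator callback: clear errors on a vdev. If no vdev is specified then
 * the whole pool is cleared, using a "never rewind" load policy so that no
 * transaction rollback is attempted.
 */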
/*ARGSUSED*/
int
fmd_zfs_clear_vdev(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
vdev_type_t vdev_type, nvlist_t *sibling)
{
uint64_t vdev_guid;
nvlist_t *policy;
if (vdev == NULL) {
/*
* If no vdev specified, we've been asked to clear the pool.
*/
if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) == 0) {
if (nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND,
ZPOOL_NEVER_REWIND) == 0)
(void) zpool_clear(zhp, NULL, policy, B_TRUE);
nvlist_free(policy);
}
return (FMD_ZFS_ITER_CONTINUE);
}
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
(void) zpool_vdev_clear(zhp, vdev_guid);
return (FMD_ZFS_ITER_CONTINUE);
}
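
/*
 * Iterator callback: report the state of a vdev (or, if no vdev is given,
 * of the pool's root vdev) through the uint32_t pointed to by 'arg'. The
 * value is only ever lowered, so after a full iteration it holds the worst
 * (lowest) state seen.
 */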
/*ARGSUSED*/
int
fmd_zfs_vdev_state(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
vdev_type_t vdev_type, nvlist_t *sibling)
{
uint32_t *rvalp = (uint32_t *)arg;
vdev_stat_t *vs;
uint_t c;
if (vdev == NULL) {
/*
		 * If no vdev is specified, we've been asked for the state of
		 * the pool, so use the state of the root vdev.
*/
nvlist_t *cfg = zpool_get_config(zhp, NULL);
(void) nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev);
}
/*
	 * Update the returned state if this vdev's state is lower than
	 * the current value.
*/
if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0 && vs->vs_state < *rvalp)
*rvalp = vs->vs_state;
return (FMD_ZFS_ITER_CONTINUE);
}
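
/*
 * Iterator callback: record that a matching vdev exists in some pool's
 * configuration and terminate the iteration.
 */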
/*ARGSUSED*/
int
fmd_zfs_in_config(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
vdev_type_t vdev_type, nvlist_t *sibling)
{
*(int *)arg = 1;
return (FMD_ZFS_ITER_TERMINATE);
}
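
/*
 * Criteria used to select vdevs during iteration, together with the
 * callback data handed to iter_by_pool()/iter_by_vdev().
 */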
#define TYPE_DEVID 1
#define TYPE_GUID 2
#define TYPE_LOCATION 3
#define TYPE_DEVPATH 4
#define TYPE_ALL 5
typedef struct zfs_iter_cbdata {
int cb_type;
uint64_t cb_pool_guid;
uint64_t cb_vdev_guid;
char *cb_devid;
char *cb_devpath;
char *cb_chassissn;
char *cb_location;
fmd_zfs_func_t cb_func;
void *cb_arg;
} zfs_iter_cbdata_t;
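
/*
 * Recursively walk a vdev tree, invoking the callback on every vdev that
 * matches the search criteria in 'cbp'. The vdev type (data, log, cache,
 * spare, etc.) and, below "replacing" or "spare" vdevs, the child's sibling
 * are worked out on the way down and passed to the callback.
 */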
static int
iter_by_vdev(zpool_handle_t *zhp, nvlist_t *vdev, zfs_iter_cbdata_t *cbp,
vdev_type_t type, nvlist_t *sib)
{
nvlist_t **child;
uint_t c, children;
char *devid, *chassissn, *location, *path;
uint64_t guid;
switch (cbp->cb_type) {
case TYPE_GUID:
if (nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid) == 0 &&
guid == cbp->cb_vdev_guid)
if (cbp->cb_func(zhp, vdev, cbp->cb_arg, type, sib) ==
FMD_ZFS_ITER_TERMINATE)
return (FMD_ZFS_ITER_TERMINATE);
break;
case TYPE_LOCATION:
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_CHASSISSN,
&chassissn) == 0 &&
strcmp(cbp->cb_chassissn, chassissn) == 0 &&
nvlist_lookup_string(vdev, ZPOOL_CONFIG_LOCATION,
&location) == 0 &&
strcmp(cbp->cb_location, location) == 0)
if (cbp->cb_func(zhp, vdev, cbp->cb_arg, type, sib) ==
FMD_ZFS_ITER_TERMINATE)
return (FMD_ZFS_ITER_TERMINATE);
break;
case TYPE_DEVPATH:
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &path) ==
0 && strncmp(cbp->cb_devpath, path,
strlen(cbp->cb_devpath)) == 0 &&
(path[strlen(cbp->cb_devpath)] == '\0' ||
path[strlen(cbp->cb_devpath)] == ':'))
if (cbp->cb_func(zhp, vdev, cbp->cb_arg, type, sib) ==
FMD_ZFS_ITER_TERMINATE)
return (FMD_ZFS_ITER_TERMINATE);
break;
case TYPE_DEVID:
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_DEVID, &devid) ==
0 && devid_str_compare(cbp->cb_devid, devid) == 0)
if (cbp->cb_func(zhp, vdev, cbp->cb_arg, type, sib) ==
FMD_ZFS_ITER_TERMINATE)
return (FMD_ZFS_ITER_TERMINATE);
break;
case TYPE_ALL:
if (cbp->cb_func(zhp, vdev, cbp->cb_arg, type, sib) ==
FMD_ZFS_ITER_TERMINATE)
return (FMD_ZFS_ITER_TERMINATE);
break;
}
if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
&child, &children) == 0)
for (c = 0; c < children; c++) {
vdev_type_t tmp;
uint64_t islog = 0LL;
char *vdevtype = "";
nvlist_t *sibling = NULL;
if (type == VDEV_NULL) {
/*
* If type is VDEV_NULL, this is a top-level
* vdev. See if it is of type log or data.
*/
(void) nvlist_lookup_uint64(vdev,
ZPOOL_CONFIG_IS_LOG, &islog);
tmp = islog ? VDEV_IS_LOG : VDEV_IS_DATA;
} else {
/*
				 * If the parent node is a "spare" or
				 * "replacing" vdev, work out what type of
				 * node the child is and keep track of the
				 * child's sibling.
*/
(void) nvlist_lookup_string(vdev,
ZPOOL_CONFIG_TYPE, &vdevtype);
if (vdevtype && strcmp(vdevtype,
VDEV_TYPE_REPLACING) == 0) {
if (c == 0) {
tmp = VDEV_IS_REPLACING;
sibling = child[1];
} else {
tmp = type;
sibling = child[c - 1];
}
} else if (vdevtype && strcmp(vdevtype,
VDEV_TYPE_SPARE) == 0) {
if (c == 0) {
tmp = (type == VDEV_IS_LOG) ?
VDEV_IS_SPARED_LOG :
VDEV_IS_SPARED_DATA;
sibling = child[1];
} else {
tmp = VDEV_IS_ACTIVE_SPARE;
sibling = child[c - 1];
}
} else
tmp = type;
}
if (iter_by_vdev(zhp, child[c], cbp, tmp, sibling) ==
FMD_ZFS_ITER_TERMINATE)
return (FMD_ZFS_ITER_TERMINATE);
}
if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0)
for (c = 0; c < children; c++)
if (iter_by_vdev(zhp, child[c], cbp, VDEV_IS_CACHE,
NULL) == FMD_ZFS_ITER_TERMINATE)
return (FMD_ZFS_ITER_TERMINATE);
if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_SPARES,
&child, &children) == 0)
for (c = 0; c < children; c++)
if (iter_by_vdev(zhp, child[c], cbp,
VDEV_IS_AVAIL_SPARE, NULL) ==
FMD_ZFS_ITER_TERMINATE)
return (FMD_ZFS_ITER_TERMINATE);
return (FMD_ZFS_ITER_CONTINUE);
}
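
/*
 * zpool_iter() callback: locate the root of each pool's vdev tree and walk
 * it with iter_by_vdev(). When searching by guid, pools whose guid does not
 * match are skipped, and a vdev guid of zero means the callback is invoked
 * once for the pool itself. The pool handle is closed before returning.
 */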
static int
iter_by_pool(zpool_handle_t *zhp, void *data)
{
zfs_iter_cbdata_t *cbp = data;
nvlist_t *config;
nvlist_t *nvroot;
int rval = FMD_ZFS_ITER_CONTINUE;
if (cbp->cb_type == TYPE_GUID) {
if (cbp->cb_pool_guid != zpool_get_prop_int(zhp,
ZPOOL_PROP_GUID, NULL))
goto done;
if (cbp->cb_vdev_guid == 0ll) {
rval = cbp->cb_func(zhp, NULL, cbp->cb_arg, VDEV_NULL,
NULL);
goto done;
}
}
config = zpool_get_config(zhp, NULL);
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0)
rval = iter_by_vdev(zhp, nvroot, cbp, VDEV_NULL, NULL);
done:
zpool_close(zhp);
return ((rval == FMD_ZFS_ITER_TERMINATE) ? 1 : 0);
}
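
/*
 * Public entry points: iterate over all pools, invoking 'func' on each vdev
 * that matches the given devid, chassis/location, physical device path or
 * pool/vdev guid pair (or on every vdev, in the case of fmd_zfs_iter_all).
 */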
void
fmd_zfs_iter_by_devid(libzfs_handle_t *zhdl, char *devid, fmd_zfs_func_t func,
void *arg)
{
zfs_iter_cbdata_t cb;
cb.cb_type = TYPE_DEVID;
cb.cb_devid = devid;
cb.cb_func = func;
cb.cb_arg = arg;
(void) zpool_iter(zhdl, iter_by_pool, &cb);
}
void
fmd_zfs_iter_by_location(libzfs_handle_t *zhdl, char *chassissn, char *location,
fmd_zfs_func_t func, void *arg)
{
zfs_iter_cbdata_t cb;
cb.cb_type = TYPE_LOCATION;
cb.cb_chassissn = chassissn;
cb.cb_location = location;
cb.cb_func = func;
cb.cb_arg = arg;
(void) zpool_iter(zhdl, iter_by_pool, &cb);
}
void
fmd_zfs_iter_all(libzfs_handle_t *zhdl, fmd_zfs_func_t func, void *arg)
{
zfs_iter_cbdata_t cb;
cb.cb_type = TYPE_ALL;
cb.cb_func = func;
cb.cb_arg = arg;
(void) zpool_iter(zhdl, iter_by_pool, &cb);
}
void
fmd_zfs_iter_by_devpath(libzfs_handle_t *zhdl, char *devpath,
fmd_zfs_func_t func, void *arg)
{
zfs_iter_cbdata_t cb;
cb.cb_type = TYPE_DEVPATH;
cb.cb_devpath = devpath;
cb.cb_func = func;
cb.cb_arg = arg;
(void) zpool_iter(zhdl, iter_by_pool, &cb);
}
void
fmd_zfs_iter_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid,
uint64_t vdev_guid, fmd_zfs_func_t func, void *arg)
{
zfs_iter_cbdata_t cb;
cb.cb_type = TYPE_GUID;
cb.cb_vdev_guid = vdev_guid;
cb.cb_pool_guid = pool_guid;
cb.cb_func = func;
cb.cb_arg = arg;
(void) zpool_iter(zhdl, iter_by_pool, &cb);
}
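
/*
 * External helper programs used to label a replacement disk, to handle a
 * labelling failure, and to check that a replacement disk is of a suitable
 * type.
 */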
#define ZFS_LABEL_EXT "/usr/lib/zfs/labeldisk"
#define ZFS_LABELFAIL_EXT "/usr/lib/zfs/labelfail"
#define ZFS_CHECK_EXT "/usr/lib/zfs/checkdisk"
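
/*
 * Run an external helper program and wait for it to complete. Returns 0 if
 * the program exited normally with status 0, and -1 otherwise.
 */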
static int
fmd_zfs_spawn(char **argv)
{
pid_t pid;
int ret;
if (posix_spawn(&pid, argv[0], NULL, NULL, argv, NULL) != 0 ||
waitpid(pid, &ret, 0) != pid ||
!WIFEXITED(ret) || WEXITSTATUS(ret) != 0)
return (-1);
return (0);
}
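
/*
 * Iterator callback: attempt to automatically replace a vdev with the newly
 * inserted device described by 'arg' (an fmd_zfs_cb_t). If the pool's
 * autoreplace property is set, the new disk is labelled (directly for whole
 * disks, otherwise via the external 'labeldisk' helper) and attached in
 * place of the old vdev; cache and spare devices are handled with a
 * remove/add pair instead of an attach. If any step fails we take the
 * forcefault path below.
 */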
int
fmd_zfs_autoreplace(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
vdev_type_t type, nvlist_t *sibling)
{
fmd_zfs_cb_t *fzp = (fmd_zfs_cb_t *)arg;
nvlist_t *nvroot = NULL, *newvd = NULL;
uint64_t wholedisk = 0ULL;
char *vdevname = NULL, *origvdevname;
char *xdevid, *devidstr, *savedevidstr = NULL, *minor;
ddi_devid_t devid;
char *devdskpath = NULL, *path = NULL, *physpath = NULL, *xpath;
const char *oldsuffix;
char *argv[4];
size_t len;
int fd;
struct stat64 statbuf;
uint32_t old_pool_state = VDEV_STATE_HEALTHY;
/*
* Ignore vdevs that are already being replaced.
*/
if (type == VDEV_IS_REPLACING)
return (FMD_ZFS_ITER_CONTINUE);
if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL))
goto forcefault;
if ((vdevname = zpool_vdev_name(zpool_get_handle(zhp), zhp, vdev,
B_FALSE, B_FALSE)) == NULL)
goto forcefault;
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
if (strncmp(fzp->fz_path, "/dev/lofi", 9) == 0) {
len = strlen(fzp->fz_physpath);
if ((physpath = malloc(len + 1)) == NULL)
goto forcefault;
(void) strcpy(physpath, fzp->fz_physpath);
len = strlen(fzp->fz_path);
if ((path = malloc(len + 1)) == NULL)
goto forcefault;
(void) strcpy(path, fzp->fz_path);
len = strlen(fzp->fz_path);
if ((devdskpath = malloc(len + 1)) == NULL)
goto forcefault;
(void) strcpy(devdskpath, fzp->fz_path);
devidstr = fzp->fz_devid;
goto do_lofi;
}
/*
* Modify physpath/path/devid to be for the new disk, but make
* sure they have the same slice (if any) as before.
*/
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &xpath) == 0) {
len = strlen(fzp->fz_physpath);
if ((oldsuffix = strchr(xpath, ':')) == NULL)
oldsuffix = "";
if ((physpath = malloc(len + strlen(oldsuffix) + 1)) == NULL)
goto forcefault;
(void) strcpy(physpath, fzp->fz_physpath);
(void) strcpy(physpath + len, oldsuffix);
} else
physpath = strdup(fzp->fz_physpath);
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &xpath) == 0) {
len = strlen(fzp->fz_path);
/* look for suffix - ignoring "/dev/dsk" */
if ((oldsuffix = strchr(&xpath[9], 's')) == NULL &&
(oldsuffix = strchr(&xpath[9], 'p')) == NULL)
oldsuffix = "";
if ((path = malloc(len + strlen(oldsuffix) + 1)) == NULL)
goto forcefault;
(void) strcpy(path, fzp->fz_path);
(void) strcpy(path + len, oldsuffix);
} else
path = strdup(fzp->fz_path);
len = strlen(path) + 10;
if ((devdskpath = malloc(len)) == NULL)
goto forcefault;
(void) snprintf(devdskpath, len, "/dev/dsk/%s", path);
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_DEVID, &xdevid) == 0 &&
devid_str_decode(fzp->fz_devid, &devid, NULL) == 0 &&
devid_str_decode(xdevid, NULL, &minor) == 0) {
savedevidstr = devidstr = devid_str_encode(devid, minor);
devid_free(devid);
devid_str_free(minor);
} else
devidstr = fzp->fz_devid;
/*
* If the 'checkdisk' program exists, then verify that the type of the
* disk inserted matches the type of the disk it is attempting to
* replace.
*/
if (stat64(ZFS_CHECK_EXT, &statbuf) == 0) {
argv[0] = ZFS_CHECK_EXT;
argv[1] = wholedisk ? fzp->fz_path : path;
argv[2] = type == VDEV_IS_CACHE ? "cache" :
(type == VDEV_IS_LOG || type == VDEV_IS_SPARED_LOG) ?
"log" : "data";
argv[3] = NULL;
if (fmd_zfs_spawn(argv) != 0)
goto forcefault;
}
/*
* Before we attempt a replacement, we zero out the labels. While
* unlikely, it's possible that we are re-inserting a stale disk that
* was offlined, and its old label is going to conflict with what's
* currently in the config. But don't do this for cache or active
* spare devices as there's no zpool_vdev_attach to recreate the labels.
*/
if (type != VDEV_IS_CACHE && type != VDEV_IS_AVAIL_SPARE) {
if ((fd = open(devdskpath, O_RDWR)) >= 0) {
(void) zpool_clear_label(fd);
(void) fsync(fd);
(void) close(fd);
}
}
/*
* If this is a request to label a whole disk, then attempt to
* write out the label. If it is not a whole disk, and the
* external 'labeldisk' program exists, then invoke this
* program to label the disk.
*
	 * If any part of this process fails, then force a fault so that
	 * a hot spare replacement can take place.
*/
if (wholedisk) {
if (zpool_label_disk(zpool_get_handle(zhp), zhp,
fzp->fz_path, ZPOOL_LABEL_MATCH_REQ_PART, NULL) != 0) {
if (stat64(ZFS_LABELFAIL_EXT, &statbuf) == 0) {
argv[0] = ZFS_LABELFAIL_EXT;
argv[1] = fzp->fz_path;
argv[2] = NULL;
(void) fmd_zfs_spawn(argv);
}
goto forcefault;
}
} else {
if (stat64(ZFS_LABEL_EXT, &statbuf) == 0) {
argv[0] = ZFS_LABEL_EXT;
argv[1] = path;
argv[2] = NULL;
if (fmd_zfs_spawn(argv) != 0) {
if (stat64(ZFS_LABELFAIL_EXT,
&statbuf) == 0) {
argv[0] = ZFS_LABELFAIL_EXT;
argv[1] = path;
argv[2] = NULL;
(void) fmd_zfs_spawn(argv);
}
goto forcefault;
}
}
}
do_lofi:
/*
* Construct the root vdev to pass to zpool_vdev_attach(). While adding
* the entire vdev structure is harmless, we construct a reduced set of
* path/physpath/wholedisk to keep it simple.
*/
if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 ||
nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0 ||
nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, devdskpath) != 0 ||
(fzp->fz_devchassis != NULL && nvlist_add_string(newvd,
ZPOOL_CONFIG_DEVCHASSIS, fzp->fz_devchassis) != 0) ||
(fzp->fz_chassissn != NULL && nvlist_add_string(newvd,
ZPOOL_CONFIG_CHASSISSN, fzp->fz_chassissn) != 0) ||
(fzp->fz_location != NULL && nvlist_add_string(newvd,
ZPOOL_CONFIG_LOCATION, fzp->fz_location) != 0) ||
nvlist_add_string(newvd, ZPOOL_CONFIG_PHYS_PATH, physpath) != 0 ||
(devidstr != NULL &&
nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, devidstr) != 0) ||
nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
((type == VDEV_IS_LOG || type == VDEV_IS_SPARED_LOG) &&
nvlist_add_uint64(newvd, ZPOOL_CONFIG_IS_LOG, B_TRUE) != 0) ||
nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0)
goto forcefault;
(void) fmd_zfs_vdev_state(zhp, NULL, &old_pool_state, type, sibling);
if (type == VDEV_IS_CACHE) {
/*
* Do a remove/add pair instead of a zpool_vdev_attach().
*/
if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
&newvd, 1) != 0)
goto forcefault;
if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
NULL, 0) != 0)
goto forcefault;
if (zpool_vdev_remove(zhp, vdevname) == 0)
(void) zpool_add(zhp, nvroot);
} else if (type == VDEV_IS_AVAIL_SPARE) {
/*
* Do a remove/add pair instead of a zpool_vdev_attach().
*/
if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
&newvd, 1) != 0)
goto forcefault;
if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
NULL, 0) != 0)
goto forcefault;
if (zpool_vdev_remove(zhp, vdevname) == 0)
(void) zpool_add(zhp, nvroot);
} else if (type == VDEV_IS_ACTIVE_SPARE) {
/*
* Do a detach of the active spare first.
*/
if (zpool_vdev_detach(zhp, vdevname) != 0)
goto forcefault;
/*
* Now remove the old hot spare and add the new hot spare.
*/
if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
&newvd, 1) != 0)
goto forcefault;
if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
NULL, 0) != 0)
goto forcefault;
if (zpool_vdev_remove(zhp, vdevname) != 0)
goto forcefault;
if (zpool_add(zhp, nvroot) != 0)
goto forcefault;
/*
* Now do the attach of the new active spare.
*/
if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
NULL, 0) != 0)
goto forcefault;
if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
&newvd, 1) != 0)
goto forcefault;
origvdevname = zpool_vdev_name(zpool_get_handle(zhp), zhp,
sibling, B_FALSE, B_FALSE);
(void) zpool_vdev_attach(zhp, origvdevname,
devdskpath, nvroot, B_TRUE);
free(origvdevname);
} else {
if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
&newvd, 1) != 0)
goto forcefault;
(void) zpool_vdev_attach(zhp, vdevname, devdskpath, nvroot,
B_TRUE);
}
/*
* If this action might have made the pool usable again then enable
* datasets.
*/
if (old_pool_state < VDEV_STATE_DEGRADED)
(void) zpool_enable_datasets(zhp, NULL, 0);
out:
if (nvroot)
nvlist_free(nvroot);
if (newvd)
nvlist_free(newvd);
if (physpath)
free(physpath);
if (path)
free(path);
if (devdskpath)
free(devdskpath);
if (vdevname)
free(vdevname);
if (savedevidstr)
devid_str_free(savedevidstr);
return (FMD_ZFS_ITER_CONTINUE);
forcefault:
/*
* If we are being called after an fmd restart, we may have missed
* a check event. So force a fault here by doing a re-online of the
* original device - which we know will fail.
*/
if (fzp->fz_isrestart)
(void) fmd_zfs_reonline(zhp, vdev, NULL, type, sibling);
goto out;
}
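
/*
 * Iterator callback: attempt to bring a vdev back online. If the online
 * succeeds and a hot spare had been activated, the spare is detached rather
 * than waiting for it to resilver. If the online fails and 'arg' is
 * non-NULL (the lofi case), fall back to fmd_zfs_autoreplace().
 */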
/*ARGSUSED*/
int
fmd_zfs_reonline(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
vdev_type_t type, nvlist_t *sibling)
{
char *vdevname;
vdev_state_t newstate;
char *sibvdevname;
uint32_t old_pool_state = VDEV_STATE_HEALTHY;
if (type == VDEV_IS_AVAIL_SPARE)
return (FMD_ZFS_ITER_CONTINUE);
if ((vdevname = zpool_vdev_name(zpool_get_handle(zhp), zhp, vdev,
B_FALSE, B_FALSE)) == NULL)
return (FMD_ZFS_ITER_CONTINUE);
(void) fmd_zfs_vdev_state(zhp, NULL, &old_pool_state, type, sibling);
if (zpool_vdev_online(zhp, vdevname, ZFS_ONLINE_AUTO, &newstate) == 0 &&
(newstate == VDEV_STATE_HEALTHY ||
newstate == VDEV_STATE_DEGRADED)) {
/*
* If this action might have made the pool usable again then
* enable datasets.
*/
if (old_pool_state < VDEV_STATE_DEGRADED)
(void) zpool_enable_datasets(zhp, NULL, 0);
if (newstate == VDEV_STATE_HEALTHY &&
(type == VDEV_IS_SPARED_DATA ||
type == VDEV_IS_SPARED_LOG)) {
/*
* If there was a hot spare activated, then detach the
* spare now rather than waiting for resilvering to
* complete. If we don't do this, then we are forced to
* wait for the spare itself to resilver, which can be
* orders of magnitude longer than the original disk
* (which is only missing a small amount of data). Note
* that if we are in the rare situation where the spare
* has the only valid copy of some data then the
* zpool_vdev_detach() will simply fail.
*/
sibvdevname = zpool_vdev_name(zpool_get_handle(zhp),
zhp, sibling, B_FALSE, B_FALSE);
if (sibvdevname != NULL) {
(void) zpool_vdev_detach(zhp, sibvdevname);
free(sibvdevname);
}
}
} else if (arg != NULL) {
/*
* Only have non-null arg for LOFI devices.
*/
fmd_zfs_cb_t *fzp = (fmd_zfs_cb_t *)arg;
(void) fmd_zfs_autoreplace(zhp, vdev, fzp, type, sibling);
}
free(vdevname);
return (FMD_ZFS_ITER_CONTINUE);
}
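
/*
 * Iterator callback: record the device chassis, chassis serial number and
 * location from 'arg' against the given vdev in the pool configuration.
 */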
/*ARGSUSED*/
int
fmd_zfs_set_location(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
vdev_type_t type, nvlist_t *sibling)
{
fmd_zfs_cb_t *fzp = (fmd_zfs_cb_t *)arg;
uint64_t vdev_guid;
if (nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid) == 0)
(void) zpool_location_set(zhp, vdev_guid, fzp->fz_devchassis,
fzp->fz_chassissn, fzp->fz_location);
return (FMD_ZFS_ITER_CONTINUE);
}
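
/*
 * Iterator callback: if the pool's autoexpand property is set and the pool
 * is not unavailable, re-online the device with ZFS_ONLINE_EXPAND so that
 * any additional capacity on the underlying device is made available.
 */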
/*ARGSUSED*/
int
fmd_zfs_autoexpand(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
vdev_type_t type, nvlist_t *sibling)
{
char *path, fullpath[MAXPATHLEN];
uint64_t wholedisk = 0ULL;
vdev_state_t newstate;
if (type == VDEV_IS_AVAIL_SPARE)
return (FMD_ZFS_ITER_CONTINUE);
	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
		return (FMD_ZFS_ITER_CONTINUE);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
(void) strlcpy(fullpath, path, sizeof (fullpath));
if (wholedisk)
fullpath[strlen(fullpath) - 2] = '\0';
if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL) &&
zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
(void) zpool_vdev_online(zhp, fullpath,
ZFS_ONLINE_AUTO | ZFS_ONLINE_EXPAND, &newstate);
return (FMD_ZFS_ITER_CONTINUE);
}