/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
 */
#include <strings.h>
#include <sys/libdevid.h>
#include <fcntl.h>
#include <libdevinfo.h>
#include <unistd.h>
#include <wait.h>
#include <sys/stat.h>
#include <spawn.h>
#include "fmd_zfs.h"

#define	ZR_NOT_FOUND		0	/* keep looking */
#define	ZR_VDEV_FOUND		1	/* we found the vdev in the tree */
#define	ZR_REPLACING_PARENT	2	/* vdev is a child of a "replacing" vdev */

/*
 * Determine whether the vdev in question has a "replacing" vdev as an
 * ancestor. As there is no way to work "upwards" through an nvlist,
 * we must recurse downwards to determine if the child of any "replacing"
 * vdev contains the specified vdev.
 */
static int
vdev_replacing_parent(zpool_handle_t *zhp, char *vdev, nvlist_t *tree)
{
	nvlist_t **child;
	uint_t children, c;

	if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {
		char *devname =
		    zpool_vdev_name(NULL, zhp, tree, B_FALSE, B_FALSE);
		boolean_t match = (strcmp(devname, vdev) == 0);

		free(devname);

		/*
		 * If the vdev names match, we found it. If we return
		 * ZR_VDEV_FOUND, we'll unwind the stack.
		 */
		return (match ? ZR_VDEV_FOUND : ZR_NOT_FOUND);
	}

	/*
	 * This is an inner vdev, not a leaf vdev. We need to search
	 * all of its children for the leaf we're interested in.
	 */
	for (c = 0; c < children; c++) {
		char *type;
		int result;

		/* ignore config errors */
		if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE,
		    &type) != 0)
			continue;

		/* see if the disk vdev is a child of this vdev */
		result = vdev_replacing_parent(zhp, vdev, child[c]);

		/*
		 * If we found the vdev, then determine what to return. If
		 * this vdev is a "replacing" vdev, then the one we found is
		 * a child of a replacing vdev. Otherwise, return the result
		 * we got from searching below us.
		 */
		if (result != ZR_NOT_FOUND)
			return (strcmp(type, VDEV_TYPE_REPLACING) == 0 ?
			    ZR_REPLACING_PARENT : result);
	}

	/* The vdev couldn't be found at this level of the tree */
	return (ZR_NOT_FOUND);
}

/*
 * Given a vdev, attempt to replace it with every known spare until one
 * succeeds.
 */
/*ARGSUSED*/
int
fmd_zfs_replace_with_spare(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
    vdev_type_t vdev_type, nvlist_t *sibling)
{
	nvlist_t *config, *nvroot, *replacement;
	nvlist_t **spares;
	uint_t s, nspares;
	char *dev_name;
	char *type;

	/*
	 * Don't try to spare something that is already spared or replacing,
	 * or that is a cache device or an available spare.
	 */
	switch (vdev_type) {
	case VDEV_IS_AVAIL_SPARE:
	case VDEV_IS_CACHE:
	case VDEV_IS_SPARED_DATA:
	case VDEV_IS_SPARED_LOG:
	case VDEV_IS_REPLACING:
		return (FMD_ZFS_ITER_CONTINUE);
	}

	if (vdev == NULL ||
	    nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    (strcmp(type, VDEV_TYPE_DISK) != 0 &&
	    strcmp(type, VDEV_TYPE_FILE) != 0))
		return (FMD_ZFS_ITER_CONTINUE);

	config = zpool_get_config(zhp, NULL);
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) != 0)
		return (FMD_ZFS_ITER_CONTINUE);

	/*
	 * Find out if there are any hot spares available in the pool.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) != 0)
		return (FMD_ZFS_ITER_CONTINUE);

	dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE, B_FALSE);

	/*
	 * If the failing device is in the process of replacing a previously
	 * failed disk, then we will already have allocated a spare for that
	 * previous disk, and we don't want to allocate another one here. So
	 * check whether we have an ancestor of type "replacing" and, if so,
	 * return.
	 */
	if (vdev_replacing_parent(zhp, dev_name, nvroot) ==
	    ZR_REPLACING_PARENT) {
		free(dev_name);
		return (FMD_ZFS_ITER_CONTINUE);
	}

	if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0) {
		free(dev_name);
		return (FMD_ZFS_ITER_CONTINUE);
	}

	(void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT);

	/*
	 * Try each spare in turn, stopping as soon as one is successfully
	 * attached.
	 */
	for (s = 0; s < nspares; s++) {
		char *spare_name;

		if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
		    &spare_name) != 0)
			continue;

		(void) nvlist_add_nvlist_array(replacement,
		    ZPOOL_CONFIG_CHILDREN, &spares[s], 1);

		if (zpool_vdev_attach(zhp, dev_name, spare_name,
		    replacement, B_TRUE) == 0)
			break;
	}

	free(dev_name);
	nvlist_free(replacement);
	return (FMD_ZFS_ITER_CONTINUE);
}

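/*
 * A caller that has isolated a faulted leaf vdev would typically drive the
 * callback above through one of the iterators defined later in this file,
 * for example (a sketch only; pool_guid/vdev_guid are assumed to come from
 * the fault event being handled):
 *
 *	fmd_zfs_iter_by_guid(zhdl, pool_guid, vdev_guid,
 *	    fmd_zfs_replace_with_spare, NULL);
 */
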
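/*
 * Fault the given vdev (by guid), but only if its reported state is at
 * least VDEV_STATE_UNUSABLE; 'arg' is passed straight through to
 * zpool_vdev_fault().
 */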
/*ARGSUSED*/
int
fmd_zfs_fault_vdev(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
    vdev_type_t vdev_type, nvlist_t *sibling)
{
	uint64_t vdev_guid;
	char *type;
	vdev_stat_t *vs;
	uint_t c;

	if (vdev == NULL ||
	    nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    (strcmp(type, VDEV_TYPE_DISK) != 0 &&
	    strcmp(type, VDEV_TYPE_FILE) != 0))
		return (FMD_ZFS_ITER_CONTINUE);

	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
	if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &c) == 0 && vs->vs_state >= VDEV_STATE_UNUSABLE)
		(void) zpool_vdev_fault(zhp, vdev_guid, arg);
	return (FMD_ZFS_ITER_CONTINUE);
}

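/*
 * Mark the given vdev (by guid) as degraded, but only if its reported state
 * is at least VDEV_STATE_DEGRADED; 'arg' is passed straight through to
 * zpool_vdev_degrade().
 */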
/*ARGSUSED*/
int
fmd_zfs_degrade_vdev(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
    vdev_type_t vdev_type, nvlist_t *sibling)
{
	uint64_t vdev_guid;
	char *type;
	vdev_stat_t *vs;
	uint_t c;

	if (vdev == NULL ||
	    nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    (strcmp(type, VDEV_TYPE_DISK) != 0 &&
	    strcmp(type, VDEV_TYPE_FILE) != 0))
		return (FMD_ZFS_ITER_CONTINUE);

	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
	if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &c) == 0 && vs->vs_state >= VDEV_STATE_DEGRADED)
		(void) zpool_vdev_degrade(zhp, vdev_guid, arg);
	return (FMD_ZFS_ITER_CONTINUE);
}

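/*
 * Clear errors on the given vdev, or on the pool as a whole (with rewind
 * disabled) when no vdev is specified.
 */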
/*ARGSUSED*/
int
fmd_zfs_clear_vdev(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
    vdev_type_t vdev_type, nvlist_t *sibling)
{
	uint64_t vdev_guid;
	nvlist_t *policy;

	if (vdev == NULL) {
		/*
		 * If no vdev specified, we've been asked to clear the pool.
		 */
		if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) == 0) {
			if (nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND,
			    ZPOOL_NEVER_REWIND) == 0)
				(void) zpool_clear(zhp, NULL, policy, B_TRUE);
			nvlist_free(policy);
		}
		return (FMD_ZFS_ITER_CONTINUE);
	}
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
	(void) zpool_vdev_clear(zhp, vdev_guid);
	return (FMD_ZFS_ITER_CONTINUE);
}

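/*
 * Record the lowest vdev state seen so far in the uint32_t pointed to by
 * 'arg'. With a NULL vdev the root vdev's state - effectively the pool
 * state - is used. Callers in this file seed *arg with VDEV_STATE_HEALTHY.
 */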
/*ARGSUSED*/
int
fmd_zfs_vdev_state(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
    vdev_type_t vdev_type, nvlist_t *sibling)
{
	uint32_t *rvalp = (uint32_t *)arg;
	vdev_stat_t *vs;
	uint_t c;

	if (vdev == NULL) {
		/*
		 * If no vdev is specified, we've been asked for the state of
		 * the pool - find the state of the root node.
		 */
		nvlist_t *cfg = zpool_get_config(zhp, NULL);

		(void) nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev);
	}

	/*
	 * Update the state if it is less than the current value.
	 */
	if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &c) == 0 && vs->vs_state < *rvalp)
		*rvalp = vs->vs_state;
	return (FMD_ZFS_ITER_CONTINUE);
}

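/*
 * Record (in the int pointed to by 'arg') that a matching vdev exists in
 * some pool configuration, and stop iterating.
 */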
/*ARGSUSED*/
int
fmd_zfs_in_config(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
    vdev_type_t vdev_type, nvlist_t *sibling)
{
	*(int *)arg = 1;
	return (FMD_ZFS_ITER_TERMINATE);
}

#define	TYPE_DEVID	1
#define	TYPE_GUID	2
#define	TYPE_LOCATION	3
#define	TYPE_DEVPATH	4
#define	TYPE_ALL	5

typedef struct zfs_iter_cbdata {
	int cb_type;
	uint64_t cb_pool_guid;
	uint64_t cb_vdev_guid;
	char *cb_devid;
	char *cb_devpath;
	char *cb_chassissn;
	char *cb_location;
	fmd_zfs_func_t cb_func;
	void *cb_arg;
} zfs_iter_cbdata_t;

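/*
 * Recursively walk a vdev tree, invoking cb_func on every vdev that matches
 * the search criterion in cbp (guid, devid, physical path, chassis/location,
 * or everything for TYPE_ALL). Data children, L2 cache devices and available
 * spares are all visited. The vdev_type passed to the callback records
 * whether the node sits beneath a "replacing" or "spare" vdev, and 'sib'
 * identifies its sibling in that case.
 */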
static int
iter_by_vdev(zpool_handle_t *zhp, nvlist_t *vdev, zfs_iter_cbdata_t *cbp,
    vdev_type_t type, nvlist_t *sib)
{
	nvlist_t **child;
	uint_t c, children;
	char *devid, *chassissn, *location, *path;
	uint64_t guid;

	switch (cbp->cb_type) {
	case TYPE_GUID:
		if (nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid) == 0 &&
		    guid == cbp->cb_vdev_guid)
			if (cbp->cb_func(zhp, vdev, cbp->cb_arg, type, sib) ==
			    FMD_ZFS_ITER_TERMINATE)
				return (FMD_ZFS_ITER_TERMINATE);
		break;
	case TYPE_LOCATION:
		if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_CHASSISSN,
		    &chassissn) == 0 &&
		    strcmp(cbp->cb_chassissn, chassissn) == 0 &&
		    nvlist_lookup_string(vdev, ZPOOL_CONFIG_LOCATION,
		    &location) == 0 &&
		    strcmp(cbp->cb_location, location) == 0)
			if (cbp->cb_func(zhp, vdev, cbp->cb_arg, type, sib) ==
			    FMD_ZFS_ITER_TERMINATE)
				return (FMD_ZFS_ITER_TERMINATE);
		break;
	case TYPE_DEVPATH:
		if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &path) ==
		    0 && strncmp(cbp->cb_devpath, path,
		    strlen(cbp->cb_devpath)) == 0 &&
		    (path[strlen(cbp->cb_devpath)] == '\0' ||
		    path[strlen(cbp->cb_devpath)] == ':'))
			if (cbp->cb_func(zhp, vdev, cbp->cb_arg, type, sib) ==
			    FMD_ZFS_ITER_TERMINATE)
				return (FMD_ZFS_ITER_TERMINATE);
		break;
	case TYPE_DEVID:
		if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_DEVID, &devid) ==
		    0 && devid_str_compare(cbp->cb_devid, devid) == 0)
			if (cbp->cb_func(zhp, vdev, cbp->cb_arg, type, sib) ==
			    FMD_ZFS_ITER_TERMINATE)
				return (FMD_ZFS_ITER_TERMINATE);
		break;
	case TYPE_ALL:
		if (cbp->cb_func(zhp, vdev, cbp->cb_arg, type, sib) ==
		    FMD_ZFS_ITER_TERMINATE)
			return (FMD_ZFS_ITER_TERMINATE);
		break;
	}

	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0)
		for (c = 0; c < children; c++) {
			vdev_type_t tmp;
			uint64_t islog = 0LL;
			char *vdevtype = "";
			nvlist_t *sibling = NULL;

			if (type == VDEV_NULL) {
				/*
				 * If type is VDEV_NULL, this is a top-level
				 * vdev. See if it is of type log or data.
				 */
				(void) nvlist_lookup_uint64(vdev,
				    ZPOOL_CONFIG_IS_LOG, &islog);
				tmp = islog ? VDEV_IS_LOG : VDEV_IS_DATA;
			} else {
				/*
				 * If the parent node is a spare or replacing
				 * vdev, work out what type of node the child
				 * is and keep track of the child's sibling.
				 */
				(void) nvlist_lookup_string(vdev,
				    ZPOOL_CONFIG_TYPE, &vdevtype);
				if (vdevtype && strcmp(vdevtype,
				    VDEV_TYPE_REPLACING) == 0) {
					if (c == 0) {
						tmp = VDEV_IS_REPLACING;
						sibling = child[1];
					} else {
						tmp = type;
						sibling = child[c - 1];
					}
				} else if (vdevtype && strcmp(vdevtype,
				    VDEV_TYPE_SPARE) == 0) {
					if (c == 0) {
						tmp = (type == VDEV_IS_LOG) ?
						    VDEV_IS_SPARED_LOG :
						    VDEV_IS_SPARED_DATA;
						sibling = child[1];
					} else {
						tmp = VDEV_IS_ACTIVE_SPARE;
						sibling = child[c - 1];
					}
				} else
					tmp = type;
			}
			if (iter_by_vdev(zhp, child[c], cbp, tmp, sibling) ==
			    FMD_ZFS_ITER_TERMINATE)
				return (FMD_ZFS_ITER_TERMINATE);
		}

	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0)
		for (c = 0; c < children; c++)
			if (iter_by_vdev(zhp, child[c], cbp, VDEV_IS_CACHE,
			    NULL) == FMD_ZFS_ITER_TERMINATE)
				return (FMD_ZFS_ITER_TERMINATE);

	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0)
		for (c = 0; c < children; c++)
			if (iter_by_vdev(zhp, child[c], cbp,
			    VDEV_IS_AVAIL_SPARE, NULL) ==
			    FMD_ZFS_ITER_TERMINATE)
				return (FMD_ZFS_ITER_TERMINATE);

	return (FMD_ZFS_ITER_CONTINUE);
}

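/*
 * zpool_iter() callback: select the pool (by guid when searching by guid),
 * then walk its vdev tree with iter_by_vdev(). When the search is for the
 * pool itself (a vdev guid of 0), cb_func is invoked once with a NULL vdev.
 */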
static int
iter_by_pool(zpool_handle_t *zhp, void *data)
{
	zfs_iter_cbdata_t *cbp = data;
	nvlist_t *config;
	nvlist_t *nvroot;
	int rval = FMD_ZFS_ITER_CONTINUE;

	if (cbp->cb_type == TYPE_GUID) {
		if (cbp->cb_pool_guid != zpool_get_prop_int(zhp,
		    ZPOOL_PROP_GUID, NULL))
			goto done;
		if (cbp->cb_vdev_guid == 0ll) {
			rval = cbp->cb_func(zhp, NULL, cbp->cb_arg, VDEV_NULL,
			    NULL);
			goto done;
		}
	}
	config = zpool_get_config(zhp, NULL);
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0)
		rval = iter_by_vdev(zhp, nvroot, cbp, VDEV_NULL, NULL);
done:
	zpool_close(zhp);
	return ((rval == FMD_ZFS_ITER_TERMINATE) ? 1 : 0);
}

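/*
 * The entry points below iterate over all imported pools, invoking 'func'
 * on each vdev that matches the given devid, chassis/location pair,
 * physical device path, or pool/vdev guid pair (or on every vdev, for
 * fmd_zfs_iter_all()). Iteration stops early if the callback returns
 * FMD_ZFS_ITER_TERMINATE.
 */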
void
fmd_zfs_iter_by_devid(libzfs_handle_t *zhdl, char *devid, fmd_zfs_func_t func,
    void *arg)
{
	zfs_iter_cbdata_t cb;

	cb.cb_type = TYPE_DEVID;
	cb.cb_devid = devid;
	cb.cb_func = func;
	cb.cb_arg = arg;
	(void) zpool_iter(zhdl, iter_by_pool, &cb);
}

void
fmd_zfs_iter_by_location(libzfs_handle_t *zhdl, char *chassissn, char *location,
    fmd_zfs_func_t func, void *arg)
{
	zfs_iter_cbdata_t cb;

	cb.cb_type = TYPE_LOCATION;
	cb.cb_chassissn = chassissn;
	cb.cb_location = location;
	cb.cb_func = func;
	cb.cb_arg = arg;
	(void) zpool_iter(zhdl, iter_by_pool, &cb);
}

void
fmd_zfs_iter_all(libzfs_handle_t *zhdl, fmd_zfs_func_t func, void *arg)
{
	zfs_iter_cbdata_t cb;

	cb.cb_type = TYPE_ALL;
	cb.cb_func = func;
	cb.cb_arg = arg;
	(void) zpool_iter(zhdl, iter_by_pool, &cb);
}

void
fmd_zfs_iter_by_devpath(libzfs_handle_t *zhdl, char *devpath,
    fmd_zfs_func_t func, void *arg)
{
	zfs_iter_cbdata_t cb;

	cb.cb_type = TYPE_DEVPATH;
	cb.cb_devpath = devpath;
	cb.cb_func = func;
	cb.cb_arg = arg;
	(void) zpool_iter(zhdl, iter_by_pool, &cb);
}

void
fmd_zfs_iter_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid,
    uint64_t vdev_guid, fmd_zfs_func_t func, void *arg)
{
	zfs_iter_cbdata_t cb;

	cb.cb_type = TYPE_GUID;
	cb.cb_vdev_guid = vdev_guid;
	cb.cb_pool_guid = pool_guid;
	cb.cb_func = func;
	cb.cb_arg = arg;
	(void) zpool_iter(zhdl, iter_by_pool, &cb);
}
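
/*
 * For example, a caller could (hypothetically) determine the overall state
 * of a pool by seeding a state variable with VDEV_STATE_HEALTHY and passing
 * fmd_zfs_vdev_state() as the callback:
 *
 *	uint32_t state = VDEV_STATE_HEALTHY;
 *
 *	fmd_zfs_iter_by_guid(zhdl, pool_guid, 0, fmd_zfs_vdev_state, &state);
 *	if (state < VDEV_STATE_DEGRADED)
 *		(void) printf("pool is not currently usable\n");
 */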

#define	ZFS_LABEL_EXT		"/usr/lib/zfs/labeldisk"
#define	ZFS_LABELFAIL_EXT	"/usr/lib/zfs/labelfail"
#define	ZFS_CHECK_EXT		"/usr/lib/zfs/checkdisk"

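/*
 * Run the given external helper and wait for it; returns 0 only if the
 * program could be spawned and exited with status 0.
 */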
static int
fmd_zfs_spawn(char **argv)
{
	pid_t pid;
	int ret;

	if (posix_spawn(&pid, argv[0], NULL, NULL, argv, NULL) != 0 ||
	    waitpid(pid, &ret, 0) != pid ||
	    !WIFEXITED(ret) || WEXITSTATUS(ret) != 0)
		return (-1);
	return (0);
}

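/*
 * Handle insertion of a replacement device (described by the fmd_zfs_cb_t
 * passed in 'arg') for the given vdev. If the pool has autoreplace enabled,
 * the new disk is labeled - directly for whole disks, or via the external
 * 'labeldisk' helper otherwise - and then attached in place of the old vdev;
 * cache and spare devices are removed and re-added instead. If anything
 * fails, we fall back to forcing a fault on the original device (see the
 * forcefault label below).
 */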
int
fmd_zfs_autoreplace(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
    vdev_type_t type, nvlist_t *sibling)
{
	fmd_zfs_cb_t *fzp = (fmd_zfs_cb_t *)arg;
	nvlist_t *nvroot = NULL, *newvd = NULL;
	uint64_t wholedisk = 0ULL;
	char *vdevname = NULL, *origvdevname;
	char *xdevid, *devidstr, *savedevidstr = NULL, *minor;
	ddi_devid_t devid;
	char *devdskpath = NULL, *path = NULL, *physpath = NULL, *xpath;
	const char *oldsuffix;
	char *argv[4];
	size_t len;
	int fd;
	struct stat64 statbuf;
	uint32_t old_pool_state = VDEV_STATE_HEALTHY;

	/*
	 * Ignore vdevs that are already being replaced.
	 */
	if (type == VDEV_IS_REPLACING)
		return (FMD_ZFS_ITER_CONTINUE);

	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL))
		goto forcefault;
	if ((vdevname = zpool_vdev_name(zpool_get_handle(zhp), zhp, vdev,
	    B_FALSE, B_FALSE)) == NULL)
		goto forcefault;
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);

	if (strncmp(fzp->fz_path, "/dev/lofi", 9) == 0) {
		len = strlen(fzp->fz_physpath);
		if ((physpath = malloc(len + 1)) == NULL)
			goto forcefault;
		(void) strcpy(physpath, fzp->fz_physpath);
		len = strlen(fzp->fz_path);
		if ((path = malloc(len + 1)) == NULL)
			goto forcefault;
		(void) strcpy(path, fzp->fz_path);
		len = strlen(fzp->fz_path);
		if ((devdskpath = malloc(len + 1)) == NULL)
			goto forcefault;
		(void) strcpy(devdskpath, fzp->fz_path);
		devidstr = fzp->fz_devid;
		goto do_lofi;
	}

	/*
	 * Modify physpath/path/devid to be for the new disk, but make
	 * sure they have the same slice (if any) as before.
	 */
	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &xpath) == 0) {
		len = strlen(fzp->fz_physpath);
		if ((oldsuffix = strchr(xpath, ':')) == NULL)
			oldsuffix = "";
		if ((physpath = malloc(len + strlen(oldsuffix) + 1)) == NULL)
			goto forcefault;
		(void) strcpy(physpath, fzp->fz_physpath);
		(void) strcpy(physpath + len, oldsuffix);
	} else
		physpath = strdup(fzp->fz_physpath);
	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &xpath) == 0) {
		len = strlen(fzp->fz_path);
		/* look for suffix - ignoring "/dev/dsk" */
		if ((oldsuffix = strchr(&xpath[9], 's')) == NULL &&
		    (oldsuffix = strchr(&xpath[9], 'p')) == NULL)
			oldsuffix = "";
		if ((path = malloc(len + strlen(oldsuffix) + 1)) == NULL)
			goto forcefault;
		(void) strcpy(path, fzp->fz_path);
		(void) strcpy(path + len, oldsuffix);
	} else
		path = strdup(fzp->fz_path);
	len = strlen(path) + 10;
	if ((devdskpath = malloc(len)) == NULL)
		goto forcefault;
	(void) snprintf(devdskpath, len, "/dev/dsk/%s", path);
	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_DEVID, &xdevid) == 0 &&
	    devid_str_decode(fzp->fz_devid, &devid, NULL) == 0 &&
	    devid_str_decode(xdevid, NULL, &minor) == 0) {
		savedevidstr = devidstr = devid_str_encode(devid, minor);
		devid_free(devid);
		devid_str_free(minor);
	} else
		devidstr = fzp->fz_devid;

	/*
	 * If the 'checkdisk' program exists, then verify that the type of the
	 * disk inserted matches the type of the disk it is attempting to
	 * replace.
	 */
	if (stat64(ZFS_CHECK_EXT, &statbuf) == 0) {
		argv[0] = ZFS_CHECK_EXT;
		argv[1] = wholedisk ? fzp->fz_path : path;
		argv[2] = type == VDEV_IS_CACHE ? "cache" :
		    (type == VDEV_IS_LOG || type == VDEV_IS_SPARED_LOG) ?
		    "log" : "data";
		argv[3] = NULL;
		if (fmd_zfs_spawn(argv) != 0)
			goto forcefault;
	}

	/*
	 * Before we attempt a replacement, we zero out the labels. While
	 * unlikely, it's possible that we are re-inserting a stale disk that
	 * was offlined, and its old label is going to conflict with what's
	 * currently in the config. But don't do this for cache or available
	 * spare devices as there's no zpool_vdev_attach to recreate the labels.
	 */
	if (type != VDEV_IS_CACHE && type != VDEV_IS_AVAIL_SPARE) {
		if ((fd = open(devdskpath, O_RDWR)) >= 0) {
			(void) zpool_clear_label(fd);
			(void) fsync(fd);
			(void) close(fd);
		}
	}

	/*
	 * If this is a request to label a whole disk, then attempt to
	 * write out the label. If it is not a whole disk, and the
	 * external 'labeldisk' program exists, then invoke this
	 * program to label the disk.
	 *
	 * If any part of this process fails, then force a fault (which
	 * may in turn trigger a hot spare replacement).
	 */
	if (wholedisk) {
		if (zpool_label_disk(zpool_get_handle(zhp), zhp,
		    fzp->fz_path, ZPOOL_LABEL_MATCH_REQ_PART, NULL) != 0) {
			if (stat64(ZFS_LABELFAIL_EXT, &statbuf) == 0) {
				argv[0] = ZFS_LABELFAIL_EXT;
				argv[1] = fzp->fz_path;
				argv[2] = NULL;
				(void) fmd_zfs_spawn(argv);
			}
			goto forcefault;
		}
	} else {
		if (stat64(ZFS_LABEL_EXT, &statbuf) == 0) {
			argv[0] = ZFS_LABEL_EXT;
			argv[1] = path;
			argv[2] = NULL;
			if (fmd_zfs_spawn(argv) != 0) {
				if (stat64(ZFS_LABELFAIL_EXT,
				    &statbuf) == 0) {
					argv[0] = ZFS_LABELFAIL_EXT;
					argv[1] = path;
					argv[2] = NULL;
					(void) fmd_zfs_spawn(argv);
				}
				goto forcefault;
			}
		}
	}

do_lofi:
	/*
	 * Construct the root vdev to pass to zpool_vdev_attach(). While adding
	 * the entire vdev structure is harmless, we construct a reduced set of
	 * path/physpath/wholedisk to keep it simple.
	 */
	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 ||
	    nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0 ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, devdskpath) != 0 ||
	    (fzp->fz_devchassis != NULL && nvlist_add_string(newvd,
	    ZPOOL_CONFIG_DEVCHASSIS, fzp->fz_devchassis) != 0) ||
	    (fzp->fz_chassissn != NULL && nvlist_add_string(newvd,
	    ZPOOL_CONFIG_CHASSISSN, fzp->fz_chassissn) != 0) ||
	    (fzp->fz_location != NULL && nvlist_add_string(newvd,
	    ZPOOL_CONFIG_LOCATION, fzp->fz_location) != 0) ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_PHYS_PATH, physpath) != 0 ||
	    (devidstr != NULL &&
	    nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, devidstr) != 0) ||
	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
	    ((type == VDEV_IS_LOG || type == VDEV_IS_SPARED_LOG) &&
	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_IS_LOG, B_TRUE) != 0) ||
	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0)
		goto forcefault;

	(void) fmd_zfs_vdev_state(zhp, NULL, &old_pool_state, type, sibling);
	if (type == VDEV_IS_CACHE) {
		/*
		 * Do a remove/add pair instead of a zpool_vdev_attach().
		 */
		if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
		    &newvd, 1) != 0)
			goto forcefault;
		if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    NULL, 0) != 0)
			goto forcefault;
		if (zpool_vdev_remove(zhp, vdevname) == 0)
			(void) zpool_add(zhp, nvroot);
	} else if (type == VDEV_IS_AVAIL_SPARE) {
		/*
		 * Do a remove/add pair instead of a zpool_vdev_attach().
		 */
		if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    &newvd, 1) != 0)
			goto forcefault;
		if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    NULL, 0) != 0)
			goto forcefault;
		if (zpool_vdev_remove(zhp, vdevname) == 0)
			(void) zpool_add(zhp, nvroot);
	} else if (type == VDEV_IS_ACTIVE_SPARE) {
		/*
		 * Do a detach of the active spare first.
		 */
		if (zpool_vdev_detach(zhp, vdevname) != 0)
			goto forcefault;

		/*
		 * Now remove the old hot spare and add the new hot spare.
		 */
		if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    &newvd, 1) != 0)
			goto forcefault;
		if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    NULL, 0) != 0)
			goto forcefault;
		if (zpool_vdev_remove(zhp, vdevname) != 0)
			goto forcefault;
		if (zpool_add(zhp, nvroot) != 0)
			goto forcefault;

		/*
		 * Now do the attach of the new active spare.
		 */
		if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    NULL, 0) != 0)
			goto forcefault;
		if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    &newvd, 1) != 0)
			goto forcefault;
		origvdevname = zpool_vdev_name(zpool_get_handle(zhp), zhp,
		    sibling, B_FALSE, B_FALSE);
		(void) zpool_vdev_attach(zhp, origvdevname,
		    devdskpath, nvroot, B_TRUE);
		free(origvdevname);
	} else {
		if (nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    &newvd, 1) != 0)
			goto forcefault;
		(void) zpool_vdev_attach(zhp, vdevname, devdskpath, nvroot,
		    B_TRUE);
	}
	/*
	 * If this action might have made the pool usable again then enable
	 * datasets.
	 */
	if (old_pool_state < VDEV_STATE_DEGRADED)
		(void) zpool_enable_datasets(zhp, NULL, 0);
out:
	if (nvroot)
		nvlist_free(nvroot);
	if (newvd)
		nvlist_free(newvd);
	if (physpath)
		free(physpath);
	if (path)
		free(path);
	if (devdskpath)
		free(devdskpath);
	if (vdevname)
		free(vdevname);
	if (savedevidstr)
		devid_str_free(savedevidstr);
	return (FMD_ZFS_ITER_CONTINUE);

forcefault:
	/*
	 * If we are being called after an fmd restart, we may have missed
	 * a check event. So force a fault here by doing a re-online of the
	 * original device - which we know will fail.
	 */
	if (fzp->fz_isrestart)
		(void) fmd_zfs_reonline(zhp, vdev, NULL, type, sibling);
	goto out;
}

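/*
 * A minimal sketch of how a disk-insertion handler might drive the
 * autoreplace path above (fmd_zfs_cb_t itself is declared in fmd_zfs.h;
 * the values shown here are purely illustrative):
 *
 *	fmd_zfs_cb_t cb;
 *
 *	bzero(&cb, sizeof (cb));
 *	cb.fz_path = newpath;		(e.g. "c0t3d0", no "/dev/dsk/" prefix)
 *	cb.fz_physpath = newphyspath;
 *	cb.fz_devid = newdevid;
 *	cb.fz_isrestart = B_FALSE;
 *	fmd_zfs_iter_by_devpath(zhdl, newphyspath, fmd_zfs_autoreplace, &cb);
 */
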
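/*
 * Attempt to bring the given vdev back online. If the online succeeds and a
 * hot spare had been activated for it, detach the spare now rather than
 * waiting for it to resilver. If the online fails and a callback structure
 * was supplied (the lofi case), fall back to fmd_zfs_autoreplace().
 */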
/*ARGSUSED*/
int
fmd_zfs_reonline(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
    vdev_type_t type, nvlist_t *sibling)
{
	char *vdevname;
	vdev_state_t newstate;
	char *sibvdevname;
	uint32_t old_pool_state = VDEV_STATE_HEALTHY;

	if (type == VDEV_IS_AVAIL_SPARE)
		return (FMD_ZFS_ITER_CONTINUE);

	if ((vdevname = zpool_vdev_name(zpool_get_handle(zhp), zhp, vdev,
	    B_FALSE, B_FALSE)) == NULL)
		return (FMD_ZFS_ITER_CONTINUE);
	(void) fmd_zfs_vdev_state(zhp, NULL, &old_pool_state, type, sibling);
	if (zpool_vdev_online(zhp, vdevname, ZFS_ONLINE_AUTO, &newstate) == 0 &&
	    (newstate == VDEV_STATE_HEALTHY ||
	    newstate == VDEV_STATE_DEGRADED)) {
		/*
		 * If this action might have made the pool usable again then
		 * enable datasets.
		 */
		if (old_pool_state < VDEV_STATE_DEGRADED)
			(void) zpool_enable_datasets(zhp, NULL, 0);
		if (newstate == VDEV_STATE_HEALTHY &&
		    (type == VDEV_IS_SPARED_DATA ||
		    type == VDEV_IS_SPARED_LOG)) {
			/*
			 * If there was a hot spare activated, then detach the
			 * spare now rather than waiting for resilvering to
			 * complete. If we don't do this, then we are forced to
			 * wait for the spare itself to resilver, which can be
			 * orders of magnitude longer than the original disk
			 * (which is only missing a small amount of data). Note
			 * that if we are in the rare situation where the spare
			 * has the only valid copy of some data then the
			 * zpool_vdev_detach() will simply fail.
			 */
			sibvdevname = zpool_vdev_name(zpool_get_handle(zhp),
			    zhp, sibling, B_FALSE, B_FALSE);
			if (sibvdevname != NULL) {
				(void) zpool_vdev_detach(zhp, sibvdevname);
				free(sibvdevname);
			}
		}
	} else if (arg != NULL) {
		/*
		 * arg is only non-NULL for lofi devices.
		 */
		fmd_zfs_cb_t *fzp = (fmd_zfs_cb_t *)arg;

		(void) fmd_zfs_autoreplace(zhp, vdev, fzp, type, sibling);
	}
	free(vdevname);
	return (FMD_ZFS_ITER_CONTINUE);
}

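/*
 * Record the device chassis, chassis serial number and location of the
 * given vdev in the pool configuration via zpool_location_set().
 */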
/*ARGSUSED*/
int
fmd_zfs_set_location(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
    vdev_type_t type, nvlist_t *sibling)
{
	fmd_zfs_cb_t *fzp = (fmd_zfs_cb_t *)arg;
	uint64_t vdev_guid;

	if (nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid) == 0)
		(void) zpool_location_set(zhp, vdev_guid, fzp->fz_devchassis,
		    fzp->fz_chassissn, fzp->fz_location);
	return (FMD_ZFS_ITER_CONTINUE);
}

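/*
 * If the pool has the autoexpand property set, re-online the device with
 * ZFS_ONLINE_EXPAND so that any additional capacity is picked up. For whole
 * disks, the trailing slice suffix is stripped from the path first.
 */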
/*ARGSUSED*/
int
fmd_zfs_autoexpand(zpool_handle_t *zhp, nvlist_t *vdev, void *arg,
    vdev_type_t type, nvlist_t *sibling)
{
	char *path, fullpath[MAXPATHLEN];
	uint64_t wholedisk = 0ULL;
	vdev_state_t newstate;

	if (type == VDEV_IS_AVAIL_SPARE)
		return (FMD_ZFS_ITER_CONTINUE);

	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
	(void) strlcpy(fullpath, path, sizeof (fullpath));
	if (wholedisk)
		fullpath[strlen(fullpath) - 2] = '\0';
	if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL) &&
	    zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
		(void) zpool_vdev_online(zhp, fullpath,
		    ZFS_ONLINE_AUTO | ZFS_ONLINE_EXPAND, &newstate);
	return (FMD_ZFS_ITER_CONTINUE);
}