libzfs_import.c revision bda8819455defbccd06981d9a13b240b682a3d50
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
* Pool import support functions.
*
* To import a pool, we rely on reading the configuration information from the
* ZFS label of each device. If we successfully read the label, then we
* organize the configuration information in the following hierarchy:
*
* pool guid -> toplevel vdev guid -> label txg
*
* Duplicate entries matching this same tuple will be discarded. Once we have
* examined every device, we pick the best label txg config for each toplevel
* vdev. We then arrange these toplevel vdevs into a complete pool config, and
* update any paths that have changed. Finally, we attempt to import the pool
* using our derived config, and record the results.
*/
#include <ctype.h>
#include <devid.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/efi_partition.h>
#include <thread_pool.h>
#include <sys/vdev_impl.h>
#include "libzfs.h"
#include "libzfs_impl.h"
/*
* Intermediate structures used to gather configuration information.
*/
typedef struct config_entry {
	uint64_t		ce_txg;
	nvlist_t		*ce_config;
	struct config_entry	*ce_next;
} config_entry_t;
typedef struct vdev_entry {
	uint64_t		ve_guid;
	config_entry_t		*ve_configs;
	struct vdev_entry	*ve_next;
} vdev_entry_t;
typedef struct pool_entry {
	uint64_t		pe_guid;
	vdev_entry_t		*pe_vdevs;
	struct pool_entry	*pe_next;
} pool_entry_t;
typedef struct name_entry {
	char			*ne_name;
	uint64_t		ne_guid;
	struct name_entry	*ne_next;
} name_entry_t;
typedef struct pool_list {
	pool_entry_t		*pools;
	name_entry_t		*names;
} pool_list_t;
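/*
 * Illustrative sketch, not part of the original file: how the
 * pool guid -> toplevel vdev guid -> label txg hierarchy described at the
 * top of this file maps onto the structures above. The helper below is
 * hypothetical; it walks the lists to find the newest (highest-txg) config
 * recorded for one toplevel vdev.
 */
static nvlist_t *
example_best_config(pool_list_t *pl, uint64_t pool_guid, uint64_t top_guid)
{
	pool_entry_t *pe;
	vdev_entry_t *ve;
	config_entry_t *ce;
	nvlist_t *best = NULL;
	uint64_t best_txg = 0;

	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
		if (pe->pe_guid != pool_guid)
			continue;
		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
			if (ve->ve_guid != top_guid)
				continue;
			for (ce = ve->ve_configs; ce != NULL;
			    ce = ce->ce_next) {
				if (ce->ce_txg > best_txg) {
					best_txg = ce->ce_txg;
					best = ce->ce_config;
				}
			}
		}
	}
	return (best);
}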
static char *
{
int fd;
return (NULL);
}
return (ret);
}
/*
 * Go through and fix up any path and/or devid information for the given vdev
 * configuration.
 */
static int
{
int matched;
for (c = 0; c < children; c++)
return (-1);
return (0);
}
/*
* This is a leaf (file or disk) vdev. In either case, go through
* the name list and see if we find a matching guid. If so, replace
* the path and see if we can calculate a new devid.
*
* There may be multiple names associated with a particular guid, in
* which case we have overlapping slices or multiple paths to the same
* disk. If this is the case, then we want to pick the path that is
* the most similar to the original, where "most similar" is the number
* of matching characters starting from the end of the path. This will
* preserve slice numbers even if the disks have been reorganized, and
* will also catch preferred disk names if multiple paths exist.
*/
matched = 0;
int count;
break;
}
break;
/*
* At this point, 'count' is the number of characters
* matched from the end.
*/
}
}
}
return (0);
return (-1);
} else {
return (-1);
}
return (0);
}
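/*
 * Sketch, not in the original source: one way to compute the "most similar"
 * score described above -- the number of characters that match when two
 * paths are compared from the end. The helper name is hypothetical; it only
 * relies on <string.h>, which is already included.
 */
static size_t
example_suffix_match(const char *a, const char *b)
{
	size_t alen = strlen(a);
	size_t blen = strlen(b);
	size_t count = 0;

	while (count < alen && count < blen &&
	    a[alen - count - 1] == b[blen - count - 1])
		count++;

	return (count);
}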
/*
* Add the given configuration to the list of known devices.
*/
static int
{
/*
* If this is a hot spare not currently in use or level 2 cache
* device, add it to the list of names to translate, but don't do
* anything else.
*/
&state) == 0 &&
return (-1);
return (-1);
}
return (0);
}
/*
* If we have a valid config but cannot read any of these fields, then
* it means we have a half-initialized label. In vdev_label_init()
* we write a label with txg == 0 so that we can identify the device
* in case the user refers to the same disk later on. If we fail to
* create the pool, we'll be left with a label in this state
* which should not be considered part of a valid pool.
*/
&pool_guid) != 0 ||
&vdev_guid) != 0 ||
&top_guid) != 0 ||
return (0);
}
/*
* First, see if we know about this pool. If not, then add it to the
* list of known pools.
*/
break;
}
return (-1);
}
}
/*
* Second, see if we know about this toplevel vdev. Add it if it's
* missing.
*/
break;
}
return (-1);
}
}
/*
* Third, see if we have a config with a matching transaction group. If
* so, then we do nothing. Otherwise, add it to the list of known
* configs.
*/
break;
}
return (-1);
}
} else {
}
/*
* At this point we've successfully added our config to the list of
* known configs. The last thing to do is add the vdev guid -> path
* mappings so that we can fix up the configuration as necessary before
* doing the import.
*/
return (-1);
return (-1);
}
return (0);
}
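/*
 * Sketch, not part of the original file: the half-initialized label case
 * described above can be recognized by a pool txg that is missing or zero.
 * ZPOOL_CONFIG_POOL_TXG is the real nvlist key from sys/fs/zfs.h; the
 * helper itself is hypothetical.
 */
static boolean_t
example_label_is_half_initialized(nvlist_t *config)
{
	uint64_t txg = 0;

	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg);
	return (txg == 0 ? B_TRUE : B_FALSE);
}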
/*
* Returns true if the named pool matches the given GUID.
*/
static int
{
return (-1);
return (0);
}
&theguid) == 0);
return (0);
}
static nvlist_t *
{
int err;
return (NULL);
return (NULL);
}
return (NULL);
}
}
if (err) {
return (NULL);
}
return (NULL);
}
return (nvl);
}
/*
* Determine if the vdev id is a hole in the namespace.
*/
{
for (int c = 0; c < holes; c++) {
/* Top-level is a hole */
if (hole_array[c] == id)
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Convert our list of pools into the definitive set of configurations. We
* start by picking the best config for each toplevel vdev. Once that's done,
* we assemble the toplevel vdevs into a full config for the pool. We make a
* pass to fix up any incorrect paths, and then add it to the main list to
* return to the user.
*/
static nvlist_t *
{
uint_t c;
if (nvlist_alloc(&ret, 0, 0) != 0)
goto nomem;
goto nomem;
/*
* Iterate over all toplevel vdevs. Grab the pool configuration
* from the first one we find, and then go through the rest and
* add them as necessary to the 'vdevs' member of the config.
*/
/*
* Determine the best configuration for this vdev by
* selecting the config with the latest transaction
* group.
*/
best_txg = 0;
}
}
/*
* We rely on the fact that the max txg for the
* pool will contain the most up-to-date information
* about the valid top-levels in the vdev namespace.
*/
(void) nvlist_remove(config,
(void) nvlist_remove(config,
hole_array = NULL;
holes = 0;
max_id = 0;
if (nvlist_lookup_uint64(tmp,
ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
max_id) == 0);
}
&holes) == 0) {
hole_array, holes) == 0);
}
}
if (!config_seen) {
/*
* Copy the relevant pieces of data to the pool
* configuration:
*
* version
* pool guid
* name
* comment (if available)
* pool state
* hostid (if available)
* hostname (if available)
*/
if (nvlist_lookup_string(tmp,
ZPOOL_CONFIG_COMMENT, &comment) == 0)
hostid = 0;
if (nvlist_lookup_uint64(tmp,
ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
}
}
/*
* Add this top-level vdev to the child array.
*/
ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
&id) == 0);
sizeof (nvlist_t *));
goto nomem;
for (c = 0; c < children; c++)
}
goto nomem;
}
/*
* If we have information about all the top-levels then
* clean up the nvlist which we've constructed. This
* means removing any extraneous devices that are
* beyond the valid range or adding devices to the end
* of our array which appear to be missing.
*/
if (valid_top_config) {
nvlist_free(child[c]);
sizeof (nvlist_t *));
goto nomem;
for (c = 0; c < children; c++)
}
}
&guid) == 0);
/*
* The vdev namespace may contain holes as a result of
* device removal. We must add them back into the vdev
* tree before we process any missing devices.
*/
if (holes > 0) {
for (c = 0; c < children; c++) {
continue;
0) != 0)
goto nomem;
/*
* Holes in the namespace are treated as
* "hole" top-level vdevs and have a
* special flag set on them.
*/
if (nvlist_add_string(holey,
VDEV_TYPE_HOLE) != 0 ||
ZPOOL_CONFIG_ID, c) != 0 ||
ZPOOL_CONFIG_GUID, 0ULL) != 0)
goto nomem;
}
}
/*
* Look for any missing top-level vdevs. If this is the case,
* create a faked up 'missing' vdev as a placeholder. We cannot
* simply compress the child array, because the kernel performs
* certain checks to make sure the vdev IDs match their location
* in the configuration.
*/
for (c = 0; c < children; c++) {
0) != 0)
goto nomem;
if (nvlist_add_string(missing,
VDEV_TYPE_MISSING) != 0 ||
ZPOOL_CONFIG_ID, c) != 0 ||
ZPOOL_CONFIG_GUID, 0ULL) != 0) {
goto nomem;
}
}
}
/*
* Put all of this pool's top-level vdevs into a root vdev.
*/
goto nomem;
VDEV_TYPE_ROOT) != 0 ||
goto nomem;
}
for (c = 0; c < children; c++)
nvlist_free(child[c]);
children = 0;
/*
 * Go through and fix up any paths and/or devids based on our
 * known list of vdev GUID -> path mappings.
 */
goto nomem;
}
/*
* Add the root vdev to this pool's configuration.
*/
nvroot) != 0) {
goto nomem;
}
/*
* zdb uses this path to report on active pools that were
* imported or created using -R.
*/
if (active_ok)
goto add_pool;
/*
* Determine if this pool is currently active, in which case we
* can't actually import it.
*/
&name) == 0);
&guid) == 0);
goto error;
if (isactive) {
continue;
}
continue;
}
/*
* Go through and update the paths for spares, now that we have
* them.
*/
&nvroot) == 0);
for (i = 0; i < nspares; i++) {
goto nomem;
}
}
/*
* Update the paths for l2cache devices.
*/
for (i = 0; i < nl2cache; i++) {
goto nomem;
}
}
/*
* Restore the original information read from the actual label.
*/
if (hostid != 0) {
hostid) == 0);
hostname) == 0);
}
/*
* Add this pool to the list of configs.
*/
&name) == 0);
goto nomem;
}
if (!found_one) {
}
return (ret);
for (c = 0; c < children; c++)
nvlist_free(child[c]);
return (NULL);
}
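/*
 * Sketch, not in the original source: building the kind of placeholder
 * top-level vdev described above. Both "hole" and "missing" vdevs carry
 * only a type, an id matching their slot in the child array, and a zero
 * guid. The helper name is hypothetical; VDEV_TYPE_HOLE, VDEV_TYPE_MISSING
 * and the ZPOOL_CONFIG_* keys are the real ones from sys/fs/zfs.h. A caller
 * would pass VDEV_TYPE_HOLE or VDEV_TYPE_MISSING along with the child index.
 */
static nvlist_t *
example_make_placeholder_vdev(const char *type, uint64_t id)
{
	nvlist_t *nv;

	if (nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) != 0)
		return (NULL);

	if (nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, type) != 0 ||
	    nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, id) != 0 ||
	    nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, 0ULL) != 0) {
		nvlist_free(nv);
		return (NULL);
	}

	return (nv);
}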
/*
* Return the offset of the given label.
*/
static uint64_t
{
}
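/*
 * Sketch, not the original body: ZFS keeps VDEV_LABELS (four) copies of the
 * label, two at the front of the device and two at the end, each
 * sizeof (vdev_label_t) bytes. Assuming 'size' has already been rounded
 * down to a multiple of the label size, the offset computation looks
 * roughly like this.
 */
static uint64_t
example_label_offset(uint64_t size, int l)
{
	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
}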
/*
* Given a file descriptor, read the label information and return an nvlist
* describing the configuration, if there is one.
*/
int
{
int l;
return (0);
return (-1);
for (l = 0; l < VDEV_LABELS; l++) {
continue;
continue;
continue;
}
continue;
}
return (0);
}
return (0);
}
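/*
 * Sketch, not part of the original source: reading and unpacking a single
 * label copy, as the loop above does for all VDEV_LABELS of them.
 * vdev_label_t and its embedded vp_nvlist buffer come from
 * <sys/vdev_impl.h>; the helper and its precomputed offset argument are
 * hypothetical.
 */
static nvlist_t *
example_read_one_label(int fd, uint64_t offset)
{
	vdev_label_t *label;
	nvlist_t *config = NULL;

	if ((label = malloc(sizeof (vdev_label_t))) == NULL)
		return (NULL);

	if (pread64(fd, label, sizeof (vdev_label_t), offset) ==
	    sizeof (vdev_label_t))
		(void) nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
		    sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0);

	free(label);
	return (config);
}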
typedef struct rdsk_node {
	char			*rn_name;
	int			rn_dfd;
	libzfs_handle_t		*rn_hdl;
	nvlist_t		*rn_config;
	avl_tree_t		*rn_avl;
	avl_node_t		rn_node;
	boolean_t		rn_nozpool;
} rdsk_node_t;
static int
{
int rv;
/*
* slices zero and two are the most likely to provide results,
* so put those first
*/
return (-1);
}
return (1);
}
return (-1);
}
return (1);
}
if (rv == 0)
return (0);
}
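/*
 * Sketch, not in the original file: the "slices zero and two first" ordering
 * used by the comparator above can be thought of as a rank: names ending in
 * "s0" sort first, then "s2", then everything else (by name). The helper is
 * hypothetical and only inspects a trailing two-character suffix.
 */
static int
example_slice_rank(const char *name)
{
	size_t len = strlen(name);

	if (len >= 2 && name[len - 2] == 's') {
		if (name[len - 1] == '0')
			return (0);	/* slice 0, most likely to have a label */
		if (name[len - 1] == '2')
			return (1);	/* slice 2, traditionally the whole disk */
	}
	return (2);			/* all other slices and partitions */
}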
static void
{
char sname[MAXNAMELEN];
/*
* protect against division by zero for disk labels that
* contain a bogus sector size
*/
if (blksz == 0)
/* too small to contain a zpool? */
}
static void
{
char diskname[MAXNAMELEN];
char *ptr;
int i;
return;
ptr[0] = 's';
for (i = 0; i < NDKMAP; i++)
ptr[0] = 'p';
for (i = 0; i <= FD_NUMPART; i++)
}
static void
{
char diskname[MAXNAMELEN];
char *ptr;
int i;
return;
for (i = 0; i < NDKMAP; i++)
check_one_slice(r, diskname, i,
/*
* on x86 we'll still have leftover links that point
* to slices s[9-15], so use NDKMAP instead
*/
for (i = 0; i < NDKMAP; i++)
check_one_slice(r, diskname, i,
/* nodes p[1-4] are never used with EFI labels */
ptr[0] = 'p';
for (i = 1; i <= FD_NUMPART; i++)
}
}
static void
zpool_open_func(void *arg)
{
int fd;
if (rn->rn_nozpool)
return;
/* symlink to a device that's no longer there */
return;
}
/*
* Ignore failed stats. We only want regular
* files, character devs and block devs.
*/
return;
}
/* this file is too small to hold a zpool */
return;
/*
* Try to read the disk label first so we don't have to
* open a bunch of minor nodes that can't have a zpool.
*/
}
return;
}
}
}
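/*
 * Sketch, not part of the original source: the filtering described in the
 * comments above -- only regular files, character devices and block devices
 * large enough to hold a pool are worth probing. The helper is hypothetical;
 * SPA_MINDEVSIZE is the real minimum device size from sys/fs/zfs.h, and
 * <sys/stat.h> is assumed to be available.
 */
static boolean_t
example_can_hold_zpool(const struct stat64 *st)
{
	if (!S_ISREG(st->st_mode) && !S_ISCHR(st->st_mode) &&
	    !S_ISBLK(st->st_mode))
		return (B_FALSE);

	/* only regular files have a meaningful size to check here */
	if (S_ISREG(st->st_mode) && st->st_size < SPA_MINDEVSIZE)
		return (B_FALSE);

	return (B_TRUE);
}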
/*
* Given a file descriptor, clear (zero) the label information. This function
* is currently only used in the appliance stack as part of the ZFS sysevent
* module.
*/
int
zpool_clear_label(int fd)
{
int l;
return (0);
return (-1);
for (l = 0; l < VDEV_LABELS; l++) {
return (-1);
}
return (0);
}
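/*
 * Sketch, not the original body: clearing the labels means overwriting each
 * of the VDEV_LABELS regions with sizeof (vdev_label_t) zero bytes. The
 * helper is hypothetical and reuses example_label_offset() from above; the
 * device size is first aligned down to a whole number of labels.
 */
static int
example_clear_labels(int fd, uint64_t devsize)
{
	uint64_t size = (devsize / sizeof (vdev_label_t)) *
	    sizeof (vdev_label_t);
	char *zeroes;
	int l;

	if ((zeroes = calloc(1, sizeof (vdev_label_t))) == NULL)
		return (-1);

	for (l = 0; l < VDEV_LABELS; l++) {
		if (pwrite64(fd, zeroes, sizeof (vdev_label_t),
		    example_label_offset(size, l)) !=
		    sizeof (vdev_label_t)) {
			free(zeroes);
			return (-1);
		}
	}

	free(zeroes);
	return (0);
}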
/*
* Given a list of directories to search, find all pools stored on disk. This
* includes partial pools which are not available to import. If no args are
* given (argc is 0), then the default directory (/dev/dsk) is searched.
* poolname or guid (but not both) are provided by the caller when trying
* to import a specific pool.
*/
static nvlist_t *
{
char path[MAXPATHLEN];
static char *default_dir = "/dev/dsk";
pool_list_t pools = { 0 };
void *cookie;
if (dirs == 0) {
dirs = 1;
dir = &default_dir;
}
/*
* Go through and read the label configuration information from every
* possible device, organizing the information according to pool GUID
* and toplevel GUID.
*/
for (i = 0; i < dirs; i++) {
tpool_t *t;
char *rdsk;
int dfd;
/* use realpath to normalize the path */
goto error;
}
*end++ = '/';
*end = 0;
/*
* Using raw devices instead of block devices when we're
* reading the labels skips a bunch of slow operations during
* close(2) processing, so we replace /dev/dsk with /dev/rdsk.
*/
else
rdsk);
goto error;
}
/*
* This is not MT-safe, but we have no MT consumers of libzfs
*/
if (name[0] == '.' &&
continue;
}
/*
* create a thread pool to do all of this in parallel;
* rn_nozpool is not protected, so this is racy in that
* multiple tasks could decide that the same slice can
* not hold a zpool, which is benign. Also choose
* double the number of processors; we hold a lot of
* locks in the kernel, so going beyond this doesn't
* buy us much.
*/
0, NULL);
AVL_AFTER)))
tpool_wait(t);
tpool_destroy(t);
char *pname;
&pname) == 0 &&
&this_guid) == 0 &&
}
if (!matched) {
continue;
}
/* use the non-raw path for the config */
goto error;
}
}
}
}
}
}
}
if (dirp)
return (ret);
}
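/*
 * Sketch, not in the original source: the thread-pool pattern described in
 * the comment above, using the tpool interfaces from <thread_pool.h> and the
 * avl walk already used by this file. The helper, its worker argument and
 * its use of rdsk_node_t are illustrative only; sizing the pool at twice the
 * number of online processors mirrors the comment.
 */
static void
example_probe_in_parallel(avl_tree_t *cache, void (*worker)(void *))
{
	tpool_t *tp;
	rdsk_node_t *slice;

	if ((tp = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
	    0, NULL)) == NULL)
		return;

	for (slice = avl_first(cache); slice != NULL;
	    slice = avl_walk(cache, slice, AVL_AFTER))
		(void) tpool_dispatch(tp, worker, slice);

	tpool_wait(tp);		/* block until every probe has completed */
	tpool_destroy(tp);
}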
nvlist_t *
{
importargs_t iarg = { 0 };
}
/*
* Given a cache file, return the contents as a list of importable pools.
* poolname or guid (but not both) are provided by the caller when trying
* to import a specific pool.
*/
nvlist_t *
{
char *buf;
int fd;
char *name;
return (NULL);
}
return (NULL);
}
return (NULL);
}
"failed to read cache file contents"));
return (NULL);
}
"invalid or corrupt cache file contents"));
return (NULL);
}
/*
* Go through and get the current state of the pools and refresh their
* state.
*/
if (nvlist_alloc(&pools, 0, 0) != 0) {
return (NULL);
}
&name) == 0);
continue;
&this_guid) == 0);
if (guid != 0) {
&this_guid) == 0);
continue;
}
return (NULL);
}
if (active)
continue;
return (NULL);
}
return (NULL);
}
}
return (pools);
}
static int
{
int found = 0;
char *pool_name;
ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0);
found = 1;
} else {
ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0);
found = 1;
}
return (found);
}
nvlist_t *
{
}
{
return (B_TRUE);
for (c = 0; c < children; c++)
return (B_TRUE);
}
return (B_FALSE);
}
typedef struct aux_cbdata {
const char *cb_type;
} aux_cbdata_t;
static int
{
&nvroot) == 0);
for (i = 0; i < count; i++) {
ZPOOL_CONFIG_GUID, &guid) == 0);
return (1);
}
}
}
return (0);
}
/*
* Determines if the pool is in use. If so, it returns true and the state of
* the pool as well as the name of the pool. Both strings are allocated and
* must be freed by the caller.
*/
int
{
char *name;
aux_cbdata_t cb = { 0 };
return (-1);
}
return (0);
&stateval) == 0);
&vdev_guid) == 0);
&name) == 0);
&guid) == 0);
}
switch (stateval) {
case POOL_STATE_EXPORTED:
/*
* A pool with an exported state may in fact be imported
* read-only, so check the in-core state to see if it's
* active and imported read-only. If it is, set
* its state to active.
*/
break;
case POOL_STATE_ACTIVE:
/*
* For an active pool, we have to determine if it's really part
* of a currently active pool (in which case the pool will exist
* and the guid will be the same), or whether it's part of an
* active pool that was disconnected without being explicitly
* exported.
*/
return (-1);
}
if (isactive) {
/*
* Because the device may have been removed while
* offlined, we only report it as active if the vdev is
* still present in the config. Otherwise, pretend like
* it's not in use.
*/
!= NULL) {
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
} else {
}
/*
* If this is an active spare within another pool, we
* treat it like an unused hot spare. This allows the
* user to create a pool with a hot spare that is currently
* in use within another pool. Since we return B_TRUE,
* libdiskmgt will continue to prevent generic consumers
* from using the device.
*/
} else {
}
break;
case POOL_STATE_SPARE:
/*
* For a hot spare, it can be either definitively in use, or
* potentially active. To determine if it's in use, we iterate
* over all pools in the system and search for one with a spare
* with a matching guid.
*
* Due to the shared nature of spares, we don't actually report
* the potentially active case as in use. This means the user
* can freely create pools on the hot spares of exported pools,
* but to do otherwise makes the resulting code complicated, and
* we end up having to deal with this case anyway.
*/
} else {
}
break;
case POOL_STATE_L2CACHE:
/*
* Check if any pool is currently using this l2cache device.
*/
} else {
}
break;
default:
}
if (ret) {
return (-1);
}
}
return (0);
}
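/*
 * Usage sketch, not part of the original file: how a caller might use
 * zpool_in_use() to check a device before reusing it. The helper, the
 * open(2) flags and the decision to ignore errors are illustrative only;
 * as noted above, the returned pool name is allocated and must be freed
 * by the caller.
 */
static boolean_t
example_device_in_use(libzfs_handle_t *hdl, const char *path)
{
	pool_state_t state;
	char *name = NULL;
	boolean_t inuse = B_FALSE;
	int fd;

	if ((fd = open(path, O_RDONLY)) < 0)
		return (B_FALSE);

	if (zpool_in_use(hdl, fd, &state, &name, &inuse) == 0 && inuse) {
		/* 'name' and 'state' identify the pool using the device */
		free(name);
	}

	(void) close(fd);
	return (inuse);
}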