/* libzfs_pool.c revision 39c23413b8df94a95f67b34cfd4a4dfc3fd0b48d */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <alloca.h>
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <devid.h>
#include <dirent.h>
#include <fcntl.h>
#include <libintl.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <sys/zfs_ioctl.h>
#include <strings.h>
#include "zfs_namecheck.h"
#include "libzfs_impl.h"
/*
* Validate the given pool name, optionally putting an extended error message in
* 'buf'.
*/
static boolean_t
{
char what;
int ret;
/*
* The rules for reserved pool names were extended at a later point.
* But we need to support users with existing pools that may now be
* invalid. So we only check for this expanded set of names during a
* create (or import), and only in userland.
*/
return (B_FALSE);
}
if (ret != 0) {
switch (why) {
case NAME_ERR_TOOLONG:
break;
case NAME_ERR_INVALCHAR:
"'%c' in pool name"), what);
break;
case NAME_ERR_NOLETTER:
"name must begin with a letter"));
break;
case NAME_ERR_RESERVED:
"name is reserved"));
break;
case NAME_ERR_DISKLIKE:
"pool name is reserved"));
break;
case NAME_ERR_LEADING_SLASH:
"leading slash in name"));
break;
case NAME_ERR_EMPTY_COMPONENT:
"empty component in name"));
break;
case NAME_ERR_TRAILING_SLASH:
"trailing slash in name"));
break;
case NAME_ERR_MULTIPLE_AT:
"multiple '@' delimiters in name"));
break;
}
}
return (B_FALSE);
}
return (B_TRUE);
}
/*
* Set the pool-wide health based on the vdev state of the root vdev.
*/
int
{
char *health;
&nvroot) == 0);
case VDEV_STATE_CLOSED:
case VDEV_STATE_CANT_OPEN:
case VDEV_STATE_OFFLINE:
break;
case VDEV_STATE_DEGRADED:
break;
case VDEV_STATE_HEALTHY:
break;
default:
abort();
}
}
/*
* Open a handle to the given pool, even if the pool is currently in the FAULTED
* state.
*/
{
/*
* Make sure the pool name is valid.
*/
pool);
return (NULL);
}
return (NULL);
return (NULL);
}
if (missing) {
"no such pool"));
pool);
return (NULL);
}
return (zhp);
}
/*
* Like the above, but silent on error. Used when iterating over pools (because
* the configuration cache may be out of date).
*/
int
{
return (-1);
return (-1);
}
if (missing) {
return (0);
}
return (0);
}
/*
* Similar to zpool_open_canfail(), but refuses to open pools in the faulted
* state.
*/
{
return (NULL);
return (NULL);
}
return (zhp);
}
/*
* Close the handle. Simply frees the memory associated with the handle.
*/
void
{
if (zhp->zpool_config)
if (zhp->zpool_old_config)
if (zhp->zpool_error_log) {
int i;
for (i = 0; i < zhp->zpool_error_count; i++)
}
}
/*
* Return the name of the pool.
*/
const char *
{
return (zhp->zpool_name);
}
/*
* Return the GUID of the pool.
*/
{
&guid) == 0);
return (guid);
}
/*
* Return the version of the pool.
*/
{
&version) == 0);
return (version);
}
/*
* Return the amount of space currently consumed by the pool.
*/
{
&nvroot) == 0);
}
/*
* Return the total space in the pool.
*/
{
&nvroot) == 0);
}
/*
* Return the alternate root for this pool, if any.
*/
int
{
return (-1);
return (0);
}
/*
* Return the state of the pool (ACTIVE or UNAVAILABLE)
*/
int
{
return (zhp->zpool_state);
}
/*
* Create the named pool, using the provided vdev list. It is assumed
* that the consumer has already validated the contents of the nvlist, so we
* don't have to worry about error semantics.
*/
int
const char *altroot)
{
char msg[1024];
"cannot create '%s'"), pool);
return (-1);
switch (errno) {
case EBUSY:
/*
* This can happen if the user has specified the same
* device multiple times. We can't reliably detect this
* until we try to add it and see we already have a
* label.
*/
"one or more vdevs refer to the same device"));
case EOVERFLOW:
/*
* This occurs when one of the devices is below
* SPA_MINDEVSIZE. Unfortunately, we can't detect which
* device was the problem device since there's no
* reliable way to determine device size from userland.
*/
{
char buf[64];
"one or more devices is less than the "
"minimum size (%s)"), buf);
}
case ENOSPC:
"one or more devices is out of space"));
default:
}
}
/*
* If this is an alternate root pool, then we automatically set the
* mountpoint of the root dataset to be '/'.
*/
"/") == 0);
}
return (0);
}
/*
* Destroy the given pool. It is up to the caller to ensure that there are no
* datasets left in the pool.
*/
int
{
char msg[1024];
ZFS_TYPE_FILESYSTEM)) == NULL)
return (-1);
if (zpool_remove_zvol_links(zhp) != 0)
return (-1);
"one or more devices is read only"));
} else {
}
if (zfp)
return (-1);
}
if (zfp) {
}
return (0);
}
/*
* Add the given vdevs to the pool. The caller must have already performed the
* necessary verification to ensure that the vdev specification is well-formed.
*/
int
{
int ret;
char msg[1024];
"upgraded to add hot spares"));
}
return (-1);
switch (errno) {
case EBUSY:
/*
* This can happen if the user has specified the same
* device multiple times. We can't reliably detect this
* until we try to add it and see we already have a
* label.
*/
"one or more vdevs refer to the same device"));
break;
case EOVERFLOW:
/*
* This occurrs when one of the devices is below
* SPA_MINDEVSIZE. Unfortunately, we can't detect which
* device was the problem device since there's no
* reliable way to determine device size from userland.
*/
{
char buf[64];
"device is less than the minimum "
"size (%s)"), buf);
}
break;
case ENOTSUP:
"pool must be upgraded to add raidz2 vdevs"));
break;
default:
}
ret = -1;
} else {
ret = 0;
}
return (ret);
}
/*
* Exports the pool from the system. The caller must ensure that there are no
* mounted datasets in the pool.
*/
int
{
if (zpool_remove_zvol_links(zhp) != 0)
return (-1);
zhp->zpool_name));
return (0);
}
/*
* Import the given pool using the known configuration. The configuration
* should have come from zpool_find_import(). The 'newname' and 'altroot'
* parameters control whether the pool is imported with a different name or with
* an alternate root, respectively.
*/
int
const char *altroot)
{
char *thename;
char *origname;
int ret;
&origname) == 0);
newname));
} else {
}
altroot));
else
return (-1);
ret = 0;
char desc[1024];
thename);
else
switch (errno) {
case ENOTSUP:
/*
* Unsupported version.
*/
break;
case EINVAL:
break;
default:
}
ret = -1;
} else {
/*
* This should never fail, but play it safe anyway.
*/
ret = -1;
}
}
return (ret);
}
/*
* Scrub the pool.
*/
int
{
char msg[1024];
return (0);
else
}
/*
* 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
* spare; but FALSE if its an INUSE spare.
*/
static nvlist_t *
{
char *path;
/*
* If the device has never been present since import, the only
* reliable way to match the vdev is by GUID.
*/
return (nv);
&wholedisk);
if (wholedisk) {
/*
* For whole disks, the internal path has 's0', but the
* path passed in by the user doesn't.
*/
return (nv);
return (nv);
}
}
return (NULL);
for (c = 0; c < children; c++)
avail_spare)) != NULL)
return (ret);
for (c = 0; c < children; c++) {
avail_spare)) != NULL) {
*avail_spare = B_TRUE;
return (ret);
}
}
}
return (NULL);
}
nvlist_t *
{
char buf[MAXPATHLEN];
const char *search;
char *end;
} else if (path[0] != '/') {
} else {
}
&nvroot) == 0);
*avail_spare = B_FALSE;
}
/*
* Returns TRUE if the given guid corresponds to a spare (INUSE or not).
*/
static boolean_t
{
int i;
&nvroot) == 0);
for (i = 0; i < nspares; i++) {
ZPOOL_CONFIG_GUID, &spare_guid) == 0);
if (guid == spare_guid)
return (B_TRUE);
}
}
return (B_FALSE);
}
/*
* Bring the specified vdev online
*/
int
{
char msg[1024];
return (0);
}
/*
* Take the specified vdev offline
*/
int
{
char msg[1024];
return (0);
switch (errno) {
case EBUSY:
/*
* There are no other replicas of this device.
*/
default:
}
}
/*
* Returns TRUE if the given nvlist is a vdev that was originally swapped in as
* a hot spare.
*/
static boolean_t
{
char *type;
&children) == 0) {
&type) == 0);
return (B_TRUE);
for (c = 0; c < children; c++)
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Attach new_disk (fully described by nvroot) to old_disk.
* If 'replacing' is specified, tne new disk will replace the old one.
*/
int
{
char msg[1024];
int ret;
char *path;
if (replacing)
else
if (avail_spare)
"new device must be a single disk"));
}
ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
/*
* If the target is a hot spare that has been swapped in, we can only
* replace it with another hot spare.
*/
if (replacing &&
"can only be replaced by another hot spare"));
}
/*
* If we are attempting to replace a spare, it canot be applied to an
* already spared device.
*/
if (replacing &&
"device has already been replaced with a spare"));
}
return (-1);
if (ret == 0)
return (0);
switch (errno) {
case ENOTSUP:
/*
* Can't attach to or replace this type of vdev.
*/
if (replacing)
"cannot replace a replacing device"));
else
"can only attach to mirrors and top-level "
"disks"));
break;
case EINVAL:
/*
* The new device must be a single disk.
*/
"new device must be a single disk"));
break;
case EBUSY:
new_disk);
break;
case EOVERFLOW:
/*
* The new device is too small.
*/
"device is too small"));
break;
case EDOM:
/*
* The new device has a different alignment requirement.
*/
"devices have different sector alignment"));
break;
case ENAMETOOLONG:
/*
* The resulting top-level vdev spec won't fit in the label.
*/
break;
default:
}
return (-1);
}
/*
* Detach the specified device.
*/
int
{
char msg[1024];
if (avail_spare)
return (0);
switch (errno) {
case ENOTSUP:
/*
* Can't detach from this type of vdev.
*/
"applicable to mirror and replacing vdevs"));
break;
case EBUSY:
/*
* There are no other replicas of this device.
*/
break;
default:
}
return (-1);
}
/*
* Remove the given device. Currently, this is supported only for hot spares.
*/
int
{
char msg[1024];
if (!avail_spare) {
"only inactive hot spares can be removed"));
}
return (0);
}
/*
* Clear the errors for the pool, or the particular device if specified.
*/
int
{
char msg[1024];
if (path)
path);
else
zhp->zpool_name);
if (path) {
if (avail_spare)
}
return (0);
}
/*
* hierarchy.
*/
int
void *data)
{
char (*paths)[MAXPATHLEN];
}
/*
* Oddly this wasn't a directory -- ignore that failure since we
* know there are no links lower in the (non-existant) hierarchy.
*/
return (0);
}
return (-1);
}
curr = 0;
while (curr >= 0) {
goto err;
goto err;
goto err;
}
continue;
goto err;
}
size *= 2;
}
curr++;
}
} else {
break;
}
curr--;
}
return (ret);
err:
return (-1);
}
typedef struct zvol_cb {
} zvol_cb_t;
/*ARGSUSED*/
static int
{
int ret;
if (ZFS_IS_VOLUME(zhp))
return (ret);
}
/*
* Iterate over all zvols in the pool and make any necessary minor nodes.
*/
int
{
int ret;
/*
* If the pool is unavailable, just return success.
*/
return (0);
return (ret);
}
static int
{
}
/*
* Iterate over all zvols in the pool and remove any minor nodes. We iterate
* by examining the /dev links so that a corrupted pool doesn't impede this
* operation.
*/
int
{
}
/*
* Convert from a devid string to a path.
*/
static char *
devid_to_path(char *devid_str)
{
char *minor;
char *path;
int ret;
return (NULL);
if (ret != 0)
return (NULL);
return (NULL);
return (path);
}
/*
* Convert from a path to a devid string.
*/
static char *
path_to_devid(const char *path)
{
int fd;
return (NULL);
}
return (ret);
}
/*
* Issue the necessary ioctl() to update the stored path value for the vdev. We
* ignore any failure here, since a common case is for an unprivileged user to
* type 'zpool status', and we'll display the correct information anyway.
*/
static void
{
}
/*
* Given a vdev, return the name to display in iostat. If the vdev has a path,
* We also check if this is a whole disk, in which case we strip off the
* trailing 's0' slice name.
*
* This routine is also responsible for identifying when disks have been
* reconfigured in a new location. The kernel will have opened the device by
* devid, but the path will still refer to the old location. To catch this, we
* first do a path -> devid translation (which is fast for the common case). If
* the devid matches, we're done. If not, we do a reverse devid -> path
* translation and issue the appropriate ioctl() to update the path of the vdev.
* If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
* of these checks.
*/
char *
{
char buf[64];
&value) == 0) {
&value) == 0);
/*
* Determine if the current path is correct.
*/
char *newpath;
/*
* Update the path appropriately.
*/
if (nvlist_add_string(nv,
ZPOOL_CONFIG_PATH, newpath) == 0)
&path) == 0);
}
}
if (newdevid)
}
path += 9;
return (NULL);
return (tmp);
}
} else {
/*
* If it's a raidz device, we need to stick in the parity level.
*/
&value) == 0);
}
}
}
static int
zbookmark_compare(const void *a, const void *b)
{
return (memcmp(a, b, sizeof (zbookmark_t)));
}
/*
* Retrieve the persistent error log, uniquify the members, and return to the
* caller.
*/
int
{
int i, j;
return (0);
}
/*
* Retrieve the raw error list from the kernel. If the number of errors
* has increased, allocate more space and continue until we get the
* entire list.
*/
&count) == 0);
return (-1);
for (;;) {
&zc) != 0) {
return (-1);
} else {
return (-1);
}
} else {
break;
}
}
/*
* Sort the resulting bookmarks. This is a little confusing due to the
* implementation of ZFS_IOC_ERROR_LOG. The bookmarks are copied last
* to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
* _not_ copied as part of the process. So we point the start of our
* array appropriate and decrement the total number of elements.
*/
/*
* Count the number of unique elements
*/
j = 0;
for (i = 0; i < count; i++) {
sizeof (zbookmark_t)) == 0)
continue;
j++;
}
/*
* If the user has only requested the number of items, return it now
* without bothering with the extra work.
*/
*nelem = j;
return (0);
}
zhp->zpool_error_count = j;
/*
* Allocate an array of nvlists to hold the results
*/
return (-1);
}
/*
* Fill in the results with names from the kernel.
*/
j = 0;
for (i = 0; i < count; i++) {
char buf[64];
sizeof (zbookmark_t)) == 0)
continue;
goto nomem;
for (;;) {
ZFS_IOC_BOOKMARK_NAME, &zc) != 0) {
!= 0) {
goto nomem;
}
continue;
} else {
0) != 0)
goto nomem;
"%llx", (longlong_t)
if (nvlist_add_string(nv,
ZPOOL_ERR_DATASET, buf) != 0)
goto nomem;
"%llx", (longlong_t)
if (nvlist_add_string(nv,
ZPOOL_ERR_OBJECT, buf) != 0)
goto nomem;
"lvl=%u blkid=%llu",
if (nvlist_add_string(nv,
ZPOOL_ERR_RANGE, buf) != 0)
goto nomem;
}
} else {
&zhp->zpool_error_log[j]) != 0) {
goto nomem;
}
}
break;
}
j++;
}
return (0);
for (i = 0; i < zhp->zpool_error_count; i++)
}
/*
* Upgrade a ZFS pool to the latest on-disk version.
*/
int
{
zhp->zpool_name));
return (0);
}
/*
* Log command history.
*
* 'pool' is B_TRUE if we are logging a command for 'zpool'; B_FALSE
* otherwise ('zfs'). 'pool_create' is B_TRUE if we are logging the creation
* of the pool; B_FALSE otherwise. 'path' is the pathanme containing the
* poolname. 'argc' and 'argv' are used to construct the command string.
*/
void
{
char cmd_buf[HIS_MAX_RECORD_LEN];
char *dspath;
int i;
/* construct the command string */
for (i = 0; i < argc; i++) {
break;
}
/* figure out the poolname */
} else {
}
/* overloading zc_history_offset */
}
/*
* Perform ioctl to get some command history of a pool.
*
* 'buf' is the buffer to fill up to 'len' bytes. 'off' is the
* logical offset of the history buffer to start reading from.
*
* Upon return, 'off' is the next logical offset to read from and
* 'len' is the actual amount of bytes read into 'buf'.
*/
static int
{
switch (errno) {
case EPERM:
"cannot show history for pool '%s'"),
zhp->zpool_name));
case ENOENT:
default:
}
}
return (0);
}
/*
* Process the buffer of nvlists, unpacking and storing each nvlist record
* into 'records'. 'leftover' is set to the number of bytes that weren't
* processed as there wasn't a complete record.
*/
static int
{
int i;
while (bytes_read > sizeof (reclen)) {
/* get length of packed record (stored as little endian) */
break;
/* unpack record */
return (ENOMEM);
/* add record to nvlist array */
(*numrecords)++;
}
}
*leftover = bytes_read;
return (0);
}
/*
* Retrieve the command history of a pool.
*/
int
{
char buf[HIS_BUF_LEN];
uint_t numrecords = 0;
int err, i;
do {
break;
/* if nothing else was read in, we're at EOF, just return */
if (!bytes_read)
break;
break;
/* CONSTCOND */
} while (1);
if (!err) {
records, numrecords) == 0);
}
for (i = 0; i < numrecords; i++)
nvlist_free(records[i]);
return (err);
}