libzfs_pool.c revision 2a6b87f07ac0c0b819179c84afe5a60afa04cfa5
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <alloca.h>
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <devid.h>
#include <dirent.h>
#include <fcntl.h>
#include <libintl.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <sys/efi_partition.h>
#include <sys/zfs_ioctl.h>
#include <strings.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
#include "libzfs_impl.h"
/*
* Validate the given pool name, optionally putting an extended error message in
* 'buf'.
*/
static boolean_t
{
char what;
int ret;
/*
* The rules for reserved pool names were extended at a later point.
* But we need to support users with existing pools that may now be
* invalid. So we only check for this expanded set of names during a
* create (or import), and only in userland.
*/
return (B_FALSE);
}
if (ret != 0) {
switch (why) {
case NAME_ERR_TOOLONG:
break;
case NAME_ERR_INVALCHAR:
"'%c' in pool name"), what);
break;
case NAME_ERR_NOLETTER:
"name must begin with a letter"));
break;
case NAME_ERR_RESERVED:
"name is reserved"));
break;
case NAME_ERR_DISKLIKE:
"pool name is reserved"));
break;
case NAME_ERR_LEADING_SLASH:
"leading slash in name"));
break;
case NAME_ERR_EMPTY_COMPONENT:
"empty component in name"));
break;
case NAME_ERR_TRAILING_SLASH:
"trailing slash in name"));
break;
case NAME_ERR_MULTIPLE_AT:
"multiple '@' delimiters in name"));
break;
}
}
return (B_FALSE);
}
return (B_TRUE);
}
static int
{
return (-1);
return (-1);
}
} else {
return (-1);
}
}
return (-1);
}
return (0);
}
/*
* Open a handle to the given pool, even if the pool is currently in the FAULTED
* state.
*/
{
/*
* Make sure the pool name is valid.
*/
pool);
return (NULL);
}
return (NULL);
return (NULL);
}
if (missing) {
"no such pool"));
pool);
return (NULL);
}
return (zhp);
}
/*
* Like the above, but silent on error. Used when iterating over pools (because
* the configuration cache may be out of date).
*/
int
{
return (-1);
return (-1);
}
if (missing) {
return (0);
}
return (0);
}
/*
* Similar to zpool_open_canfail(), but refuses to open pools in the faulted
* state.
*/
{
return (NULL);
return (NULL);
}
return (zhp);
}
/*
* Close the handle. Simply frees the memory associated with the handle.
*/
void
{
if (zhp->zpool_config)
if (zhp->zpool_old_config)
if (zhp->zpool_props)
}
/*
* Return the name of the pool.
*/
const char *
{
return (zhp->zpool_name);
}
/*
* Return the GUID of the pool.
*/
{
&guid) == 0);
return (guid);
}
/*
* Return the version of the pool.
*/
{
&version) == 0);
return (version);
}
/*
* Return the amount of space currently consumed by the pool.
*/
{
&nvroot) == 0);
}
/*
* Return the total space in the pool.
*/
{
&nvroot) == 0);
}
/*
* Return the alternate root for this pool, if any.
*/
int
{
return (-1);
return (0);
}
/*
* Return the state of the pool (ACTIVE or UNAVAILABLE)
*/
int
{
return (zhp->zpool_state);
}
/*
* Create the named pool, using the provided vdev list. It is assumed
* that the consumer has already validated the contents of the nvlist, so we
* don't have to worry about error semantics.
*/
int
const char *altroot)
{
char msg[1024];
"cannot create '%s'"), pool);
return (-1);
switch (errno) {
case EBUSY:
/*
* This can happen if the user has specified the same
* device multiple times. We can't reliably detect this
* until we try to add it and see we already have a
* label.
*/
"one or more vdevs refer to the same device"));
case EOVERFLOW:
/*
* This occurs when one of the devices is below
* SPA_MINDEVSIZE. Unfortunately, we can't detect which
* device was the problem device since there's no
* reliable way to determine device size from userland.
*/
{
char buf[64];
"one or more devices is less than the "
"minimum size (%s)"), buf);
}
case ENOSPC:
"one or more devices is out of space"));
default:
}
}
/*
* If this is an alternate root pool, then we automatically set the
* mountpoint of the root dataset to be '/'.
*/
"/") == 0);
}
return (0);
}
/*
* Destroy the given pool. It is up to the caller to ensure that there are no
* datasets left in the pool.
*/
int
{
char msg[1024];
ZFS_TYPE_FILESYSTEM)) == NULL)
return (-1);
if (zpool_remove_zvol_links(zhp) != 0)
return (-1);
"one or more devices is read only"));
} else {
}
if (zfp)
return (-1);
}
if (zfp) {
}
return (0);
}
/*
* Add the given vdevs to the pool. The caller must have already performed the
* necessary verification to ensure that the vdev specification is well-formed.
*/
int
{
int ret;
char msg[1024];
"upgraded to add hot spares"));
}
return (-1);
switch (errno) {
case EBUSY:
/*
* This can happen if the user has specified the same
* device multiple times. We can't reliably detect this
* until we try to add it and see we already have a
* label.
*/
"one or more vdevs refer to the same device"));
break;
case EOVERFLOW:
/*
* This occurs when one of the devices is below
* SPA_MINDEVSIZE. Unfortunately, we can't detect which
* device was the problem device since there's no
* reliable way to determine device size from userland.
*/
{
char buf[64];
"device is less than the minimum "
"size (%s)"), buf);
}
break;
case ENOTSUP:
"pool must be upgraded to add these vdevs"));
break;
case EDOM:
"root pool can not have multiple vdevs"
" or separate logs"));
break;
default:
}
ret = -1;
} else {
ret = 0;
}
return (ret);
}
/*
* Exports the pool from the system. The caller must ensure that there are no
* mounted datasets in the pool.
*/
int
{
if (zpool_remove_zvol_links(zhp) != 0)
return (-1);
zhp->zpool_name));
return (0);
}
/*
* Import the given pool using the known configuration. The configuration
* should have come from zpool_find_import(). The 'newname' and 'altroot'
* parameters control whether the pool is imported with a different name or with
* an alternate root, respectively.
*/
int
const char *altroot)
{
char *thename;
char *origname;
int ret;
&origname) == 0);
newname));
} else {
}
altroot));
else
return (-1);
ret = 0;
char desc[1024];
thename);
else
switch (errno) {
case ENOTSUP:
/*
* Unsupported version.
*/
break;
case EINVAL:
break;
default:
}
ret = -1;
} else {
/*
* This should never fail, but play it safe anyway.
*/
ret = -1;
}
}
return (ret);
}
/*
* Scrub the pool.
*/
int
{
char msg[1024];
return (0);
else
}
/*
* 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
* spare; but FALSE if it's an INUSE spare.
*/
static nvlist_t *
{
char *path;
/*
* If the device has never been present since import, the only
* reliable way to match the vdev is by GUID.
*/
return (nv);
&wholedisk);
if (wholedisk) {
/*
* For whole disks, the internal path has 's0', but the
* path passed in by the user doesn't.
*/
return (nv);
return (nv);
}
}
return (NULL);
for (c = 0; c < children; c++)
avail_spare)) != NULL)
return (ret);
for (c = 0; c < children; c++) {
avail_spare)) != NULL) {
*avail_spare = B_TRUE;
return (ret);
}
}
}
return (NULL);
}
nvlist_t *
{
char buf[MAXPATHLEN];
const char *search;
char *end;
} else if (path[0] != '/') {
} else {
}
&nvroot) == 0);
*avail_spare = B_FALSE;
}
/*
* Returns TRUE if the given guid corresponds to a spare (INUSE or not).
*/
static boolean_t
{
int i;
&nvroot) == 0);
for (i = 0; i < nspares; i++) {
ZPOOL_CONFIG_GUID, &spare_guid) == 0);
if (guid == spare_guid)
return (B_TRUE);
}
}
return (B_FALSE);
}
/*
* Bring the specified vdev online. The 'flags' parameter is a set of the
* ZFS_ONLINE_* flags.
*/
int
{
char msg[1024];
return (0);
}
/*
* Take the specified vdev offline
*/
int
{
char msg[1024];
return (0);
switch (errno) {
case EBUSY:
/*
* There are no other replicas of this device.
*/
default:
}
}
/*
* Mark the given vdev faulted.
*/
int
{
char msg[1024];
return (0);
switch (errno) {
case EBUSY:
/*
* There are no other replicas of this device.
*/
default:
}
}
/*
* Mark the given vdev degraded.
*/
int
{
char msg[1024];
return (0);
}
/*
* Returns TRUE if the given nvlist is a vdev that was originally swapped in as
* a hot spare.
*/
static boolean_t
{
char *type;
&children) == 0) {
&type) == 0);
return (B_TRUE);
for (c = 0; c < children; c++)
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Attach new_disk (fully described by nvroot) to old_disk.
* If 'replacing' is specified, the new disk will replace the old one.
*/
int
{
char msg[1024];
int ret;
char *path;
if (replacing)
else
if (avail_spare)
"new device must be a single disk"));
}
ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
/*
* If the target is a hot spare that has been swapped in, we can only
* replace it with another hot spare.
*/
if (replacing &&
"can only be replaced by another hot spare"));
}
/*
* If we are attempting to replace a spare, it cannot be applied to an
* already spared device.
*/
if (replacing &&
"device has already been replaced with a spare"));
}
return (-1);
if (ret == 0)
return (0);
switch (errno) {
case ENOTSUP:
/*
* Can't attach to or replace this type of vdev.
*/
if (replacing) {
&is_log);
if (is_log)
"cannot replace a log with a spare"));
else
"cannot replace a replacing device"));
} else {
"can only attach to mirrors and top-level "
"disks"));
}
break;
case EINVAL:
/*
* The new device must be a single disk.
*/
"new device must be a single disk"));
break;
case EBUSY:
new_disk);
break;
case EOVERFLOW:
/*
* The new device is too small.
*/
"device is too small"));
break;
case EDOM:
/*
* The new device has a different alignment requirement.
*/
"devices have different sector alignment"));
break;
case ENAMETOOLONG:
/*
* The resulting top-level vdev spec won't fit in the label.
*/
break;
default:
}
return (-1);
}
/*
* Detach the specified device.
*/
int
{
char msg[1024];
if (avail_spare)
return (0);
switch (errno) {
case ENOTSUP:
/*
* Can't detach from this type of vdev.
*/
"applicable to mirror and replacing vdevs"));
break;
case EBUSY:
/*
* There are no other replicas of this device.
*/
break;
default:
}
return (-1);
}
/*
* Remove the given device. Currently, this is supported only for hot spares.
*/
int
{
char msg[1024];
if (!avail_spare) {
"only inactive hot spares can be removed"));
}
return (0);
}
/*
* Clear the errors for the pool, or the particular device if specified.
*/
int
{
char msg[1024];
if (path)
path);
else
zhp->zpool_name);
if (path) {
if (avail_spare)
}
return (0);
}
/*
* Similar to zpool_clear(), but takes a GUID (used by fmd).
*/
int
{
char msg[1024];
guid);
return (0);
}
/*
* hierarchy.
*/
int
void *data)
{
char (*paths)[MAXPATHLEN];
}
/*
* Oddly this wasn't a directory -- ignore that failure since we
* know there are no links lower in the (non-existent) hierarchy.
*/
return (0);
}
return (-1);
}
curr = 0;
while (curr >= 0) {
goto err;
goto err;
goto err;
}
continue;
goto err;
}
size *= 2;
}
curr++;
}
} else {
break;
}
curr--;
}
return (ret);
err:
return (-1);
}
typedef struct zvol_cb {
} zvol_cb_t;
/*ARGSUSED*/
static int
{
int ret = 0;
if (ZFS_IS_VOLUME(zhp)) {
}
if (ret == 0)
return (ret);
}
/*
* Iterate over all zvols in the pool and make any necessary minor nodes.
*/
int
{
int ret;
/*
* If the pool is unavailable, just return success.
*/
return (0);
return (ret);
}
static int
{
}
/*
* Iterate over all zvols in the pool and remove any minor nodes. We iterate
* by examining the /dev links so that a corrupted pool doesn't impede this
* operation.
*/
int
{
}
/*
* Convert from a devid string to a path.
*/
static char *
devid_to_path(char *devid_str)
{
char *minor;
char *path;
int ret;
return (NULL);
if (ret != 0)
return (NULL);
return (NULL);
return (path);
}
/*
* Convert from a path to a devid string.
*/
static char *
path_to_devid(const char *path)
{
int fd;
return (NULL);
}
return (ret);
}
/*
* Issue the necessary ioctl() to update the stored path value for the vdev. We
* ignore any failure here, since a common case is for an unprivileged user to
* type 'zpool status', and we'll display the correct information anyway.
*/
static void
{
}
/*
* Given a vdev, return the name to display in iostat. If the vdev has a path,
* we also check if this is a whole disk, in which case we strip off the
* trailing 's0' slice name.
*
* This routine is also responsible for identifying when disks have been
* reconfigured in a new location. The kernel will have opened the device by
* devid, but the path will still refer to the old location. To catch this, we
* first do a path -> devid translation (which is fast for the common case). If
* the devid matches, we're done. If not, we do a reverse devid -> path
* translation and issue the appropriate ioctl() to update the path of the vdev.
* If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
* of these checks.
*/
char *
{
char buf[64];
&value) == 0) {
&value) == 0);
/*
* If the device is dead (faulted, offline, etc) then don't
* bother opening it. Otherwise we may be forcing the user to
* open a misbehaving device, which can have undesirable
* effects.
*/
/*
* Determine if the current path is correct.
*/
char *newpath;
/*
* Update the path appropriately.
*/
if (nvlist_add_string(nv,
ZPOOL_CONFIG_PATH, newpath) == 0)
&path) == 0);
}
}
if (newdevid)
}
path += 9;
return (NULL);
return (tmp);
}
} else {
/*
* If it's a raidz device, we need to stick in the parity level.
*/
&value) == 0);
}
}
}
/*
 * Order two bookmarks by comparing their raw bytes across the full
 * sizeof (zbookmark_t) extent. The result carries memcmp()'s sign
 * convention: negative, zero, or positive. The
 * (const void *, const void *) -> int shape matches a qsort(3C)
 * comparator; presumably this is what the error-log sort uses --
 * NOTE(review): confirm against the caller.
 */
static int
zbookmark_compare(const void *a, const void *b)
{
	const size_t nbytes = sizeof (zbookmark_t);
	int order = memcmp(a, b, nbytes);

	return (order);
}
/*
* Retrieve the persistent error log, uniquify the members, and return to the
* caller.
*/
int
{
int i;
/*
* Retrieve the raw error list from the kernel. If the number of errors
* has increased, allocate more space and continue until we get the
* entire list.
*/
&count) == 0);
if (count == 0)
return (0);
return (-1);
for (;;) {
&zc) != 0) {
return (-1);
} else {
return (-1);
}
} else {
break;
}
}
/*
* Sort the resulting bookmarks. This is a little confusing due to the
* implementation of ZFS_IOC_ERROR_LOG. The bookmarks are copied last
* to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
* _not_ copied as part of the process. So we point the start of our
* array appropriately and decrement the total number of elements.
*/
/*
* Fill in the nverrlistp with nvlist's of dataset and object numbers.
*/
for (i = 0; i < count; i++) {
/* ignoring zb_blkid and zb_level for now */
continue;
goto nomem;
goto nomem;
}
goto nomem;
}
goto nomem;
}
}
return (0);
}
/*
* Upgrade a ZFS pool to the latest on-disk version.
*/
int
{
zhp->zpool_name));
return (0);
}
void
char *history_str)
{
int i;
for (i = 1; i < argc; i++) {
break;
}
}
/*
* Stage command history for logging.
*/
int
{
if (history_str == NULL)
return (EINVAL);
return (EINVAL);
return (0);
}
/*
* Perform ioctl to get some command history of a pool.
*
* 'buf' is the buffer to fill up to 'len' bytes. 'off' is the
* logical offset of the history buffer to start reading from.
*
* Upon return, 'off' is the next logical offset to read from and
* 'len' is the actual amount of bytes read into 'buf'.
*/
static int
{
switch (errno) {
case EPERM:
"cannot show history for pool '%s'"),
zhp->zpool_name));
case ENOENT:
case ENOTSUP:
default:
}
}
return (0);
}
/*
* Process the buffer of nvlists, unpacking and storing each nvlist record
* into 'records'. 'leftover' is set to the number of bytes that weren't
* processed as there wasn't a complete record.
*/
static int
{
int i;
while (bytes_read > sizeof (reclen)) {
/* get length of packed record (stored as little endian) */
break;
/* unpack record */
return (ENOMEM);
/* add record to nvlist array */
(*numrecords)++;
}
}
*leftover = bytes_read;
return (0);
}
/*
* Retrieve the command history of a pool.
*/
int
{
char buf[HIS_BUF_LEN];
uint_t numrecords = 0;
int err, i;
do {
break;
/* if nothing else was read in, we're at EOF, just return */
if (!bytes_read)
break;
break;
/* CONSTCOND */
} while (1);
if (!err) {
records, numrecords) == 0);
}
for (i = 0; i < numrecords; i++)
nvlist_free(records[i]);
return (err);
}
void
{
char dsname[MAXNAMELEN];
if (dsobj == 0) {
/* special case for the MOS */
return;
}
/* get the dataset's name */
ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
/* just write out a path of two object numbers */
return;
}
/* find out if the dataset is mounted */
/* get the corrupted object's path */
&zc) == 0) {
if (mounted) {
} else {
}
} else {
}
}
/*
 * Directory holding raw disk device nodes. NOTE(review): presumably
 * prepended to a short disk name to build a device path -- confirm
 * against the users of this macro.
 */
#define RDISK_ROOT "/dev/rdsk"
/*
 * Slice-name suffix "s2". NOTE(review): the macro name suggests this is
 * the backup slice of a disk -- confirm against the users of this macro.
 */
#define BACKUP_SLICE "s2"
/*
* Don't start the slice at the default block of 34; many storage
* devices will use a stripe width of 128k, so start there instead.
*/
#define NEW_START_BLOCK 256
/*
* determine where a partition starts on a disk in the current
* configuration
*/
static diskaddr_t
{
char *path;
int fd;
char diskname[MAXPATHLEN];
return (MAXOFFSET_T);
}
ZPOOL_CONFIG_PATH, &path) != 0) {
return (MAXOFFSET_T);
}
}
}
return (sb);
}
for (c = 0; c < children; c++) {
if (sb != MAXOFFSET_T) {
return (sb);
}
}
return (MAXOFFSET_T);
}
/*
* Label an individual disk. The name provided is the short name,
* stripped of any leading /dev path.
*/
int
{
char path[MAXPATHLEN];
int fd;
char errbuf[1024];
if (zhp) {
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
if (zhp->zpool_start_block == 0)
else
} else {
/* new pool */
}
/*
* This shouldn't happen. We've long since verified that this
* is a valid device.
*/
"label '%s': unable to open device"), name);
}
/*
* The only way this can fail is if we run out of memory, or we
* were unable to read the disk's capacity
*/
"label '%s': unable to read disk capacity"), name);
}
if (start_block == MAXOFFSET_T)
/*
* Why we use V_USR: V_BACKUP confuses users, and is considered
* disposable by some EFI utilities (since EFI doesn't have a backup
* slice). V_UNASSIGNED is supposed to be used only for zero size
* partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT,
* etc. were all pretty specific. V_USR is as close to reality as we
* can get, in the absence of V_OTHER.
*/
/*
* Some block drivers (like pcata) may not support EFI
* GPT labels. Print out a helpful error message
* directing the user to manually label the disk and give
* a specific slice.
*/
"cannot label '%s': try using fdisk(1M) and then "
"provide a specific slice"), name);
}
return (0);
}
int
{
int ret = -1;
char errbuf[1024];
zhp->zpool_name);
"upgraded to support pool properties"));
}
}
return (-1);
}
/*
* Execute the corresponding ioctl() to set this property.
*/
return (-1);
if (ret)
return (ret);
}
{
return (0);
return (zpool_prop_default_numeric(prop));
switch (prop) {
case ZPOOL_PROP_AUTOREPLACE:
} else {
&value) == 0);
}
return (value);
break;
default:
assert(0);
}
return (0);
}
int
{
"upgraded to support pool properties"));
}
prop != ZPOOL_PROP_NAME)
switch (prop) {
case ZPOOL_PROP_NAME:
break;
case ZPOOL_PROP_BOOTFS:
strvalue = "-";
} else {
ZFS_PROP_SOURCE, &value) == 0);
&strvalue) == 0);
return (-1);
}
break;
case ZPOOL_PROP_DELEGATION:
case ZPOOL_PROP_AUTOREPLACE:
} else {
ZFS_PROP_SOURCE, &value) == 0);
&value) == 0);
}
break;
default:
return (-1);
}
if (srctype)
return (0);
}
int
{
}
int
{
char buf[ZFS_MAXPROPLEN];
return (-1);
continue;
NULL) == 0) {
}
}
return (0);
}