meta_raid.c revision b2178a54bfad47b786da1c125ad5c89bbffb0a03
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Just in case we're not in a build environment, make sure that
* TEXT_DOMAIN gets set to something.
*/
#if !defined(TEXT_DOMAIN)
#define TEXT_DOMAIN "SYS_TEST"
#endif
/*
* RAID operations
*/
#include <stdlib.h>
#include <meta.h>
#include <stddef.h>
/*
* FUNCTION: meta_get_raid_names()
* INPUT: sp - the set name to get raids from
* options - options from the command line
* OUTPUT: nlpp - list of all raid names
* ep - return error pointer
* RETURNS: int - -1 if error, 0 success
* PURPOSE: returns a list of all raids in the metadb
* for all devices in the specified set
*/
int
mdnamelist_t **nlpp,
int options,
)
{
}
/*
* free raid unit
*/
void
)
{
}
}
/*
* get raid (common)
*/
int fast,
)
{
char *miscname;
/* must have set */
/* short circuit */
}
/* get miscname and unit */
return (NULL);
return (NULL);
}
return (NULL);
/* allocate raid */
/* allocate columns */
/* get common info */
/* get options */
goto out;
}
/* get columns, update unit state */
/* get column name */
goto out;
/* override any start_blk */
#ifdef DEBUG
} else {
mdclrerror(ep);
}
#endif /* DEBUG */
/* if hotspared */
/* get hotspare name */
goto out;
mdclrerror(ep);
(rcp->un_hs_pwstart != 0))
"%s: suspected bad start block,"
" seems labelled [raid]\n"),
(rcp->un_hs_pwstart == 0))
"%s: suspected bad start block, "
" seems unlabelled [raid]\n"),
}
/* override any start_blk */
}
/* get state, flags, and timestamp */
}
/* get resync info */
goto out;
}
/* cleanup, return success */
return (raidp);
/* cleanup, return error */
out:
return (NULL);
}
/*
* get raid
*/
)
{
}
/*
* check raid for dev
*/
static int
)
{
/* should be in the same set */
/* get unit */
return (-1);
/* look in columns */
int err;
/* check same drive since metagetstart() can fail */
return (-1);
else if (err == 0)
continue;
/* check overlap */
return (-1);
return (-1);
}
}
/* return success */
return (0);
}
/*
* check to see if we're in a raid
*/
int
)
{
mdnamelist_t *p;
int rval = 0;
/* should have a set */
/* for each raid */
return (-1);
/* check raid */
rval = -1;
break;
}
}
/* cleanup, return success */
return (rval);
}
/*
* check column
*/
int
)
{
/* check for soft partitions */
/* make sure we have a disk */
return (-1);
}
/* check to ensure that it is not already in use */
return (-1);
}
/* make sure it is in the set */
return (-1);
/* make sure it's not in a metadevice */
return (-1);
/* return success */
return (0);
}
/*
* print raid
*/
static int
char *fname,
)
{
int rval = -1;
if (options & PRINT_LARGEDEVICES) {
rval = 0;
goto out;
}
}
rval = 0;
goto out;
}
}
/* print name and -r */
goto out;
/*
* Print columns. Always print the full path.
*/
goto out;
}
goto out;
/* print options */
goto out;
goto out;
goto out;
}
goto out;
}
/* terminate last line */
goto out;
/* success */
rval = 0;
/* cleanup, return error */
out:
if (rval != 0)
return (rval);
}
static int
)
{
int col;
return (col);
}
/* No resyncing columns */
return (-1);
}
/*
* convert raid state to name
*/
char *
)
{
/* grab time */
/*
* If the device has a transient error state (due to it being DR'ed or
* failed) and there has been no I/O to it (the actual device is still
* marked as 'Okay') then we cannot know what the state is or what
* action to take on it. Therefore report the device as 'Unavailable'.
* A subsequent I/O to the device will cause the 'Okay' status to
* disappear if the device is actually gone and then we will print out
* the appropriate status. The MD_INACCESSIBLE state is only set
* on the raid when we open it or probe it. Once the raid is open
* then we will just have regular error status on the device.
*/
if (tstate & MD_INACCESSIBLE) {
}
/* resyncing */
if (find_resyncing_column(raidp) >= 0)
/* everything else */
case RUS_INIT :
case RUS_OKAY :
case RUS_ERRED :
/*FALLTHROUGH*/
case RUS_LAST_ERRED :
case RUS_DOI :
case RUS_REGEN :
default :
} /* switch */
}
static int
{
int col;
return (col);
}
/* No erred columns */
return (-1);
}
/*
* convert raid state to repair action
*/
char *
{
static char emsg[1024];
int err_col;
/* first check for full init failure */
return (emsg);
}
/* replace errored or init errored raid column */
(RCS_ERRED | RCS_INIT_ERRED))) >= 0) {
/* get column with error */
"metareplace %s%s %s <%s>",
return (emsg);
}
/* replace last errored raid column */
"metareplace %s %s %s <%s>",
return (emsg);
}
/* OK */
return (NULL);
}
/*
* get printable raid column state
*/
char *
)
{
/* grab time */
if (tstate != 0) {
}
/* everything else */
case RCS_INIT:
case RCS_OKAY:
case RCS_INIT_ERRED:
/*FALLTHROUGH*/
case RCS_ERRED:
case RCS_LAST_ERRED:
case RCS_RESYNC:
default:
}
}
/*
* print raid column
*/
static int
char *fname,
int print_len,
)
{
char *devid = "";
int has_mddb;
char *has_mddb_str;
char *col_state;
int rval = -1;
/* get info */
return (-1);
return (-1);
if (has_mddb)
else
if (metaismeta(namep)) {
return (-1);
tstate & MD_DEV_ERRORED);
} else {
/*
* if top_tstate is set, that implies that you have
* a ctd type device with an unavailable metadevice
* on top of it. If so, print a - for it's state
*/
if (top_tstate != 0)
col_state = "-";
else
}
/* populate the key in the name_p structure */
return (-1);
/* determine if devid does NOT exist */
if (options & PRINT_DEVID) {
else {
}
}
/* print column */
/*
* Building a format string on the fly that will
* be used in (f)printf. This allows the length
* of the ctd to vary from small to large without
* looking horrible.
*/
if (! (options & PRINT_TIMES)) {
"\t%-*.*s %8lld %5.5s %12.12s %5.5s %s\n",
goto out;
}
} else {
"\t%-*s %5lld %-5s %-11s %-5s %-9s %s\n",
goto out;
}
}
/* success */
rval = 0;
/* cleanup, return error */
out:
if (rval != 0)
return (rval);
}
/*
* print raid options
*/
int
char *fname,
)
{
int rval = -1;
/* print options */
goto out;
}
/* success */
rval = 0;
/* cleanup, return error */
out:
if (rval != 0)
return (rval);
}
/*
* report raid
*/
static int
char *fname,
)
{
char *p;
char *raid_state;
char *timep;
int rval = -1;
int len = 0;
if (options & PRINT_LARGEDEVICES) {
rval = 0;
goto out;
}
}
rval = 0;
goto out;
}
}
/* print header */
if (options & PRINT_HEADER) {
goto out;
}
}
/* print state */
return (-1);
}
if (options & PRINT_TIMES) {
} else {
timep = "";
}
goto out;
}
/*
* Display recovery action if we're marked in the Unavailable state.
*/
/* print what to do */
if (tstate & MD_INACCESSIBLE) {
if (metaislocalset(sp)) {
sname[0] = '\0';
} else {
}
goto out;
}
" Invoke: %s\n"), p) == EOF) {
goto out;
}
}
/* resync status */
" Resync in progress: %2d.%1d%% done\n"),
goto out;
}
" Initialization in progress: %2d.%1d%% "
"done\n"),
goto out;
}
" Parity regeneration in progress: %2d.%1d%% "
"done\n"),
goto out;
}
}
}
/* print hotspare pool */
return (-1);
}
}
/* print interlace */
goto out;
}
/* print size */
goto out;
}
/* MD_DEBUG stuff */
if (options & PRINT_DEBUG) {
/* get additional info */
return (-1);
/* print prewrite count and size */
" Prewrite Count: %u slots\n"),
goto out;
}
" Prewrite Slot Size: %u blocks\n"),
goto out;
}
" Prewrite Total Size: %u blocks\n"),
goto out;
}
}
/* print original devices */
goto out;
== EOF) {
goto out;
}
/*
* Building a format string on the fly that will
* be used in (f)printf. This allows the length
* of the ctd to vary from small to large without
* looking horrible.
*/
}
len += 2;
if (! (options & PRINT_TIMES)) {
"\t%-*.*s %-12.12s %-5.5s %12.12s %-5.5s %s\n",
goto out;
}
} else {
"\t%-*s %5s %-5s %-11s %-5s %-9s %s\n",
len,
goto out;
}
}
return (-1);
}
}
/* print concatenated devices */
"Concatenated Devices:\n")) == EOF) {
goto out;
}
" Size: %lld blocks (%s)\n"),
== EOF) {
goto out;
}
/*
* The field width is passed as a printf argument, which allows
* the length of the ctd to vary from small to large without
* looking horrible.
*/
if (! (options & PRINT_TIMES)) {
"\t%-*.*s %-12.12s %-5.5s %-12.12s %5.5s %s\n",
goto out;
}
} else {
"\t%-*s %5s %-5s %-11s %-9s %s\t%s\n",
len,
goto out;
}
}
return (-1);
}
}
}
/* add extra line */
goto out;
/* success */
rval = 0;
/* cleanup, return error */
out:
if (rval != 0)
return (rval);
}
/*
* print or report raid(s)
*/
int
mdnamelist_t **nlpp,
char *fname,
)
{
int col;
/* should have same set */
/* print all raids */
mdnamelist_t *p;
int cnt;
int rval = 0;
/* get list */
return (-1);
else if (cnt == 0)
return (0);
/* recurse */
rval = -1;
}
/* cleanup, return success */
return (rval);
}
/* get unit structure */
return (-1);
/* check for parented */
if ((! (options & PRINT_SUBDEVS)) &&
return (0);
}
/* print appropriate detail */
if (options & PRINT_SHORT) {
return (-1);
} else {
return (-1);
}
/* Recurse on components that are metadevices */
if ((metaismeta(namep)) &&
return (-1);
}
}
return (0);
}
/*
* adjust raid geometry
*/
static int
)
{
/* get reinstructs */
return (-1);
/* adjust geometry */
return (-1);
/* return success */
return (0);
}
/*
* add another column to the raid unit structure
*/
static int
)
{
/* setup state and timestamp */
/* get start, size, and maxio */
return (-1);
return (-1);
return (-1);
/* adjust start and size by prewrite */
/* make sure we still have something left */
}
}
if (options & MDCMD_DOIT) {
/* store name in namespace */
return (-1);
}
/* setup column */
mdc->un_alt_pwstart = 0;
mdc->un_alt_devstart = 0;
/* add the size (we use) of the device to the total */
/* adjust geometry */
return (-1);
/* count column */
mr->un_totalcolumncnt++;
/* return success */
return (0);
}
/*
* invalidate column names
*/
static int
)
{
return (-1);
}
return (0);
}
/*
* attach columns to raid
*/
int
)
{
uint_t concat_cnt = 0;
mdnamelist_t *p;
int rval = -1;
int create_flag = MD_CRO_32BIT;
/* should have a set */
/* check type */
return (-1);
/* check and count new columns */
/* check against existing devices */
return (-1);
/* check against ourselves */
return (-1);
}
}
/* count */
++concat_cnt;
}
/* get old unit */
return (-1);
/*
* calculate the size needed for the new raid unit and allocate
* the appropriate structure. allocate new unit.
*/
/* We always want a do-it, this is for attach_raid_col below */
options |= MDCMD_DOIT;
/* build new unit structure */
/* attach column */
goto out;
}
}
/* grow raid */
if (create_flag == MD_CRO_32BIT) {
} else {
}
goto out;
}
/* clear cache */
goto out;
/* let em know */
if (options & MDCMD_PRINT) {
if (concat_cnt == 1) {
"%s: component is attached\n"),
} else {
"%s: components are attached\n"),
}
}
/* grow any parents */
goto out;
rval = 0; /* success */
/* cleanup, return error */
out:
if (rval != 0)
return (rval);
}
/*
* get raid parameters
*/
int
)
{
/* should have a set */
/* check name */
return (-1);
/* get unit */
return (-1);
/* return parameters */
else
return (0);
}
/*
* set raid parameters
*/
int
)
{
/* should have a set */
/* check name */
return (-1);
/* set parameters */
/* clear cache */
/* return success */
return (0);
}
/*
* validate raid replace column
*/
static int
int dup_ok,
)
{
int rval = -1;
/* get raid unit */
return (-1);
/* check it out */
goto out;
mdclrerror(ep);
}
goto out;
goto out;
goto out;
goto out;
}
goto out;
goto out;
}
/* success */
rval = 0;
/* cleanup, return error */
out:
return (rval);
}
/*
* replace raid column
*/
int
)
{
int rebind;
char *new_devidp = NULL;
int ret;
/* should have same set */
/* check name */
return (-1);
/* save new binding in case this is a rebind where oldnp==newnp */
/* invalidate, then get the raid (fill in oldnp from metadb) */
return (-1);
/* can't replace a component if the raid is inaccessible */
return (-1);
}
if (tstate & MD_INACCESSIBLE) {
}
/* the old device binding is now established */
/* setup raid info */
rebind = 1;
} else {
rebind = 0;
}
if (rebind) {
}
/*
* Save a copy of the devid associated with the new disk. The
* reason is that the checks for the column (meta_check_column)
* via validate_new_raid() could cause the disk's devid to be
* changed to that of the devid that is currently stored in the
* replica namespace for the disk in question. This devid could
* be stale if we are replacing the disk. The actual function
* that overwrites the devid is dr2drivedesc().
*/
/* don't set up new_devidp if no devids or MN diskset */
if (!metaislocalset(sp)) {
return (-1);
if (MD_MNSET_DESC(sd))
new_devidp = NULL;
}
/* check out new (sets up start_blk, has_label, number_blks) */
ep) != 0) {
return (-1);
}
/*
* Copy back the saved devid.
*/
if (new_devidp) {
}
/* store name in namespace, allocate new key */
return (-1);
/*
* We are 'rebind'ing a disk that is in a diskset, so as well
* as updating the diskset's namespace, the local set needs
* to be updated because it also contains a reference to the
* disk in question.
*/
if (ret != METADEVADM_SUCCESS) {
return (-1);
}
}
/* replace column */
}
/* clear cache */
/* let em know */
if (options & MDCMD_PRINT) {
"%s: device %s is replaced with %s\n"),
}
/* return success */
return (0);
}
/*
* enable raid column
*/
int
)
{
int err = 0;
char *devnm;
int ret;
/* should have same set */
/* check name */
return (-1);
/* get the file_system dev binding */
return (-1);
/* get the raid unit (fill in colnp->dev with metadb version) */
return (-1);
/* enabling a component can't work if the raid is inaccessible */
return (-1);
}
if (tstate & MD_INACCESSIBLE) {
}
/* the metadb device binding is now established */
/*
* check for the case where the dev_t has changed between the
* filesystem and the metadb. This is called a rebind, and
* is handled by meta_raid_replace.
*/
/*
* Save the devt of mddb version
*/
/*
* Don't do it if meta_raid_replace returns an error
*/
}
return (err);
}
/* setup raid info */
if (force)
else
/* check it out */
return (-1);
/* enable column */
/*
* Are we dealing with a non-local set? If so, we need to update the
* local namespace so that the disk record has the correct devid.
*/
if (!metaislocalset(sp)) {
ep);
if (ret != METADEVADM_SUCCESS) {
/*
* Failed to update the local set. Nothing to do here
* apart from reporting the error. The namespace is
* most likely broken and some form of remedial
* recovery is going to be required.
*/
mdclrerror(ep);
}
}
/* clear cache */
/* let em know */
if (options & MDCMD_PRINT) {
"%s: device %s is enabled\n"),
}
/* return success */
return (0);
}
/*
* check for dups in the raid itself
*/
static int
)
{
uint_t c;
for (c = 0; (c < col); ++c) {
return (-1);
}
}
return (0);
}
/*
* default raid interlace
*/
{
/* default to 512k, round up if necessary */
return (interlace);
}
/*
* convert interlaces
*/
int
char *uname,
)
{
}
return (0);
}
/*
* check raid
*/
int
)
{
/* check number */
}
/* compute default interlace */
}
/* check state */
case RUS_INIT:
case RUS_OKAY:
break;
default:
}
/* check interlace */
return (-1);
/* check hotspare pool name */
if (doit) {
return (-1);
}
}
/* check columns */
/* setup column */
else
/* check column */
if (!updateit) {
return (-1);
ep)) == MD_DISKADDR_ERROR)) {
return (-1);
}
if (size == 0)
}
/* check this raid too */
return (-1);
}
/* return success */
return (0);
}
/*
* setup raid geometry
*/
static int
)
{
uint_t write_reinstruct = 0;
uint_t read_reinstruct = 0;
/* get worst reinstructs */
return (-1);
}
/* setup geometry from first column */
return (-1);
}
return (-1);
/* return success */
return (0);
}
int
{
int statecnt = 0;
int col;
statecnt++;
return (statecnt);
}
/*
* validate that a raid device being created with the -k flag is a real
* raid device
*/
int
{
int col;
int fd;
goto error_exit;
goto error_exit;
goto error_exit;
/*
* If our raid device is a 64 bit device, we can accept the
* pw header we just read in.
* Otherwise it's of type raid_pwhdr32_od_t and has to
* be converted.
*/
} else {
}
goto error_exit;
if (col == 0)
goto error_exit;
/* 4.1 prewrite header */
!= mr->un_totalcolumncnt) ||
(rpw->rpw_pwstart !=
(rpw->rpw_devstart !=
goto error_exit;
}
/*
* this is an old prewrite header (4.0); the unit structure
* will have to be trusted.
*/
}
return (0);
return (-1);
}
/*
* create raid
*/
int
)
{
diskaddr_t disk_size = 0;
uint_t disk_maxio = 0;
int rval = -1;
int create_flag = MD_CRO_32BIT;
/* validate raid */
return (-1);
/* allocate raid unit */
/*
* initialize the top level mr_unit_t structure
* setup the unit state to indicate whether to retain
* any data currently on the metadevice or to clear it
*/
} else {
}
/*
* setup original columns, saving start_block and
* finding smallest size and maxio
*/
/* setup state */
/* setup creation time */
/* get start, size, and maxio */
goto out;
goto out;
goto out;
if (options & MDCMD_DOIT) {
/* store name in namespace */
goto out;
}
/* setup column */
mdc->un_alt_pwstart = 0;
mdc->un_alt_devstart = 0;
else
/* adjust for smallest disk */
if (disk_size == 0) {
}
if (disk_maxio == 0) {
disk_maxio = maxio;
} else if (maxio < disk_maxio) {
disk_maxio = maxio;
}
}
/*
* before processing any of the attached column(s)
* set up the composition of the metadevice for column
* sizes and pre-write information
*/
else
goto out;
}
/* now calculate the number of segments per column */
goto out;
}
/*
* adjust the pw_cnt, pw_size, to fit into any fragmentation
* left over after column_size has been computed
*/
/*
* calculate the actual block count available based on the
* segment size and the number of segments per column ...
* ... and adjust for the number of parity segments
*/
goto out;
/*
* now calculate the pre-write offset and update the column
* structures to include the address of the individual pre-write
* areas
*/
/* get size */
goto out;
/* adjust start and size by prewrite */
/* make sure we still have something left */
}
/* do concat cols */
/* attach column */
goto out;
}
}
/* fill in the size of the raid */
if (options & MDCMD_UPDATE) {
}
/* if we're not doing anything, return success */
if (! (options & MDCMD_DOIT)) {
rval = 0; /* success */
goto out;
}
goto out;
}
/* create raid */
/* did the user tell us to generate a large device? */
if (create_flag == MD_CRO_64BIT) {
} else {
}
goto out;
}
rval = 0; /* success */
/* cleanup, return success */
out:
if (rval != 0) {
}
rval = -1;
}
return (rval);
}
/*
* initialize raid
* NOTE: this function is metainit(1m)'s command line parser!
*/
int
mdsetname_t **spp,
int argc,
char *argv[],
)
{
int old_optind;
int c;
int rval = -1;
/* get raid name */
if (argc < 1)
goto syntax;
goto out;
/*
* Raid metadevice not allowed on multi-node diskset.
*/
if (! metaislocalset(*spp)) {
goto out;
if (MD_MNSET_DESC(sd)) {
goto out;
}
}
goto out;
if (!(options & MDCMD_NOLOCK)) {
/* grab set lock */
goto out;
goto out;
}
/* see if it exists already */
goto out;
goto out;
} else {
mdclrerror(ep);
}
/* grab -r */
goto syntax;
/* parse general options */
optind = 0;
opterr = 0;
goto options;
/* allocate raid */
/* setup common */
/* allocate and parse cols */
;
if (ncol != 0) {
}
/* parse column name */
goto out;
/* check for soft partitions */
/* check disks */
goto out;
}
}
/* parse raid options */
old_optind = optind = 0;
opterr = 0;
switch (c) {
case 'h':
goto out;
}
/*
* Get out if the specified hotspare pool really
* doesn't exist.
*/
goto out;
}
break;
case 'i':
ep) != 0) {
goto out;
}
goto out;
break;
case 'k':
break;
case 'o':
goto syntax;
}
goto out;
}
break;
case 'w':
goto syntax;
goto out;
}
break;
default:
argc += old_optind;
argv -= old_optind;
goto options;
}
old_optind = optind;
}
/* we should be at the end */
if (argc != 0)
goto syntax;
/* default to all original columns */
/* create raid */
goto out;
rval = 0; /* success */
/* let em know */
if (options & MDCMD_PRINT) {
uname);
}
goto out;
/* syntax error */
goto out;
/* options error */
goto out;
/* cleanup, return error */
out:
return (rval);
}
/*
* reset RAIDs
*/
int
)
{
int rval = -1;
int col;
/* should have same set */
/* reset all raids */
mdnamelist_t *p;
/* for each raid */
rval = 0;
return (-1);
/* reset RAID */
rval = -1;
break;
}
}
/* cleanup, return success */
return (rval);
}
/* check name */
return (-1);
/* get unit structure */
return (-1);
/* make sure nobody owns us */
}
/* clear subdevices cache */
return (-1);
/* clear metadevice */
goto out;
rval = 0; /* success */
/* let em know */
if (options & MDCMD_PRINT) {
}
/* clear subdevices */
if (! (options & MDCMD_RECURSE))
goto out;
/* only recurse on metadevices */
if (! metaismeta(colnp))
continue;
rval = -1;
}
/* cleanup, return success */
out:
return (rval);
}
/*
* reports TRUE if any RAID component is in error
*/
int
{
goto out;
}
goto out;
}
}
out:
mdclrerror(ep);
return (any_errs);
}
/*
* regen parity on a raid
*/
int
md_error_t *ep)
{
char *miscname;
/* should have a set */
/* make sure we have a raid */
return (-1);
}
/* start resync */
/* return success */
return (0);
}