stripe.c revision 7fad02ee84994b49ae4b1505c21b22149e44d2a5
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
/*
* Names "drv/md" as a dependency of this module; omitted when lint is
* defined.
*/
#ifndef lint
char _depends_on[] = "drv/md";
#endif
/* Globals defined outside this file. */
extern kcondvar_t md_cv;
extern int md_status;
extern mdq_anchor_t md_done_daemon;
/*
* File-local int; no use of it is visible in this excerpt.
* NOTE(review): the name suggests a buffer offset -- confirm.
*/
static int md_stripe_mcs_buf_off;
/*
* Unconditionally returns 0.
* NOTE(review): the name/parameter line of this function is missing from
* this excerpt; presumably a constructor callback -- confirm.
*/
/*ARGSUSED1*/
static int
{
return (0);
}
/*
* Takes an untyped pointer ps and returns nothing.
* NOTE(review): the body is empty in this excerpt; statements may be
* missing -- confirm against the full file.
*/
static void
stripe_parent_init(void *ps)
{
}
/*
* Takes two untyped pointers (p, d) and returns nothing.
* NOTE(review): the body is empty in this excerpt; statements may be
* missing -- confirm against the full file.
*/
/*ARGSUSED1*/
static void
stripe_parent_destructor(void *p, void *d)
{
}
/*
* Unconditionally returns 0.
* NOTE(review): the name/parameter line of this function is missing from
* this excerpt; presumably a constructor callback -- confirm.
*/
/*ARGSUSED1*/
static int
{
return (0);
}
/*
* NOTE(review): both the name/parameter line and the body of this void
* function are missing from this excerpt -- confirm against the full file.
*/
static void
{
}
/*
* Takes two untyped pointers (p, d) and returns nothing.
* NOTE(review): the body is empty in this excerpt; statements may be
* missing -- confirm against the full file.
*/
/*ARGSUSED1*/
static void
stripe_child_destructor(void *p, void *d)
{
}
/*
* Tests whether MD_GBL_DAEMONS_LIVE is clear in the global md_status.
* NOTE(review): the statement guarded by that test is not visible in this
* excerpt -- confirm against the full file.
*/
/*ARGSUSED*/
static void
stripe_run_queue(void *d)
{
if (!(md_status & MD_GBL_DAEMONS_LIVE))
}
/*
* Consults md_cflags for MD_OFLG_PROBEDEV and skips (continue) entries for
* which there is nothing to close, as the comment below explains.
* NOTE(review): the name/parameter line, the loop headers and the close
* call itself are missing from this excerpt.
*/
static void
{
int row;
int i;
int c;
if (md_cflags & MD_OFLG_PROBEDEV) {
/*
* It is possible that the md_layered_open
* failed because the stripe unit structure
* contained a NODEV. In such a case since
* there is nothing to open, there is nothing
* to close.
*/
continue;
}
if ((md_cflags & MD_OFLG_PROBEDEV) &&
}
}
}
}
/*
* Returns ENXIO when an open error occurs and cont_on_errors is zero, or,
* for a probe open (MD_OFLG_PROBEDEV set in md_oflags), when probe_err_cnt
* equals total_comp_cnt. Returns 0 otherwise.
* NOTE(review): the name/parameter line, the loop headers and several
* statements are missing from this excerpt.
*/
static int
{
int row;
int i;
int c;
int err;
int probe_err_cnt = 0;
int total_comp_cnt = 0;
/*
* For a probe call, if any component of a stripe or a concat
* can be opened, it is considered to be a success. The total number
* of components in a stripe is computed prior to starting a probe.
* This number is then compared against the number of components
* that could be successfully opened. If none of the components
* in a stripe can be opened, only then is an ENXIO returned for a
* probe type open.
*/
if (md_oflags & MD_OFLG_PROBEDEV)
/*
* Do the open by device id
* Check if this comp is hotspared and
* if it is then use the key for hotspare.
* MN disksets don't use devids, so we had better not use
* md_devid_found/md_resolve_bydevid there. Rather do
* what's done in stripe_build_incore()
*/
if (MD_MNSET_SETNO(setno)) {
(void) md_hot_spare_ifc(HS_MKDEV, 0, 0,
}
} else {
}
}
/*
* For a submirror, we only want to open those devices
* that are not errored. If the device is errored then
* there is no reason to open it, and leaving it closed
* allows the errored device to be replaced.
*/
if ((md_oflags & MD_OFLG_PROBEDEV) ||
} else {
}
/*
* Only set the un_dev if the tmpdev != NODEV64. If
* it is NODEV64 then the md_layered_open() will have
* failed in some manner.
*/
if (err) {
/* Caller did not ask to tolerate errors: fail the open. */
if (!cont_on_errors) {
return (ENXIO);
}
if (md_oflags & MD_OFLG_PROBEDEV)
} else {
if (md_oflags & MD_OFLG_PROBEDEV) {
} else
}
}
}
/* If every component in a stripe could not be opened fail */
if ((md_oflags & MD_OFLG_PROBEDEV) &&
(probe_err_cnt == total_comp_cnt))
return (ENXIO);
else
return (0);
}
/*
* Returns 0 on every path visible here.
*
* p - untyped pointer; its use is not visible in this excerpt.
* snarfing - when zero, the visible loop body skips (continue) the
* hotspare check that follows.
*
* NOTE(review): the loop headers and the condition guarding the first
* return are missing from this excerpt.
*/
int
stripe_build_incore(void *p, int snarfing)
{
int row;
int i;
int c;
int ncomps;
return (0);
/*
* Reset all the is_open flags; these are probably set
* because they just came out of the database.
*/
ncomps = 0;
}
if (!snarfing)
continue;
/*
* Check for hotspares. If the hotspares haven't been
* snarfed yet, stripe_open_all_devs() will do the
* remapping of the dev's later.
*/
(void) md_hot_spare_ifc(HS_MKDEV, 0, 0,
}
}
}
return (0);
}
/*
* Calls md_remove_minor_node(mnum), discarding its result, and returns
* immediately when `removing` is zero. The remaining steps are described
* by the comments below.
* NOTE(review): the name/parameter line and most statements are missing
* from this excerpt.
*/
void
{
int i, c;
int row;
int nsv;
int isv;
int rid = 0;
/*
* Attempt release of its minor node
*/
(void) md_remove_minor_node(mnum);
if (!removing)
return;
nsv = 0;
/* Count the number of devices */
}
/*
* allocate recids array. since we may have to commit
* underlying soft partition records, we need an array
* of size: total number of components in stripe + 3
* (one for the stripe itself, one for the hotspare, one
* for the end marker).
*/
/*
* Save the md_dev64_t's and driver nm indexes.
* Because after the mddb_deleterec() we will
* not be able to access the unit structure.
*
* NOTE: Deleting the names before deleting the
* unit structure would cause problems if
* the machine crashed in between the two.
*/
isv = 0;
recids[0] = 0;
recids[2] = 0;
}
/*
* check if we've got metadevice below us and
* deparent it if we do.
* NOTE: currently soft partitions are the
* only metadevices stripes can be
* built on top of.
*/
}
}
}
/*
* Decrement the HSP reference count and
* remove the knowledge of the HSP from the unit struct.
* This is done atomically to remove a window.
*/
}
/* set end marker and commit records */
/*
* Remove self from the namespace
*/
}
/* Remove the unit structure */
/* Remove the vtoc, if present */
if (vtoc_id)
}
/*
* Chooses the string "read" or "write" and finishes by calling
* md_biodone(pb).
* NOTE(review): the name/parameter line, the condition that selects the
* string, and the use of that string are missing from this excerpt.
*/
static void
{
char *str;
str = "read";
} else {
str = "write";
}
}
} else {
}
md_biodone(pb);
}
/*
* Returns 1 on the two early-exit paths visible here, and 0 after calling
* md_biodone(pb).
* NOTE(review): the name/parameter line and the conditions that lead to
* each return are missing from this excerpt.
*/
static int
{
mdi_unit_t *ui;
/*LINTED*/
}
return (1);
}
return (1);
}
} else {
}
md_biodone(pb);
return (0);
}
/*
* This routine does the mapping from virtual (dev, blkno) of a metapartition
* to the real (dev, blkno) of a real disk partition.
* It goes to the md_conf[] table to find out the correct real partition
* dev and block number for this buffer.
*
* A single buf request cannot go across a real disk partition boundary.
* When the virtual request specified by (dev, blkno) spans more than one
* real partition, md_mapbuf will return 1. Then the caller should prepare
* another real buf and continue calling md_mapbuf to do the mapping until
* it returns 0.
*
* The value returned is the local `more`, which every visible path sets
* to either 0 or 1.
*
* NOTE(review): the name/parameter line and the conditions that select
* each value of `more` are missing from this excerpt.
*/
static int
{
extern unsigned md_maxphys;
/* Work var's when bp==NULL */
/*
* Do a real calculation to derive the minor device of the
* Virtual Disk, which in turn will let us derive the
*/
break;
}
more = 0;
else
more = 1;
} else {
more = 0;
}
} else { /* Have striping */
more = 1;
} else {
more = 0;
}
}
/* only break up the I/O if we're not built on another metadevice */
more = 1;
}
/*
* wb_bcount is limited by md_maxphys which is 'int'
*/
}
return (more);
}
/*
* Steps visible in this excerpt:
* - for a top-level request (MD_STR_NOTTOP clear in flag) on a set
* matching MD_SET_HALTED | MD_SET_MNSET, takes md_mx around a wait
* loop whose body is not visible;
* - unless MD_NOBLOCK is set in flag, calls md_inc_iocount(setno) and
* returns if it reports non-zero;
* - records whether the request is a write in doing_writes;
* - repeats a do/while loop for as long as `more` is non-zero.
* NOTE(review): the name/parameter line and many statements are missing
* from this excerpt.
*/
static void
{
int doing_writes;
int more;
mdi_unit_t *ui;
/*
* When doing IO to a multi owner meta device, check if set is halted.
* We do this check without the needed lock held, for performance
* reasons.
* If an IO just slips through while the set is locked via an
* MD_MN_SUSPEND_SET, we don't care about it.
* Only check for a suspended set if we are a top-level i/o request
* (MD_STR_NOTTOP is cleared in 'flag').
*/
(MD_SET_HALTED | MD_SET_MNSET)) {
if ((flag & MD_STR_NOTTOP) == 0) {
mutex_enter(&md_mx);
/* Here we loop until the set is no longer halted */
}
mutex_exit(&md_mx);
}
}
if ((flag & MD_NOBLOCK) == 0) {
/* A non-zero result abandons the request here. */
if (md_inc_iocount(setno) != 0) {
return;
}
} else {
}
if (!(flag & MD_STR_NOTTOP)) {
return;
}
}
/*
* Save essential information from the original buffhdr
* in the md_save structure.
*/
doing_writes = 1;
else
doing_writes = 0;
current_offset = 0;
do {
/*
* Do these calculations now,
* so that we pickup a valid b_bcount from the chld_bp.
*/
if (more) {
}
if (doing_writes &&
(void) stripe_done(cb);
continue;
}
} while (more);
drv_usecwait(10);
}
}
}
/*
* Returns 0 immediately when cmd is MD_SNARF_CLEANUP. Otherwise returns
* gotsomething if all_stripes_gotten ended up zero, and 0 if it did not.
* The record loop switches on rbp->rb_revision to tell small records
* (MDDB_REV_RB, MDDB_REV_RBFN) from big ones (MDDB_REV_RB64,
* MDDB_REV_RB64FN).
* NOTE(review): the name/parameter line, the loop header and most of the
* per-record work are missing from this excerpt.
*/
static int
{
int gotsomething;
int all_stripes_gotten;
if (cmd == MD_SNARF_CLEANUP)
return (0);
all_stripes_gotten = 1;
gotsomething = 0;
continue;
switch (rbp->rb_revision) {
case MDDB_REV_RB:
case MDDB_REV_RBFN:
/*
* This means, we have an old and small record
* and this record hasn't already been
* converted. Before we create an incore
* metadevice from this we have to convert it to
* a big record.
*/
small_un =
KM_SLEEP);
} else {
/* Small device had already been converted */
}
break;
case MDDB_REV_RB64:
case MDDB_REV_RB64FN:
/* Big device */
break;
}
/* Create minor node for snarfed unit. */
continue;
}
all_stripes_gotten = 0;
gotsomething = 1;
}
}
if (!all_stripes_gotten)
return (gotsomething);
return (0);
}
/*
* Dispatches on cmd:
* MD_HALT_CLOSE, MD_HALT_OPEN, MD_HALT_UNLOAD - return 0.
* MD_HALT_CHECK - scan indices 0..md_nunits-1 and return 1 as soon as
* md_unit_isopen(ui) is true for one of them, else 0.
* MD_HALT_DOIT - scan the same range, then return 0.
* anything else - return 1.
* NOTE(review): the name/parameter line, the tests that guard the
* `continue` statements, and the per-unit work in the MD_HALT_DOIT loop
* are missing from this excerpt.
*/
static int
{
int i;
mdi_unit_t *ui;
if (cmd == MD_HALT_CLOSE)
return (0);
if (cmd == MD_HALT_OPEN)
return (0);
if (cmd == MD_HALT_UNLOAD)
return (0);
if (cmd == MD_HALT_CHECK) {
for (i = 0; i < md_nunits; i++) {
continue;
continue;
/* An open unit makes the check report 1. */
if (md_unit_isopen(ui))
return (1);
}
return (0);
}
/* Any command not handled above, other than MD_HALT_DOIT, returns 1. */
if (cmd != MD_HALT_DOIT)
return (1);
for (i = 0; i < md_nunits; i++) {
continue;
continue;
}
return (0);
}
/*
* Returns ENXIO early under the multi-owner condition described below;
* otherwise returns err (initially 0) through the `out` label.
* NOTE(review): the name/parameter line, the locking calls and the
* statements that set err are missing from this excerpt.
*/
/*ARGSUSED3*/
static int
{
int err = 0;
/*
* When doing an open of a multi owner metadevice, check to see if this
* node is a starting node and if a reconfig cycle is underway.
* If so, the system isn't sufficiently set up enough to handle the
* open (which involves I/O during sp_validate), so fail with ENXIO.
*/
(MD_SET_MNSET | MD_SET_MN_START_RC)) {
return (ENXIO);
}
/* single thread */
/* open devices, if necessary */
goto out;
}
}
/* count open */
goto out;
/* unlock, return success */
out:
return (err);
}
/*
* Returns err (initially 0) through the `out` label. The visible trailing
* parameters are flag, otyp and md_cflags.
* NOTE(review): the function name, the first parameter(s), the locking
* calls and the statements that set err are missing from this excerpt.
*/
/*ARGSUSED1*/
static int
int flag,
int otyp,
int md_cflags
)
{
int err = 0;
/* single thread */
/* count closed */
goto out;
/* close devices, if necessary */
}
/* unlock, return success */
out:
return (err);
}
/*
* This routine dumps memory to the disk. It assumes that the memory has
* already been mapped into mainbus space. It is called at disk interrupt
* priority when the system is in trouble.
*
* Returns EINVAL from two early checks whose conditions are not visible
* in this excerpt. Otherwise loops while `more` is non-zero, and returns
* saveresult: the last non-zero `result` seen, or 0 if there was none.
*
* NOTE(review): the name/parameter line and the dump call that produces
* `result` are missing from this excerpt.
*/
static int
{
int result;
int more;
int saveresult = 0;
/*
* Don't need to grab the unit lock,
* because nothing else is supposed to be happening.
* Also dump is not supposed to sleep.
*/
return (EINVAL);
return (EINVAL);
do {
/*
* bdev_dump() is currently only able to take
* 32 bit wide blkno's.
*/
nblk);
/* Remember the failure but keep going. */
if (result)
saveresult = result;
}
} while (more);
return (saveresult);
}
/*
* Returns intptr_t; one visible parameter is the untyped pointer `junk`.
* NOTE(review): the function name, the other parameters and the whole
* body are missing from this excerpt.
*/
/*ARGSUSED*/
static intptr_t
void *junk,
{
}
/*
* stripe_block_count_skip_size() returns the following values
* so that the logical to physical block mappings can
* be calculated without intimate knowledge of the underpinnings.
*
* block - first logical block number of the device.
* block = [ # of blocks before THE row ] +
* [ # of blocks in THE row before the component ]
* count - # of segments (interlaced size).
* skip - # of logical blocks between segments, or delta to
* get to next segment
* size - interlace size used for the block, count, skip.
*
* The function's own return value is 0 on the path visible here.
* For a concatenation it stores *count = 1 and *skip = 0.
*
* NOTE(review): the function name, most parameters and the stripe branch
* are missing from this excerpt.
*/
/*ARGSUSED*/
static intptr_t
void *junk,
int ci,
{
int row;
int cmpcount = 0;
break;
}
/*
* Concatenations are always contiguous blocks,
* you cannot depend on the interlace being a usable
* value (except for stripes).
*/
*count = 1;
*skip = 0;
} else { /* Stripes */
}
return (0);
}
/*
* Returns intptr_t.
* NOTE(review): the name/parameter line and the whole body are missing
* from this excerpt.
*/
/*ARGSUSED*/
static intptr_t
{
}
/*
* Returns the local `count`, which starts at 0.
* NOTE(review): the return type, the name/parameter line and the code
* that accumulates into count are missing from this excerpt.
*/
/*ARGSUSED*/
{
/*
* See comments for stripe_get_dev
*/
int count = 0;
int row;
return (count);
}
/*
* Returns 0 on the path visible here.
* NOTE(review): the return type, the name/parameter line and the lookup
* code are missing from this excerpt.
*/
/*ARGSUSED*/
{
/*
* It should be noted that stripe_replace in stripe_ioctl.c calls this
* routine using makedevice(0, minor) for the first argument.
*
* If this routine at some point in the future needs to use the major
* number stripe_replace must be changed.
*/
/*
* Try to resolve devt again if NODEV64
* Check if this comp is hotspared and if it is
* then use key for hotspare
*/
}
return (0);
}
/*
* Returns nothing.
* NOTE(review): the name/parameter line and all statements are missing
* from this excerpt.
*/
/*ARGSUSED*/
void
{
/*
* See comments for stripe_get_dev
*/
}
}
/*
* Starts by storing NULL through replace_data. Return values visible here
* are MDE_COMP_TOO_SMALL, MDE_COMP_OPEN_ERR and 0. In the branch
* commented "replacing the device", stores sv (cast to void *) through
* replace_data.
* NOTE(review): the return type, the function name, every parameter
* except replace_data, and most statements are missing from this excerpt.
*/
/*ARGSUSED*/
void **replace_data)
{
mdi_unit_t *ui;
int row;
int ncomps = 0;
int cmpcount = 0;
int rid = 0;
mddb_recid_t hs_id = 0;
*replace_data = NULL;
/*
* Count the number of components
*/
}
recids[0] = 0;
/*
* No need of checking size of new device,
* when hotsparing (it has already been done), or
* when enabling the device.
*/
break;
}
/* Concatenations have a ncomp = 1 */
/*
* now check to see if new comp can be used in
* place of old comp
*/
nd->nd_labeled)
nd->nd_start_blk = 0;
else
return (MDE_COMP_TOO_SMALL);
}
}
/*
* Close this component.
*/
}
/*
* If the component is hotspared, return to the pool.
*/
}
/*
* Open by device id; for enable (indicated by a NULL
* nd pointer), use the existing component info. For
* replace, use the new device.
*/
/*
* If someone replaced a new disk in the same slot
* we get NODEV64 since old device id cannot be
* resolved. The new devt is obtained from the
* mddb since devt is going to be unchanged for the
* enable case. No need to check for multiple
* keys here because the caller (comp_replace)
* has already sanity checked it for us.
*/
}
} else {
/*
* If this is a hotspare, save the original dev_t for later
* use. If this has occurred during boot then the value of
* comp->un_dev will be NODEV64 because of the failure to look
* up the devid of the device.
*/
}
/*
* Now open the new device if required. Note for a single component
* stripe it will not be open - leave this for the mirror driver to
* deal with.
*/
if (md_unit_isopen(ui)) {
ids[2] = 0;
/*
* Revert back to the original device.
*/
"md: %s: open error of hotspare %s",
NULL, 0));
}
return (MDE_COMP_OPEN_ERR);
}
}
recids[2] = 0;
return (0);
}
/* if hot sparing this device */
char devname[MD_MAX_CTDLEN];
char hs_devname[MD_MAX_CTDLEN];
sizeof (devname));
sizeof (hs_devname));
} else { /* replacing the device */
*replace_data = (void *)sv;
/*
* For the old device, make sure to reset the parent
* if it's a metadevice.
*/
}
}
/*
* For the new device, make sure to set the parent if it's a
* metadevice.
*
* If we ever support using metadevices as hot spares, this
* will need to be tested, and possibly moved into the
* preceding "else" clause, immediately following the parent
* reset block. For now, it's convenient to leave it here and
* only compress nd->nd_dev once.
*/
}
return (0);
}
/*
* Returns 1 on the early-exit and error paths visible here, and 0 at the
* end. The closing loop walks recids[0 .. nrecids-2], leaving the last
* slot for a terminating null recid.
* NOTE(review): the function name, several parameters, the `again` label
* targeted by the goto, and most statements are missing from this excerpt.
*/
/*ARGSUSED*/
static intptr_t
void *junk,
int ci,
int nrecids,
void (**replace_done)(),
void **replace_data)
{
mdi_unit_t *ui;
int row;
int err;
int i;
int cmpcount = 0;
return (1);
return (1);
break;
}
/* Concatenations have a ncomp = 1 */
else
nd.nd_labeled = 0;
&nd.nd_start_blk);
if (err) {
replace_done, replace_data)) {
}
recids[0] = 0;
return (1);
}
replace_done, replace_data)) {
goto again;
}
/* Leave a slot for the null recid */
for (i = 0; i < (nrecids - 1); i++) {
/*
* NOTE(review): as visible here this stores 0 into a slot that is
* already 0; an assignment appears to be missing from this
* excerpt -- confirm against the full file.
*/
if (recids[i] == 0) {
recids[i] = 0;
}
}
return (0);
}
/*
* Returns gotsomething, which starts at 0 and is set to 1 at the end of a
* loop iteration. Every `goto out` path returns the value held at that
* point. The loop switches on rbp->rb_revision to tell small records
* (MDDB_REV_RB, MDDB_REV_RBFN) from big ones (MDDB_REV_RB64,
* MDDB_REV_RB64FN).
* NOTE(review): the name/parameter line, the loop header and the
* per-record updates are missing from this excerpt.
*/
static int
)
{
int i, row, c, gotsomething;
gotsomething = 0;
continue;
switch (rbp->rb_revision) {
case MDDB_REV_RB:
case MDDB_REV_RBFN:
/*
* Small device
*/
goto out;
}
}
break;
case MDDB_REV_RB64:
case MDDB_REV_RB64FN:
goto out;
}
}
break;
}
/*
* If this is a top level and a friendly name metadevice,
* update its minor in the namespace.
*/
if ((*parent_id == MD_NO_PARENT) &&
goto out;
}
/*
* Update unit with the imported setno
*
*/
if (*hsp_id != -1)
if (*parent_id != MD_NO_PARENT)
gotsomething = 1;
}
out:
return (gotsomething);
}
/*
* Table pairing each service function in this file with the string name
* it is listed under. The final {NULL, 0} entry terminates the table.
*/
static md_named_services_t stripe_named_services[] = {
{stripe_shared_by_blk, "shared by blk" },
{stripe_shared_by_indx, "shared by indx" },
{stripe_component_count, "get component count" },
{stripe_block_count_skip_size, "get block count skip size" },
{stripe_get_dev, "get device" },
{stripe_replace_dev, "replace device" },
{stripe_hotspare_dev, "hotspare device" },
{NULL, 0}
};
/*
* Positional initializer wiring this file's entry points into an
* operations vector; a NULL slot means no handler is supplied.
* NOTE(review): the declaration line that opens this initializer is
* missing from this excerpt.
*/
stripe_open, /* open */
stripe_close, /* close */
md_stripe_strategy, /* strategy */
NULL, /* print */
stripe_dump, /* dump */
NULL, /* read */
NULL, /* write */
md_stripe_ioctl, /* stripe_ioctl, */
stripe_snarf, /* stripe_snarf */
stripe_halt, /* stripe_halt */
NULL, /* aread */
NULL, /* awrite */
stripe_imp_set, /* import set */
};
/*
* The visible argument fragment passes sizeof (md_sps_t) and
* stripe_parent_constructor to a call whose name is not visible.
* NOTE(review): the name/parameter line and the call being made are
* missing from this excerpt; presumably a cache creation -- confirm.
*/
static void
{
sizeof (md_sps_t), 0, stripe_parent_constructor,
0);
}
/*
* NOTE(review): both the name/parameter line and the body of this void
* function are missing from this excerpt -- confirm against the full file.
*/
static void
{
}
/* define the module linkage */