meta_set_tkr.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Metadevice diskset interfaces
*/
#include "meta_set_prv.h"
static int
int forceflg,
)
{
md_drive_desc *p;
md_replica_t *r;
int i;
int dbcnt;
int rval = 0;
/* find the smallest existing replica */
}
if (nblks <= 0)
dbcnt = 0;
/*
* Before we bump up the dbcnt, if we're
* running with device ids in disksets, let's
* compare the device ids; otherwise we compare
* the ctd names.
*
* There is a possibility the device ids might
* have changed. To account for that case, we
* fall back to comparing the ctd names if the
* device id comparison fails. If we aren't running
* in device id mode and a disk has moved, the ctd's
* won't match.
*/
dbcnt++;
} else {
dbcnt++;
}
}
}
/* Lock the set on current set members */
if (MD_MNSET_DESC(sd)) {
while (nd) {
/* If this is forced, don't lock other sides */
!= 0) {
continue;
}
/* We already locked this side in the caller */
continue;
}
continue;
}
rval = -1;
goto out;
}
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* If this is forced, don't lock other sides */
continue;
/* We already locked this side in the caller */
continue;
rval = -1;
goto out;
}
}
}
if (MD_MNSET_DESC(sd)) {
while (nd) {
/* If this is forced, then only care about this node */
!= 0) {
continue;
}
continue;
}
ep) == -1) {
rval = -1;
break;
}
mdclrerror(ep);
}
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* If this is forced, then only care about this node */
continue;
ep) == -1) {
rval = -1;
break;
}
mdclrerror(ep);
}
}
}
out:
if (MD_MNSET_DESC(sd)) {
while (nd) {
/* If this is forced, don't unlock other sides */
!= 0) {
continue;
}
/* We will unlock this side in the caller */
continue;
}
continue;
}
if (rval == 0)
rval = -1;
}
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* If this is forced, don't unlock other sides */
continue;
/* We will unlock this side in the caller */
continue;
if (rval == 0)
rval = -1;
}
}
}
/* Do not clear the key via cl_set_setkey(NULL); this is nested */
return (rval);
}
static int
{
return (0);
}
static int
{
return (0);
}
/*
* Update the master block with the device id information for the disks
* in the diskset. The device id information will be consumed by the
* diskset import code in case of remotely replicated disksets.
*
* For the drives that have a valid diskset mddb on them, we add the
* device id for the drive to the unused portion of the mddb.
*
* For the drives that don't have a diskset mddb on them, we add a dummy
* master block that contains the device id for the drive. A dummy master
* block is signified by changing the master block magic number, mb_magic,
* to MDDB_MAGIC_DU.
*
* This code is responsible primarily for adding the appropriate device id
* information to diskset disks that didn't have the information. This would
* typically occur when the OS has been upgraded from an OS release prior to
* Solaris 10.
*
* The error path in this routine is defined as - if an error occurs while
* updating the mddb for one disk in the diskset, don't bother updating *any*
* of the mddbs because it's game over anyway as far as disaster recovery for
* that diskset is concerned.
*
* This code will need to be revisited if and when support for importing
* partial disksets is added.
*
* NOTE: This code relies heavily on meta_repartition() working correctly
* and reformatting a drive, so that there's enough room for a dummy master
* block, every time a drive is added to a diskset. Should
* the meta_repartition() code change in the future, this code will have to be
* revisited.
*
* Returns 0 on success and -1 on failure
*/
int
{
void *mb;
int fd = -1;
int dbcnt;
int dbsize;
/*
* Don't do anything for MN diskset for now.
*/
if (! metaislocalset(sp)) {
return (-1);
if (MD_MNSET_DESC(sd))
return (0);
}
/*
* For every drive in the drive descriptor, iterate through all
* the mddbs present on it and check to see if mb_devid_magic is
* set. If it isn't, then update the master block with the correct
* device id information
*/
int i = 0;
/*
* When the import support for remotely replicated
* disksets gets implemented, we probably want to
* inform the user that the disks won't be self
* identifying if any of these calls fails
*/
return (-1);
return (-1);
goto cleanup;
/* if devid_str_decode fails, make sure devid is null */
}
do {
int push = 0;
++i;
goto cleanup;
goto cleanup;
goto cleanup;
/*
* If the disk is one of the ones that doesn't
* have a shared mddb on it, we put a dummy
* master block on it.
*/
if (dbcnt == 0) {
break;
}
}
/*
* if mb_setcreatetime is 0, this field was never
* filled in so do it now.
*/
push = 1;
}
/*
* If MDDB_MAGIC_DE is set in the
* mb_devid_magic field then we know we
* have a valid device id and we don't
* need to add it to the master block.
*
* This would have to be revisited if device
* ids change as a result of device id
* algorithms changing or somesuch.
*/
sizeof (mddb_mb_t))) {
/*
* there's enough space to
* store the devid
*/
push = 1;
}
}
}
/*
* write out (push) any changes we have to the mb
*/
if (push) {
< 0)
goto cleanup;
goto cleanup;
}
if (devid)
} while (i < dbcnt);
}
/* success */
return (0);
if (fd != -1)
if (devid)
return (-1);
}
/*
* Exported Entry Points
*/
int
int flags,
int usetag,
)
{
md_drive_desc *d = NULL;
int rval = 0;
int i;
int has_set;
int matches = 0;
int numsides = 0;
int rb_level = 0;
int ret = 0;
if (flags & TAKE_USETAG) {
return (-1);
} else {
return (-1);
}
mdclrerror(ep);
}
/* Do we own the set? */
return (-1);
}
if (i == MD_SETOWNER_NO) {
return (-1);
}
}
/* We already own it, we are done. */
if (i == MD_SETOWNER_YES)
return (0);
return (-1);
/* You can not take ownership of a set that has no drives */
else
return (-1);
return (0);
}
/* END CHECK CODE */
/* Lock the set on our side */
rval = -1;
goto out;
}
/*
* Get the current side number - do not use getmyside()
* as this code is essentially getnodeside() and this saves
* some instructions.
*/
for (i = 0; i < MD_MAXSIDES; i++) {
continue;
/*
* SKEW is required for the local set
* as side 0 in this set is the node
* associated with it (this node).
*/
break;
}
}
if (side == MD_KEYWILD)
/*
* Check the local devid namespace to see if the disks
* have been moved. Use the local set first of all as this contains
* entries for the disks in the set.
*
* This is being done before the tk_own_bydd because the disks
* in the dd list could be wrong! But it should be done with the lock
* held for the set.
*/
/*
* Actually do the check of the disks.
*/
ep);
if ((ret == METADEVADM_ERR) ||
(ret == METADEVADM_DSKNAME_ERR)) {
/* check failed in some unknown manner */
rval = -1;
goto out;
} else if (ret == METADEVADM_DISKMOVE) {
/*
* Update the dd namelist so that the rpc.metamhd
* gets the correct disks to reserve - it is the rname
* we are interested in.
*/
/*
* Need to save the side names key as this
* points to the namespace entry that will
* need to be updated. In addition the call
* to meta_make_sidenmlist does not actually
* set the namespace key.
*/
metafreedrivename(d->dd_dnp);
/*
* null newname so we are reset for next time
* through
*/
if (ret == -1) {
rval = -1;
goto out;
}
}
}
}
if (!MD_ATSET_DESC(sd)) {
goto rollback;
}
goto rollback;
goto rollback;
mdclrerror(ep);
}
/* Snarf set of traditional diskset doesn't use stale information */
rval = -1;
goto out;
}
goto rollback;
/*
* Look at the set on all other hosts, if every other host
* has the same set with a larger genid, then we destroy this
* copy.
*/
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* Skip this node */
continue;
numsides++;
NHS_NST_EQ_G_GT, &xep);
if (has_set < 0) {
goto rollback;
matches++;
mdclrerror(&xep);
continue;
}
if (has_set)
matches++;
}
/* Destroy the set */
mdclrerror(&xep);
rval = -1;
goto out;
}
goto rollback;
}
goto rollback;
}
goto rollback;
goto rollback;
}
/*
* If the set doesn't have the MD_SR_MB_DEVID bit set, i.e.,
* the drives in the set don't have the device id information,
* then stick it in if possible.
*
* If updating the master block fails for whatever reason, it's
* okay. It just means the disk(s) in the diskset won't be self
* identifying.
*/
/* Lock the set on current set members */
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* We already locked this side */
continue;
rval = -1;
goto out;
}
}
/* update the sr_flags on all hosts */
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
goto rollback;
}
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* Unlocking of this side is done later */
continue;
if (rval == 0)
rval = -1;
}
}
}
/*
* If we get here, we need to unlock the set before the resync
* gets called, otherwise the "daemon" will hold the set lock
* until the resync is done!
*/
if (rval == 0)
rval = -1;
}
/* We try to get things resync'ed, but this can fail */
mdclrerror(&xep);
if (rval == 0)
rval = -1;
}
return (rval);
out:
if (rval == 0)
rval = -1;
}
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* We already unlocked this side */
continue;
if (rval == 0)
rval = -1;
}
}
}
return (rval);
/* Make sure we are blocking all signals */
mdclrerror(&xep);
rval = -1;
/* level 4 */
if (rb_level > 3) {
/* update the sr_flags on all hosts */
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
mdclrerror(&xep);
}
}
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* We will unlock this side below */
continue;
mdclrerror(&xep);
}
}
/* level 3 */
if (rb_level > 2) {
mdclrerror(&xep);
}
/* level 2 */
if (rb_level > 1) {
mdclrerror(&xep);
}
/* level 1 */
if (rb_level > 0) {
if (!MD_ATSET_DESC(sd)) {
mdclrerror(&xep);
}
}
/* level 0 */
mdclrerror(&xep);
/* release signals back to what they were on entry */
mdclrerror(&xep);
return (rval);
}
int
)
{
int rval = 0;
int rb_level = 0;
/* Make sure we own the set */
return (-1);
/* Get the drive descriptors */
return (-1);
/* Get timeout values in case we need to roll back this release */
return (-1);
/* END CHECK CODE */
/* Lock the set on our side */
rval = -1;
goto out;
}
goto rollback;
goto rollback;
goto rollback;
out:
if (rval == 0)
rval = -1;
}
return (rval);
/* Make sure we are blocking all signals */
mdclrerror(&xep);
rval = -1;
/* level 3 */
if (rb_level > 2) {
mdclrerror(&xep);
}
/* level 2 */
if (rb_level > 1) {
mdclrerror(&xep);
}
/* level 1 */
if (rb_level > 0) {
mdclrerror(&xep);
/* Snarf set of trad diskset doesn't use stale information */
mdclrerror(&xep);
}
/* level 0 */
mdclrerror(&xep);
/* release signals back to what they were on entry */
mdclrerror(&xep);
return (rval);
}