meta_set_tkr.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Metadevice diskset interfaces
*/
#include "meta_set_prv.h"
#include <sys/lvm/md_crc.h>
/*
 * Recompute the replica count (dd_dbcnt) and replica size (dd_dbsize) for
 * every drive in the drive descriptor list "dd" from the replica list "rlp",
 * then hand the updated list to the nodes of the set via
 * clnt_upd_dr_dbinfo().
 *
 * The replica size recorded is the smallest r_nblk found in "rlp"; when no
 * replica supplies a positive size, MD_MN_DBSIZE (multi-node set) or
 * MD_DBSIZE is used instead.
 *
 * The caller is expected to already hold the set lock on this node; this
 * routine locks and unlocks the set on the other nodes only.  When
 * "forceflg" is non-zero the other nodes are neither locked, updated nor
 * unlocked.  For a multi-node set, nodes without MD_MN_NODE_ALIVE are
 * skipped.
 *
 * Returns 0 on success and -1 on failure (with the error in "ep").
 */
static int
upd_dr_dbinfo(
	mdsetname_t		*sp,
	md_set_desc		*sd,
	md_drive_desc		*dd,
	md_replicalist_t	*rlp,
	int			forceflg,
	md_error_t		*ep
)
{
	md_drive_desc		*p;
	md_replica_t		*r;
	md_replicalist_t	*rl;
	int			i;
	int			dbcnt;
	int			rval = 0;
	daddr_t			nblks = 0;
	md_setkey_t		*cl_sk;
	md_error_t		xep = mdnullerror;
	md_mnnode_desc		*nd;
	ddi_devid_t		devid;

	/* find the smallest existing replica */
	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
		r = rl->rl_repp;
		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
	}

	if (nblks <= 0)
		nblks = (MD_MNSET_DESC(sd)) ? MD_MN_DBSIZE : MD_DBSIZE;

	for (p = dd; p != NULL; p = p->dd_next) {
		dbcnt = 0;

		/*
		 * If we're running with device ids in disksets (never the
		 * case for a multi-node set here), decode this drive's
		 * device id once.  If the string cannot be decoded, leave
		 * devid NULL so that only the ctd names are compared below;
		 * the decoded value must not be used or freed in that case.
		 */
		devid = NULL;
		if ((p->dd_dnp->devid != NULL) && (!MD_MNSET_DESC(sd))) {
			if (devid_str_decode(p->dd_dnp->devid, &devid,
			    NULL) != 0)
				devid = NULL;
		}

		for (rl = rlp; rl != NULL; rl = rl->rl_next) {
			r = rl->rl_repp;

			/*
			 * A replica lives on this drive if either the ctd
			 * names match or the device ids match.
			 *
			 * There is a possibility the device ids might
			 * have changed, which is why the ctd names are
			 * always considered.  If we aren't running in
			 * device id mode and a disk has moved, the ctd's
			 * won't match.
			 */
			if (strcmp(r->r_namep->drivenamep->cname,
			    p->dd_dnp->cname) == 0) {
				dbcnt++;
			} else if ((devid != NULL) && (r->r_devid != NULL) &&
			    (devid_compare(devid, r->r_devid) == 0)) {
				dbcnt++;
			}
		}

		if (devid != NULL)
			devid_free(devid);

		p->dd_dbcnt = dbcnt;
		p->dd_dbsize = dbcnt > 0 ? nblks : 0;
	}

	/* Lock the set on current set members */
	if (MD_MNSET_DESC(sd)) {
		nd = sd->sd_nodelist;
		while (nd) {
			/* If this is forced, don't lock other sides */
			if (forceflg && strcmp(mynode(), nd->nd_nodename)
			    != 0) {
				nd = nd->nd_next;
				continue;
			}

			/* We already locked this side in the caller */
			if (strcmp(mynode(), nd->nd_nodename) == 0) {
				nd = nd->nd_next;
				continue;
			}

			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				nd = nd->nd_next;
				continue;
			}

			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
				rval = -1;
				goto out;
			}
			nd = nd->nd_next;
		}
	} else {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* If this is forced, don't lock other sides */
			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
				continue;

			/* We already locked this side in the caller */
			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
				continue;

			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
				rval = -1;
				goto out;
			}
		}
	}

	/*
	 * Push the updated drive information to the nodes.  A node that
	 * reports MDE_NO_SET or MDE_DB_NODB is tolerated; any other error
	 * stops the update.
	 */
	if (MD_MNSET_DESC(sd)) {
		nd = sd->sd_nodelist;
		while (nd) {
			/* If this is forced, then only care about this node */
			if (forceflg && strcmp(mynode(), nd->nd_nodename)
			    != 0) {
				nd = nd->nd_next;
				continue;
			}

			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				nd = nd->nd_next;
				continue;
			}

			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd,
			    ep) == -1) {
				if (! mdiserror(ep, MDE_NO_SET) &&
				    ! mdismddberror(ep, MDE_DB_NODB)) {
					rval = -1;
					break;
				}
				mdclrerror(ep);
			}
			nd = nd->nd_next;
		}
	} else {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* If this is forced, then only care about this node */
			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
				continue;

			if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd,
			    ep) == -1) {
				if (! mdiserror(ep, MDE_NO_SET) &&
				    ! mdismddberror(ep, MDE_DB_NODB)) {
					rval = -1;
					break;
				}
				mdclrerror(ep);
			}
		}
	}

out:
	/*
	 * Unlock the other nodes.  An unlock failure is only reported
	 * through "ep" when no earlier error has been recorded.
	 */
	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	if (MD_MNSET_DESC(sd)) {
		nd = sd->sd_nodelist;
		while (nd) {
			/* If this is forced, don't unlock other sides */
			if (forceflg && strcmp(mynode(), nd->nd_nodename)
			    != 0) {
				nd = nd->nd_next;
				continue;
			}

			/* This side will be unlocked in the caller */
			if (strcmp(mynode(), nd->nd_nodename) == 0) {
				nd = nd->nd_next;
				continue;
			}

			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
				nd = nd->nd_next;
				continue;
			}

			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
			}
			nd = nd->nd_next;
		}
	} else {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* If this is forced, don't unlock other sides */
			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
				continue;

			/* This side will be unlocked in the caller */
			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
				continue;

			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
			}
		}
	}

	/* Do not clear the key, via cl_set_setkey(NULL) this is nested */

	return (rval);
}
/*
 * Issue the MD_MED_USE_TAG ioctl for set "setno", selecting data tag
 * "usetag".  Returns 0 when the ioctl succeeds; otherwise the ioctl's
 * error is moved into "ep" and the result of mdstealerror() is returned.
 */
static int
usetag_take(set_t setno, int usetag, md_error_t *ep)
{
	mddb_dtag_use_parm_t	tagparm;
	int			failed;

	/* start from an all-zero parameter block */
	(void) memset(&tagparm, '\0', sizeof (tagparm));
	tagparm.dtup_setno = setno;
	tagparm.dtup_id = usetag;

	failed = (metaioctl(MD_MED_USE_TAG, &tagparm, &tagparm.dtup_mde,
	    NULL) != 0);
	if (!failed)
		return (0);

	return (mdstealerror(ep, &tagparm.dtup_mde));
}
/*
 * Issue the MD_MED_ACCEPT ioctl for set "setno".  Returns 0 when the
 * ioctl succeeds; otherwise the ioctl's error is moved into "ep" and the
 * result of mdstealerror() is returned.
 */
static int
useit_take(set_t setno, md_error_t *ep)
{
	mddb_accept_parm_t	acceptparm;
	int			failed;

	/* start from an all-zero parameter block */
	(void) memset(&acceptparm, '\0', sizeof (acceptparm));
	acceptparm.accp_setno = setno;

	failed = (metaioctl(MD_MED_ACCEPT, &acceptparm, &acceptparm.accp_mde,
	    NULL) != 0);
	if (!failed)
		return (0);

	return (mdstealerror(ep, &acceptparm.accp_mde));
}
/*
 * Update the master block with the device id information for the disks
 * in the diskset. The device id information will be consumed by the
 * diskset import code in case of remotely replicated disksets.
 *
 * For the drives that have a valid diskset mddb on them, we add the
 * device id for the drive to the unused portion of the mddb.
 *
 * For the drives that don't have a diskset mddb on them, we add a dummy
 * master block that contains the device id for the drive. A dummy master
 * block is signified by changing the master block magic number, mb_magic,
 * to MDDB_MAGIC_DU.
 *
 * This code is responsible primarily for adding the appropriate device id
 * information to diskset disks that didn't have the information. This would
 * typically occur when the OS has been upgraded from an OS release prior to
 * Solaris 10.
 *
 * The error path in this routine is defined as - if an error occurs while
 * updating the mddb for one disk in the diskset, don't bother updating *any*
 * of the mddbs because it's game over anyways as far as disaster recovery for
 * that diskset is concerned.
 *
 * This code will need to be revisited if and when support for importing
 * partial disksets is added.
 *
 * NOTE: This code relies heavily on the meta_repartition() working correctly
 * and reformatting a drive, so that there's enough room for a dummy master
 * block, every time a drive is added to a diskset. Should
 * the meta_repartition() code change in future, this code will have to be
 * revisited.
 *
 * Nothing is done (and 0 is returned) for a multi-node diskset.
 *
 * The scratch master block buffer, the open replica slice and the decoded
 * device id are all released on every exit path once they have been
 * acquired.
 *
 * Returns 0 on success and -1 on failure
 */
int
meta_update_mb(mdsetname_t *sp, md_drive_desc *drivedesc, md_error_t *ep)
{
	uint_t		sliceno, offset;
	void		*mb;
	mddb_mb_t	*mbp;
	int		fd = -1;
	ddi_devid_t	devid = NULL;
	md_drive_desc	*dd;
	mddrivename_t	*dnp;
	mdname_t	*rsp;
	int		dbcnt;
	int		dbsize;
	size_t		len;
	md_set_desc	*sd;
	int		rval = -1;

	/*
	 * Don't do anything for MN diskset for now.
	 */
	if (! metaislocalset(sp)) {
		if ((sd = metaget_setdesc(sp, ep)) == NULL)
			return (-1);

		if (MD_MNSET_DESC(sd))
			return (0);
	}

	/* one-block scratch buffer; released at "cleanup" */
	mb = Malloc(DEV_BSIZE);
	mbp = (mddb_mb_t *)mb;

	/*
	 * For every drive in the drive descriptor, iterate through all
	 * the mddbs present on it and check to see if mb_devid_magic is
	 * set. If it isn't, then update the master block with the correct
	 * device id information
	 */
	for (dd = drivedesc; dd != NULL; dd = dd->dd_next) {
		int i = 0;

		dnp = dd->dd_dnp;
		dbcnt = dd->dd_dbcnt;
		dbsize = dd->dd_dbsize;

		/*
		 * When the import support for remotely replicated
		 * disksets gets implemented, we probably want to
		 * inform the user that the disks won't be self
		 * identifying if any of these calls fails
		 */
		if (meta_replicaslice(dnp, &sliceno, ep) != 0)
			goto cleanup;

		if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
			goto cleanup;

		if ((fd = open(rsp->rname, O_RDWR)) < 0) {
			fd = -1;
			goto cleanup;
		}

		/*
		 * If there is no device id string, or devid_str_decode
		 * fails, make sure devid is null.
		 */
		if ((dnp->devid == NULL) ||
		    (devid_str_decode(dnp->devid, &devid, NULL) != 0)) {
			devid = NULL;
		}

		/*
		 * Visit each replica's master block on this drive.  The
		 * first one is at block 16 and the following ones are
		 * dbsize blocks apart.  The body runs at least once so that
		 * a drive without replicas (dbcnt == 0) can be given a
		 * dummy master block.  The decoded devid is shared by all
		 * iterations and is only released after the loop.
		 */
		do {
			int push = 0;

			offset = (i * dbsize + 16);
			++i;

			if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0)
				goto cleanup;

			if (read(fd, mbp, DEV_BSIZE) != DEV_BSIZE)
				goto cleanup;

			if (crcchk((uchar_t *)mbp, (uint_t *)&mbp->mb_checksum,
			    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
				goto cleanup;

			/*
			 * If the disk is one of the ones that doesn't
			 * have a shared mddb on it, we put a dummy
			 * master block on it.
			 *
			 * NOTE(review): the result of meta_mkdummymaster()
			 * is not examined here - confirm whether a failure
			 * should be treated as an error.
			 */
			if (mbp->mb_devid_magic != MDDB_MAGIC_DE) {
				if (dbcnt == 0) {
					meta_mkdummymaster(sp, fd, 16);
					break;
				}
			}

			/*
			 * if mb_setcreatetime is 0, this field was never
			 * filled in so do it now.
			 */
			if ((mbp->mb_setcreatetime.tv_sec == 0) &&
			    (mbp->mb_setcreatetime.tv_usec == 0)) {
				mbp->mb_setcreatetime =
				    meta_get_lb_inittime(sp, ep);
				push = 1;
			}

			/*
			 * If MDDB_MAGIC_DE is set in the
			 * mb_devid_magic field then we know we
			 * have a valid device id and we don't
			 * need to add it to the master block.
			 *
			 * This would have to be revisited if device
			 * ids change as a result of device id
			 * algorithms changing or somesuch.
			 */
			if (mbp->mb_devid_magic != MDDB_MAGIC_DE) {
				if (devid != NULL) {
					len = devid_sizeof(devid);
					if (len <= (DEV_BSIZE -
					    sizeof (mddb_mb_t))) {
						/*
						 * there's enough space to
						 * store the devid
						 */
						mbp->mb_devid_magic =
						    MDDB_MAGIC_DE;
						mbp->mb_devid_len = len;
						(void) memcpy(mbp->mb_devid,
						    (char *)devid, len);
						push = 1;
					}
				}
			}

			/*
			 * write out (push) any changes we have to the mb
			 */
			if (push) {
				crcgen((uchar_t *)mbp,
				    (uint_t *)&mbp->mb_checksum,
				    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL);

				if (lseek(fd, (off_t)dbtob(offset), SEEK_SET)
				    < 0)
					goto cleanup;

				if (write(fd, mbp, DEV_BSIZE) != DEV_BSIZE)
					goto cleanup;
			}
		} while (i < dbcnt);

		/*
		 * Done with this drive: drop its device id and descriptor
		 * and reset both so that the cleanup path cannot release
		 * them a second time.
		 */
		if (devid != NULL) {
			devid_free(devid);
			devid = NULL;
		}
		(void) close(fd);
		fd = -1;
	}

	/* success */
	rval = 0;

cleanup:
	if (fd != -1)
		(void) close(fd);
	if (devid != NULL)
		devid_free(devid);
	Free(mb);
	return (rval);
}
/*
* Exported Entry Points
*/
/*
 * Take ownership of the diskset "sp" on this node.
 *
 * flags:
 *	TAKE_USETAG	select the data tag "usetag" (usetag_take()) before
 *			taking the set
 *	TAKE_USEIT	accept the current data (useit_take()) before taking
 *			the set
 *	TAKE_FORCE	passed on to own_set(), setup_db_bydd() and
 *			upd_dr_dbinfo()
 *
 * "mhiargsp" is handed to tk_own_bydd() and clnt_stimeout().
 *
 * The routine returns 0 straight away if this node already owns the set or
 * if the set has no drives.  Otherwise the set is locked on this node and
 * the take proceeds through numbered rollback levels (rb_level); a failure
 * after a level has been reached undoes the work of that level and of all
 * the lower ones at the "rollback" label.  Failures that must not be rolled
 * back leave through the "out" label, which only drops the set locks.
 *
 * Returns 0 on success and -1 on failure with the error in "ep".
 * NOTE(review): on the success path rval carries the value returned by
 * pathname_reload() unless a later step overrides it - confirm that this is
 * always 0 when pathname_reload() does not fail.
 */
int
meta_set_take(
	mdsetname_t	*sp,
	mhd_mhiargs_t	*mhiargsp,
	int		flags,
	int		usetag,
	md_error_t	*ep
)
{
	md_set_desc		*sd;
	md_drive_desc		*dd;
	md_drive_desc		*d = NULL;
	char			*owner = NULL;
	int			rval = 0;
	int			i;
	int			has_set;
	int			matches = 0;
	int			numsides = 0;
	md_replicalist_t	*rlp = NULL;
	sigset_t		oldsigs;
	md_setkey_t		*cl_sk;
	int			rb_level = 0;
	md_error_t		xep = mdnullerror;
	mdsetname_t		*local_sp = NULL;
	side_t			side = MD_KEYWILD;
	int			ret = 0;
	char			*newname = NULL;
	mdkey_t			side_names_key;

	if ((flags & TAKE_USETAG) || (flags & TAKE_USEIT)) {
		if (flags & TAKE_USETAG) {
			if (usetag_take(sp->setno, usetag, ep))
				return (-1);
		} else {
			if (useit_take(sp->setno, ep))
				return (-1);
		}

		/* a failed resync is not fatal here */
		if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, ep) != 0)
			mdclrerror(ep);
	}

	/* Do we own the set? */
	i = own_set(sp, &owner, (flags & TAKE_FORCE), ep);
	if (! mdisok(ep)) {
		if (owner != NULL)
			Free(owner);
		return (-1);
	}

	if (i == MD_SETOWNER_NO) {
		(void) mddserror(ep, MDE_DS_NOTOWNER, sp->setno, owner, NULL,
		    sp->setname);
		if (owner != NULL)
			Free(owner);
		return (-1);
	}

	if (owner != NULL) {
		Free(owner);
		owner = NULL;
	}

	/* We already own it, we are done. */
	if (i == MD_SETOWNER_YES)
		return (0);

	if ((sd = metaget_setdesc(sp, &xep)) == NULL) {
		/* hand the reason for the failure back to the caller */
		(void) mdstealerror(ep, &xep);
		return (-1);
	}

	/* You can not take ownership of a set that has no drives */
	if (sd->sd_flags & MD_SR_MB_DEVID)
		dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, ep);
	else
		dd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);

	if (dd == NULL) {
		if (! mdisok(ep))
			return (-1);
		return (0);
	}

	/* END CHECK CODE */

	md_rb_sig_handling_on();

	/* Lock the set on our side */
	if (clnt_lock_set(mynode(), sp, ep)) {
		rval = -1;
		goto out;
	}

	/*
	 * Get the current side number - do not use getmyside()
	 * as this code is essentially getnodeside() and this saves
	 * some instructions.
	 */
	for (i = 0; i < MD_MAXSIDES; i++) {
		if (sd->sd_nodes[i][0] == '\0')
			continue;
		if (strcmp(sd->sd_nodes[i], mynode()) == 0) {
			/*
			 * SKEW is required for the local set
			 * as side 0 in this set is the node
			 * associated with it (this node).
			 */
			side = i + SKEW;
			break;
		}
	}

	/*
	 * This node is not a member of the set.  The set is locked on this
	 * side and rollback signal handling is on, so leave through "out"
	 * to undo both rather than returning directly.
	 */
	if (side == MD_KEYWILD) {
		(void) mddserror(ep, MDE_DS_HOSTNOSIDE, sp->setno, mynode(),
		    NULL, mynode());
		rval = -1;
		goto out;
	}

	/*
	 * Check the local devid namespace to see if the disks
	 * have been moved. Use the local set first of all as this contains
	 * entries for the disks in the set.
	 *
	 * This is being done before the tk_own_bydd because the disks
	 * in the dd list could be wrong! But it should be done with the lock
	 * held for the set.
	 */
	local_sp = metasetname(MD_LOCAL_NAME, ep);
	if (local_sp == NULL) {
		rval = -1;
		goto out;
	}

	for (d = dd; d != NULL; d = d->dd_next) {
		/*
		 * Actually do the check of the disks.
		 */
		ret = meta_upd_ctdnames(&local_sp, 0, side, d->dd_dnp, &newname,
		    ep);

		if ((ret == METADEVADM_ERR) ||
		    (ret == METADEVADM_DSKNAME_ERR)) {
			/* check failed in some unknown manner */
			rval = -1;
			goto out;
		} else if (ret == METADEVADM_DISKMOVE) {

			/*
			 * Update the dd namelist so that the rpc.metamhd
			 * gets the correct disks to reserve - it is the rname
			 * we are interested in.
			 */
			if (newname != NULL) {
				/*
				 * Need to save the side names key as this
				 * points to the namespace entry that will
				 * need to be updated. In addition the call
				 * to meta_make_sidenmlist does not actually
				 * set the namespace key.
				 */
				side_names_key = d->dd_dnp->side_names_key;
				metafreedrivename(d->dd_dnp);
				d->dd_dnp = metadrivename(&sp,
				    metadiskname(newname), ep);
				Free(newname);
				/*
				 * null newname so we are reset for next time
				 * through
				 */
				newname = NULL;

				/* the new drive name could not be built */
				if (d->dd_dnp == NULL) {
					rval = -1;
					goto out;
				}

				ret = meta_make_sidenmlist(sp, d->dd_dnp, ep);
				d->dd_dnp->side_names_key = side_names_key;
				if (ret == -1) {
					rval = -1;
					goto out;
				}
			}
		}
	}

	RB_TEST(1, "take", ep)

	RB_PREEMPT;
	rb_level = 1;	/* level 1 */

	RB_TEST(2, "take", ep)

	if (!MD_ATSET_DESC(sd)) {
		if (tk_own_bydd(sp, dd, mhiargsp, FALSE, ep))
			goto rollback;
	}

	RB_TEST(3, "take", ep)

	RB_PREEMPT;
	rb_level = 2;	/* level 2 */

	RB_TEST(4, "take", ep)

	if (clnt_stimeout(mynode(), sp, mhiargsp, ep) == -1)
		goto rollback;

	if (setup_db_bydd(sp, dd, (flags & TAKE_FORCE), ep) == -1) {
		if (! mdismddberror(ep, MDE_DB_ACCOK) &&
		    ! mdismddberror(ep, MDE_DB_TAGDATA))
			goto rollback;
		mdclrerror(ep);
	}

	RB_TEST(5, "take", ep)

	RB_PREEMPT;
	rb_level = 3;	/* level 3 */

	RB_TEST(6, "take", ep)

	/* Snarf set of traditional diskset doesn't use stale information */
	if (snarf_set(sp, FALSE, ep)) {
		if (mdismddberror(ep, MDE_DB_STALE) ||
		    mdismddberror(ep, MDE_DB_ACCOK) ||
		    mdismddberror(ep, MDE_DB_TAGDATA)) {
			rval = -1;
			goto out;
		}

		if (! mdismddberror(ep, MDE_DB_NODB) &&
		    ! mdismddberror(ep, MDE_DB_NOTOWNER))
			goto rollback;

		/*
		 * Look at the set on all other hosts, if every other host
		 * has the same set with a larger genid, then we destroy this
		 * copy.
		 */
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* Skip this node */
			if (strcmp(sd->sd_nodes[i], mynode()) == 0)
				continue;

			numsides++;

			has_set = nodehasset(sp, sd->sd_nodes[i],
			    NHS_NST_EQ_G_GT, &xep);

			if (has_set < 0) {
				if (! mdiserror(&xep, MDE_NO_SET) &&
				    ! mdismddberror(&xep, MDE_DB_NODB))
					goto rollback;
				matches++;
				mdclrerror(&xep);
				continue;
			}

			if (has_set)
				matches++;
		}

		/* Destroy the set */
		if (numsides > 0 && (numsides - matches) == 0) {
			if (meta_set_destroy(sp, FALSE, &xep))
				mdclrerror(&xep);
			(void) mddserror(ep, MDE_DS_SETCLEANUP, sp->setno,
			    sp->setname, NULL, mynode());
			rval = -1;
			goto out;
		}
		goto rollback;
	}

	rval = pathname_reload(&sp, sp->setno, ep);
	if ((rval == METADEVADM_ERR) || (rval == METADEVADM_DSKNAME_ERR)) {
		goto rollback;
	}

	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
		goto rollback;

	if (upd_dr_dbinfo(sp, sd, dd, rlp, (flags & TAKE_FORCE), ep) < 0) {
		metafreereplicalist(rlp);
		goto rollback;
	}

	metafreereplicalist(rlp);

	/*
	 * If the set doesn't have the MD_SR_MB_DEVID bit set, i.e
	 * the drives in the set don't have the device id information,
	 * then stick it in if possible.
	 *
	 * If updating the master block fails for whatever reason, it's
	 * okay. It just means the disk(s) in the diskset won't be self
	 * identifying.
	 */
	if (!(sd->sd_flags & MD_SR_MB_DEVID)) {
		/* Lock the set on current set members */
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* We already locked this side */
			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
				continue;

			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
				rval = -1;
				goto out;
			}
		}
		rb_level = 4;	/* level 4 */

		if (meta_update_mb(sp, dd, ep) == 0) {
			/* update the sr_flags on all hosts */
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				if (clnt_upd_sr_flags(sd->sd_nodes[i],
				    sp, (sd->sd_flags | MD_SR_MB_DEVID), ep))
					goto rollback;
			}
		} else {
			/*
			 * The failure is tolerated (see above), so don't
			 * leave its error behind in ep.
			 */
			mdclrerror(ep);
		}

		cl_sk = cl_get_setkey(sp->setno, sp->setname);
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* Unlock of this side is done later */
			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
				continue;

			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
			}
		}
	}

	/*
	 * If we get here, we need to unlock the set before the resync
	 * gets called, otherwise the "daemon" will hold the set lock
	 * until the resync is done!
	 */

	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
		if (rval == 0)
			(void) mdstealerror(ep, &xep);
		rval = -1;
	}
	cl_set_setkey(NULL);

	md_rb_sig_handling_off(md_got_sig(), md_which_sig());

	/* We try to get things resync'ed, but this can fail */
	mdclrerror(&xep);
	if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, &xep) != 0) {
		if (rval == 0)
			(void) mdstealerror(ep, &xep);
		rval = -1;
	}

	RB_TEST(7, "take", ep)

	return (rval);

out:
	/* Drop the set locks without undoing any of the work done so far */
	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
		if (rval == 0)
			(void) mdstealerror(ep, &xep);
		rval = -1;
	}
	if (!(sd->sd_flags & MD_SR_MB_DEVID) && (rb_level > 2)) {
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* We already unlocked this side */
			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
				continue;

			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
				if (rval == 0)
					(void) mdstealerror(ep, &xep);
				rval = -1;
			}
		}
	}
	cl_set_setkey(NULL);

	md_rb_sig_handling_off(md_got_sig(), md_which_sig());

	return (rval);

rollback:
	/* Make sure we are blocking all signals */
	if (procsigs(TRUE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	rval = -1;

	/*
	 * level 4
	 *
	 * NOTE(review): level 4 is only entered when MD_SR_MB_DEVID is
	 * clear in sd->sd_flags, so the flag reset below only runs if sd
	 * has been refreshed in the meantime - confirm.
	 */
	if (rb_level > 3) {
		if (sd->sd_flags & MD_SR_MB_DEVID) {
			/* update the sr_flags on all hosts */
			for (i = 0; i < MD_MAXSIDES; i++) {
				/* Skip empty slots */
				if (sd->sd_nodes[i][0] == '\0')
					continue;

				if (clnt_upd_sr_flags(sd->sd_nodes[i], sp,
				    (sd->sd_flags & ~MD_SR_MB_DEVID), &xep))
					mdclrerror(&xep);
			}
		}

		cl_sk = cl_get_setkey(sp->setno, sp->setname);
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sd->sd_nodes[i][0] == '\0')
				continue;

			/* This side is unlocked below */
			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
				continue;

			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
				mdclrerror(&xep);
		}
	}

	/* level 3 */
	if (rb_level > 2) {
		if (halt_set(sp, &xep))
			mdclrerror(&xep);
	}

	/* level 2 */
	if (rb_level > 1) {
		if (clnt_stimeout(mynode(), sp, &defmhiargs, &xep) == -1)
			mdclrerror(&xep);
	}

	/* level 1 */
	if (rb_level > 0) {
		if (!MD_ATSET_DESC(sd)) {
			if (rel_own_bydd(sp, dd, FALSE, &xep))
				mdclrerror(&xep);
		}
	}

	/* level 0 */
	cl_sk = cl_get_setkey(sp->setno, sp->setname);
	if (clnt_unlock_set(mynode(), cl_sk, &xep))
		mdclrerror(&xep);
	cl_set_setkey(NULL);

	/* release signals back to what they were on entry */
	if (procsigs(FALSE, &oldsigs, &xep) < 0)
		mdclrerror(&xep);

	md_rb_sig_handling_off(md_got_sig(), md_which_sig());

	return (rval);
}
/*
 * Release ownership of the diskset "sp" on this node.
 *
 * After the ownership check the set is locked on this node, then halted
 * (level 1), the drives are released (level 2) and the timeouts are set to
 * defmhiargs (level 3).  A failure at any step undoes the steps already
 * completed, in reverse order, using the timeout values that were read
 * before the release began.
 *
 * Returns 0 on success and -1 on failure with the error in "ep".
 */
int
meta_set_release(
	mdsetname_t	*sp,
	md_error_t	*ep
)
{
	md_error_t	tmp_err = mdnullerror;
	md_setkey_t	*setkey;
	md_drive_desc	*drives;
	mhd_mhiargs_t	saved_mhi;
	sigset_t	saved_sigs;
	int		rb_level = 0;
	int		rval = 0;

	/* Only the owner may release the set */
	if (meta_check_ownership(sp, ep) != 0)
		return (-1);

	/* Fetch the drive list; an empty list without an error is fine */
	drives = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
	if ((drives == NULL) && (! mdisok(ep)))
		return (-1);

	/* Remember the current timeouts so a rollback can restore them */
	(void) memset(&saved_mhi, '\0', sizeof (saved_mhi));
	if (clnt_gtimeout(mynode(), sp, &saved_mhi, ep) != 0)
		return (-1);

	/* END CHECK CODE */

	md_rb_sig_handling_on();

	/* Take the set lock on this node */
	if (clnt_lock_set(mynode(), sp, ep)) {
		rval = -1;
		goto out;
	}

	RB_TEST(1, "release", ep)

	RB_PREEMPT;
	rb_level = 1;	/* level 1 */

	RB_TEST(2, "release", ep)

	if (halt_set(sp, ep))
		goto rollback;

	RB_TEST(3, "release", ep)

	RB_PREEMPT;
	rb_level = 2;	/* level 2 */

	RB_TEST(4, "release", ep)

	if (rel_own_bydd(sp, drives, FALSE, ep))
		goto rollback;

	RB_TEST(5, "release", ep)

	RB_PREEMPT;
	rb_level = 3;	/* level 3 */

	RB_TEST(6, "release", ep)

	if (clnt_stimeout(mynode(), sp, &defmhiargs, ep) == -1)
		goto rollback;

	RB_TEST(7, "release", ep)

out:
	/*
	 * Normal exit: drop the set lock.  An unlock failure is reported
	 * through ep only if nothing has failed before it.
	 */
	setkey = cl_get_setkey(sp->setno, sp->setname);
	if (clnt_unlock_set(mynode(), setkey, &tmp_err)) {
		if (rval == 0)
			(void) mdstealerror(ep, &tmp_err);
		rval = -1;
	}
	cl_set_setkey(NULL);

	md_rb_sig_handling_off(md_got_sig(), md_which_sig());

	return (rval);

rollback:
	/* Block all signals while the release is being undone */
	if (procsigs(TRUE, &saved_sigs, &tmp_err) < 0)
		mdclrerror(&tmp_err);

	rval = -1;

	/* level 3: put the saved timeouts back */
	if (rb_level > 2) {
		if (clnt_stimeout(mynode(), sp, &saved_mhi, &tmp_err) == -1)
			mdclrerror(&tmp_err);
	}

	/* level 2: take the drives again */
	if (rb_level > 1) {
		if (tk_own_bydd(sp, drives, &saved_mhi, FALSE, &tmp_err))
			mdclrerror(&tmp_err);
	}

	/* level 1: set the databases up again and re-snarf the set */
	if (rb_level > 0) {
		if (setup_db_bydd(sp, drives, TRUE, &tmp_err) == -1)
			mdclrerror(&tmp_err);

		/* Snarf set of trad diskset doesn't use stale information */
		if (snarf_set(sp, FALSE, &tmp_err))
			mdclrerror(&tmp_err);
	}

	/* level 0: drop the set lock; errors are discarded */
	setkey = cl_get_setkey(sp->setno, sp->setname);
	if (clnt_unlock_set(mynode(), setkey, &tmp_err))
		mdclrerror(&tmp_err);
	cl_set_setkey(NULL);

	/* restore the signal state that was in effect on entry */
	if (procsigs(FALSE, &saved_sigs, &tmp_err) < 0)
		mdclrerror(&tmp_err);

	md_rb_sig_handling_off(md_got_sig(), md_which_sig());

	return (rval);
}