/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Metadevice diskset interfaces
*/
#include <meta.h>
#include <mdmn_changelog.h>
#include "meta_set_prv.h"
#include "meta_repartition.h"
static int
)
{
int i;
return (-1);
if (MD_MNSET_DESC(sd)) {
while (nd) {
continue;
}
return (-1);
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
return (-1);
}
}
return (0);
}
static int
{
continue;
}
}
return (0);
}
static md_drive_desc *
)
{
return (NULL);
}
return (dd);
}
/*
* Exported Entry Points
*/
int
int import_flag, /* flags partial import */
)
{
int done;
char *bname;
if (!import_flag) {
/*
* Normal (aka NOT partial import) code path.
*/
return (-1);
}
return (-1);
} else {
/*
* When doing a partial import, we'll get the needed
* information from somewhere other than the system.
*/
}
/*CONSTCOND*/
while (1) {
if (import_flag) {
mdclrerror(ep);
} else {
return (-1);
}
}
if (done == 0) {
return (0);
}
/* Add to the end of the linked list */
}
/*NOTREACHED*/
}
int
int force_label,
)
{
int i;
int rval = 0;
int rb_level = 0;
int suspendall_flag = 0;
int suspend1_flag = 0;
int lock_flag = 0;
int flush_set_onerr = 0;
return (-1);
/* Make sure we own the set */
return (-1);
/*
* The drive and node records are stored in the local mddbs of each
* node in the diskset. Each node's rpc.metad daemon reads in the set,
* drive and node records from that node's local mddb and caches them
* internally. Any process needing diskset information contacts its
* local rpc.metad to get this information. Since each node in the
* diskset is independently reading the set information from its local
* mddb, the set, drive and node records in the local mddbs must stay
* in-sync, so that all nodes have a consistent view of the diskset.
*
* For a multinode diskset, explicitly verify that all nodes in the
* diskset are ALIVE (i.e. are in the API membership list). Otherwise,
* fail this operation since all nodes must be ALIVE in order to add
* the new drive record to their local mddb. If a panic of this node
* leaves the set, node and drive records in the local mddbs out of sync, the
* reconfig cycle will fix the local mddbs and force them back into
* synchronization.
*/
if (MD_MNSET_DESC(sd)) {
while (nd) {
return (-1);
}
}
}
return (-1);
/*
* Lock the set on current set members.
* Set locking done much earlier for MN diskset than for traditional
* diskset since lock_set and SUSPEND are used to protect against
* other meta* commands running on the other nodes.
*/
if (MD_MNSET_DESC(sd)) {
/* Make sure we are blocking all signals */
mdclrerror(&xep);
/* All nodes are guaranteed to be ALIVE */
while (nd) {
rval = -1;
goto out;
}
lock_flag = 1;
}
/*
* Lock out other meta* commands by suspending
* class 1 messages across the diskset.
*/
/* All nodes are guaranteed to be ALIVE */
while (nd) {
MD_MSCF_NO_FLAGS, ep)) {
rval = -1;
goto out;
}
suspend1_flag = 1;
}
}
rval = -1;
goto out;
}
ep) == -1) {
rval = -1;
goto out;
}
rval = -1;
goto out;
}
}
/* END CHECK CODE */
/*
* This is a separate loop (from above) so that we validate all the
* drives handed to us before we repartition any one drive.
*/
if (meta_repartition_drive(sp,
NULL, /* Don't return the VTOC. */
ep) != 0) {
rval = -1;
goto out;
}
/*
* Create the names for the drives we are adding per side.
*/
ep) == -1) {
rval = -1;
goto out;
}
}
/*
* Get the list of drive descriptors that we are adding.
*/
rval = -1;
goto out;
}
/*
* Get the set timeout information.
*/
rval = -1;
goto out;
}
/*
* Get timestamp and generation id for new records
*/
/* At this point, in case of error, set should be flushed. */
flush_set_onerr = 1;
/* Lock the set on current set members */
if (!(MD_MNSET_DESC(sd))) {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
rval = -1;
goto out;
}
lock_flag = 1;
}
}
/*
* Get drive descriptors for the drives that are currently in the set.
*/
goto rollback;
/*
* If first drive being added to set, set the mastership
* of the multinode diskset to be this node.
* Only set it on this node. If all goes well
* and there are no errors, the mastership of this node will be set
* on all nodes in user space and in the kernel.
*/
goto rollback;
}
/*
* Set this up in my local cache of the set desc so that
* the set descriptor won't have to be gotten again from
* rpc.metad. If it is flushed and gotten again, these
* values will be set in sr2setdesc.
*/
}
/*
* Add the drive records for the drives that we are adding to
* each host in the set. Marks the drive as MD_DR_ADD.
*/
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
ep) == -1)
goto rollback;
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
ep) == -1)
goto rollback;
}
}
/*
* Take ownership of the added drives.
*/
goto rollback;
}
/*
* If this is not a MN set and the state flags do not indicate the
* presence of devids, update the set records on all nodes.
*/
mdclrerror(ep);
/* update the sr_flags on all hosts */
for (i = 0; i < MD_MAXSIDES; i++) {
continue;
goto rollback;
}
}
}
/*
* Balance the DB's according to the list of existing drives and the
* list of added drives.
*/
goto rollback;
/*
* Slam a dummy master block on all the disks that we are adding
* that don't have replicas on them.
* Used by diskset import if the disksets are remotely replicated
*/
char *drive_name;
char *rep_name;
rep_name =
/*
* Disk has a replica on it so don't
* add dummy master block.
*/
break;
}
}
/*
* Drive doesn't have a replica on it so
* we need a dummy master block. Add it.
*/
&xep) != 0) {
mdclrerror(&xep);
continue;
}
mdclrerror(&xep);
continue;
}
}
}
}
}
/*
* Notify rpc.mdcommd on all nodes of a nodelist change.
* Start by suspending rpc.mdcommd (which drains it of all
* messages), then change the nodelist followed by a reinit
* and resume.
*/
/* All nodes are guaranteed to be ALIVE */
while (nd) {
rval = -1;
goto out;
}
suspendall_flag = 1;
}
}
/*
* If a MN diskset and this is the first disk(s) being added
* to set, then pre-allocate change log records here.
* When the other nodes are joined into the MN diskset, the
* USER records will just be snarfed in.
*/
goto rollback;
}
/*
* Mark the drives MD_DR_OK.
* If first drive being added to MN diskset, then set
* master on all nodes to be this node and then join
* all alive nodes (nodes in membership list) to set.
*/
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/* don't set master on this node - done earlier */
/*
* Set master on all alive nodes since
* all alive nodes will become joined nodes.
*/
goto rollback;
}
}
/*
* No special flags for join set. Since
* all nodes are joining if 1st drive is being
* added to set then all nodes will be either
* STALE or non-STALE and each node can
* determine this on its own.
*/
goto rollback;
}
/* Sets join node flag on all nodes in list */
goto rollback;
}
}
/*
* Set MD_DR_OK as last thing before unlock.
* In case of panic on this node, recovery
* code can check for MD_DR_OK to determine
* status of diskset.
*/
goto rollback;
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
ep) == -1)
goto rollback;
}
}
out:
/*
* Notify rpc.mdcommd on all nodes of a nodelist change.
* Send reinit command to mdcommd which forces it to get
* fresh set description.
*/
if (suspendall_flag) {
/* Send reinit */
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/* Class is ignored for REINIT */
if (rval == 0)
rval = -1;
"Unable to reinit rpc.mdcommd.\n"));
}
}
}
/*
* Unlock diskset by resuming messages across the diskset.
* Just resume all classes so that resume is the same whether
* just one class was locked or all classes were locked.
*/
if ((suspend1_flag) || (suspendall_flag)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
if (rval == 0)
rval = -1;
"Unable to resume rpc.mdcommd.\n"));
}
}
}
if (lock_flag) {
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
if (rval == 0)
rval = -1;
}
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
if (rval == 0)
rval = -1;
}
}
}
}
if (flush_set_onerr) {
if (!(MD_MNSET_DESC(sd))) {
}
}
if (MD_MNSET_DESC(sd)) {
/* release signals back to what they were on entry */
mdclrerror(&xep);
}
return (rval);
/* all signals already blocked for MN diskset */
if (!(MD_MNSET_DESC(sd))) {
/* Make sure we are blocking all signals */
mdclrerror(&xep);
}
rval = -1;
/* level 3 */
if (rb_level > 2) {
/*
* Since the add drive operation is failing, need
* to reset config back to the way it was
* before the add drive operation.
* If a MN diskset and this is the first drive being added,
* then reset master on all ALIVE nodes (which is all nodes)
* since the master would have not been set previously.
* Don't reset master on this node, since this
* is done later.
* This is ok to fail since next node to add first
* disk to diskset will also set the master on all nodes.
*
* Also, if this is the first drive being added,
* need to have each node withdraw itself from the set.
*/
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/*
* Be careful with ordering in case of
* panic between the steps and the
* effect on recovery during reconfig.
*/
mdclrerror(&xep);
/* Sets withdraw flag on all nodes in list */
mdclrerror(&xep);
}
/* Skip this node */
continue;
}
/* Reset master on all of the other nodes. */
mdclrerror(&xep);
}
}
}
/*
* Send resume command to mdcommd. Don't send reinit command
* since nodelist should not have changed.
* If suspendall_flag is set, then user would have been adding
* first drives to set. Since this failed, there is certainly
* no reinit message to send to rpc.mdcommd since no nodes will
* be joined to set at the end of this metaset command.
*/
if (suspendall_flag) {
/* Send resume */
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/*
* Resume all classes but class 1 so that lock is held
* against meta* commands.
* To later resume class1, must issue a class0 resume.
*/
"Unable to resume rpc.mdcommd.\n"));
mdclrerror(&xep);
}
}
}
/* level 3 */
if (rb_level > 2) {
mdclrerror(&xep);
continue;
}
mdclrerror(&xep);
}
/* Re-balance */
mdclrerror(&xep);
/* Only if we are adding the first drive */
/* Handled MN diskset above. */
&xep) == -1)
mdclrerror(&xep);
/* This is needed because of a corner case */
mdclrerror(&xep);
}
max_genid++;
}
/* level 2 */
if (rb_level > 1) {
mdclrerror(&xep);
}
}
/* level 1 */
if (rb_level > 0) {
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
&xep) == -1)
mdclrerror(&xep);
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
&xep) == -1)
mdclrerror(&xep);
}
}
max_genid += 2;
}
if ((suspend1_flag) || (suspendall_flag)) {
/* Send resume */
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/*
* Just resume all classes so that resume is the
* same whether just one class was locked or all
* classes were locked.
*/
mdclrerror(&xep);
}
}
}
/* level 0 */
/* Don't test lock flag since guaranteed to be set if in rollback */
if (MD_MNSET_DESC(sd)) {
/*
* Since the add drive operation is failing, need
* to reset config back to the way it was
* before the add drive operation.
* If a MN diskset and this is the first drive being
* added, then reset master on this node since
* the master would have not been set previously.
* This is ok to fail since next node to add first
* disk to diskset will also set the master on all nodes.
*/
/* Reset master on mynode */
MD_MN_INVALID_NID, &xep))
mdclrerror(&xep);
}
/* All nodes are guaranteed to be ALIVE */
while (nd) {
mdclrerror(&xep);
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
mdclrerror(&xep);
}
}
/* release signals back to what they were on entry */
mdclrerror(&xep);
if (flush_set_onerr) {
if (!(MD_MNSET_DESC(sd))) {
}
}
return (rval);
}
/*
* Add drives routine used during import of a diskset.
*/
int
)
{
int flush_set_onerr = 0;
int rval = 0;
int rb_level = 0;
return (-1);
int imp_flag = 0;
/*
* If we have a partial diskset, meta_make_sidenmlist will
* need information from midp to complete making the
* side name structure.
*/
if (misp->mis_partial) {
break;
}
rval = -1;
goto out;
}
}
/*
* Create the names for the drives we are adding per side.
*/
rval = -1;
goto out;
}
}
/*
* Get the list of drive descriptors that we are adding.
*/
rval = -1;
goto out;
}
/*
* Get the set timeout information.
*/
rval = -1;
goto out;
}
/*
* Get timestamp and generation id for new records
*/
/* At this point, in case of error, set should be flushed. */
flush_set_onerr = 1;
/* same disk */
if (mirp) {
}
}
if ((midp->mid_available &
}
}
}
}
/*
* Add the drive records for the drives that we are adding to
* each host in the set. Marks the drive records as MD_DR_ADD.
* May also mark a drive record as MD_DR_UNRSLV_REPLICATED if
* this flag was set in the dd_flags for that drive.
*/
goto rollback;
/*
* Take ownership of the added drives.
*/
goto rollback;
out:
if (flush_set_onerr) {
}
return (rval);
/* Make sure we are blocking all signals */
mdclrerror(&xep);
rval = -1;
/* level 2 */
if (rb_level > 1) {
if (!MD_ATSET_DESC(sd)) {
mdclrerror(&xep);
}
}
}
/* level 1 */
if (rb_level > 0) {
mdclrerror(&xep);
}
max_genid += 2;
}
/* level 0 */
/* release signals back to what they were on entry */
mdclrerror(&xep);
if (flush_set_onerr) {
}
return (rval);
}
int
int forceflg,
)
{
int deldrvcnt = 0;
int rval = 0;
int i;
int rb_level = 0;
int has_set;
int current_drv_cnt = 0;
int suspend1_flag = 0;
int lock_flag = 0;
int flush_set_onerr = 0;
return (-1);
/* Make sure we own the set */
return (-1);
return (-1);
/*
* Check and see if all the nodes have the set.
*
* The drive and node records are stored in the local mddbs of each
* node in the diskset. Each node's rpc.metad daemon reads in the set,
* drive and node records from that node's local mddb and caches them
* internally. Any process needing diskset information contacts its
* local rpc.metad to get this information. Since each node in the
* diskset is independently reading the set information from its local
* mddb, the set, drive and node records in the local mddbs must stay
* in-sync, so that all nodes have a consistent view of the diskset.
*
* For a multinode diskset, explicitly verify that all nodes in the
* diskset are ALIVE (i.e. are in the API membership list). Otherwise,
* fail this operation since all nodes must be ALIVE in order to delete
* a drive record from their local mddb. If a panic of this node
* leaves the set, node and drive records in the local mddbs out of sync, the
* reconfig cycle will fix the local mddbs and force them back into
* synchronization.
*/
if (MD_MNSET_DESC(sd)) {
while (nd) {
return (-1);
}
}
/* Make sure we are blocking all signals */
mdclrerror(&xep);
/*
* Lock the set on current set members.
* Set locking done much earlier for MN diskset than for
* traditional diskset since lock_set and SUSPEND are used
* to protect against other meta* commands running on the
* other nodes.
*/
/* All nodes are guaranteed to be ALIVE */
while (nd) {
rval = -1;
goto out;
}
lock_flag = 1;
}
/*
* Lock out other meta* commands by suspending
* class 1 messages across the diskset.
*/
/* All nodes are guaranteed to be ALIVE */
while (nd) {
MD_MSCF_NO_FLAGS, ep)) {
rval = -1;
goto out;
}
suspend1_flag = 1;
}
/* All nodes are guaranteed to be ALIVE */
while (nd) {
continue;
}
NHS_NSTG_EQ, ep);
if (has_set < 0) {
rval = -1;
goto out;
}
if (! has_set) {
rval = -1;
goto out;
}
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
continue;
ep);
if (has_set < 0) {
/*
* Can directly return since !MN diskset;
* nothing to unlock.
*/
return (-1);
}
if (! has_set) {
/*
* Can directly return since !MN diskset;
* nothing to unlock.
*/
}
}
}
int is_it;
dnp = p->drivenamep;
== -1) {
rval = -1;
goto out;
}
if (! is_it) {
rval = -1;
goto out;
}
rval = -1;
goto out;
}
deldrvcnt++;
}
/*
* Get drive descriptors for the drives that are currently in the set.
*/
rval = -1;
goto out;
}
/*
* Decrement the delete drive count for each drive currently in the
* set.
*/
deldrvcnt--;
/*
* If the count of drives we are deleting is equal to the drives in the
* set, and we haven't specified forceflg, return an error
*/
rval = -1;
goto out;
}
/*
* Get the list of drive descriptors that we are deleting.
*/
rval = -1;
goto out;
}
/*
* Get the set timeout information in case we have to roll back.
*/
rval = -1;
goto out;
}
/* At this point, in case of error, set should be flushed. */
flush_set_onerr = 1;
/* END CHECK CODE */
/* Lock the set on current set members */
if (!(MD_MNSET_DESC(sd))) {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
rval = -1;
goto out;
}
lock_flag = 1;
}
}
/*
* Is current set STALE?
*/
(void) memset(&c, 0, sizeof (c));
c.c_id = 0;
rval = -1;
goto out;
}
if (c.c_flags & MDDB_C_STALE) {
stale_bool = TRUE;
}
}
/*
* Mark the drives MD_DR_DEL
*/
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
goto rollback;
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
goto rollback;
}
}
/*
* Balance the DB's according to the list of existing drives and the
* list of deleted drives.
*/
goto rollback;
/*
* If the drive(s) to be deleted cannot be accessed,
* they haven't really been deleted yet. Check and delete now
* if need be.
*/
char *delete_name;
char *cur_name;
cur_name =
/* put it on the delete list */
}
}
}
ep) == -1) {
goto rollback;
}
}
}
/*
* Cannot suspend set until after meta_db_balance since
* meta_db_balance uses META_DB_ATTACH/DETACH messages.
*/
/*
* Notify rpc.mdcommd on all nodes of a nodelist change.
* Start by suspending rpc.mdcommd (which drains it of all
* messages), then change the nodelist followed by a reinit
* and resume.
*/
/* All nodes are guaranteed to be ALIVE */
while (nd) {
rval = -1;
goto out;
}
suspendall_flag = 1;
}
}
/*
* Remove the drive records for the drives that were deleted from
* each host in the set. This removes the record and dr_flags.
*/
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
goto rollback;
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
goto rollback;
}
}
goto rollback;
}
/* If we deleted all the drives, then we need to halt the set. */
if (deldrvcnt == 0) {
goto rollback;
/* Halt MN diskset on all nodes by having node withdraw */
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/* Only withdraw nodes that are joined */
continue;
}
/*
* Going to set locally cached node flags to
* rollback join so in case of error, the
* rollback code knows which nodes to re-join.
*/
/*
* Be careful in ordering of following steps
* so that recovery from a panic between
* the steps is viable.
* Only reset master info in rpc.metad -
* don't reset local cached information
* which will be used to set master information
* back in case of failure (rollback).
*/
goto rollback;
/* Sets withdraw flag on all nodes in list */
goto rollback;
}
goto rollback;
}
}
} else {
goto rollback;
}
}
out:
/*
* Notify rpc.mdcommd on all nodes of a nodelist change.
* Send reinit command to mdcommd which forces it to get
* fresh set description.
*/
if (suspendall_flag) {
/* Send reinit */
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/* Class is ignored for REINIT */
if (rval == 0)
rval = -1;
"Unable to reinit rpc.mdcommd.\n"));
}
}
}
/*
* Just resume all classes so that resume is the same whether
* just one class was locked or all classes were locked.
*/
if ((suspend1_flag) || (suspendall_flag)) {
/* Send resume */
/* All nodes are guaranteed to be ALIVE */
while (nd) {
if (rval == 0)
rval = -1;
"Unable to resume rpc.mdcommd.\n"));
}
}
}
if (lock_flag) {
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
if (rval == 0)
rval = -1;
}
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
if (rval == 0)
rval = -1;
}
}
}
}
if (flush_set_onerr) {
if (!(MD_MNSET_DESC(sd))) {
}
}
if (MD_MNSET_DESC(sd)) {
/* release signals back to what they were on entry */
mdclrerror(&xep);
}
return (rval);
/* all signals already blocked for MN diskset */
if (!(MD_MNSET_DESC(sd))) {
/* Make sure we are blocking all signals */
mdclrerror(&xep);
}
rval = -1;
/* Set the master on all nodes first thing */
if (rb_level > 5) {
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
continue;
}
/*
* Set master on all re-joining nodes to be
* my cached view of master.
*/
mdclrerror(&xep);
}
}
}
}
/* level 3 */
if (rb_level > 2) {
int sr_drive_cnt;
/*
* See if we have to re-add the drives specified.
*/
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/*
* Must get current set record from each
* node to see what else must be done
* to recover.
* Record should be for a multi-node diskset.
*/
mdclrerror(&xep);
continue;
}
/*
* If all drives are already there, skip
* to next node.
*/
sr_drive_cnt = 0;
while (dr) {
sr_drive_cnt++;
}
if (sr_drive_cnt == current_drv_cnt) {
continue;
}
/* Readd all drives */
mdclrerror(&xep);
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* Record should be for a non-multi-node set */
mdclrerror(&xep);
continue;
}
/*
* Set record structure was allocated from RPC
* routine getset so this structure is only of
* size md_set_record even if the MN flag is
* set. So, clear the flag so that the free
* code doesn't attempt to free a structure
* the size of md_mnset_record.
*/
if (MD_MNSET_REC(sr)) {
continue;
}
/* Drive already added, skip to next node */
continue;
}
mdclrerror(&xep);
}
}
max_genid += 2;
}
/*
* Notify rpc.mdcommd on all nodes of a nodelist change.
* At this point in time, don't know which nodes are joined
* to the set. So, send a reinit command to mdcommd
* which forces it to get fresh set description. Then send resume.
*
* Later, this code will use rpc.mdcommd messages to reattach disks
* and then rpc.mdcommd may be suspended again, rest of the nodes
* joined, rpc.mdcommd reinited and then resumed.
*/
if (suspendall_flag) {
/* Send reinit */
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/* Class is ignored for REINIT */
"Unable to reinit rpc.mdcommd.\n"));
mdclrerror(&xep);
}
}
/* Send resume */
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/*
* Resume all classes but class 1 so that lock is held
* against meta* commands.
* To later resume class1, must issue a class0 resume.
*/
"Unable to resume rpc.mdcommd.\n"));
mdclrerror(&xep);
}
}
}
/* level 2 */
if (rb_level > 1) {
mdclrerror(&xep);
continue;
}
mdclrerror(&xep);
}
/* Re-balance */
mdclrerror(&xep);
}
/* level 4 */
if (rb_level > 3) {
mdclrerror(&xep);
}
}
/* level 5 */
if (rb_level > 4) {
mdclrerror(&xep);
}
/*
* If at least one node needs to be rejoined to MN diskset,
* then suspend commd again.
*/
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
continue;
}
break;
}
if (nd) {
/*
* Found node that will be rejoined so
* notify rpc.mdcommd on all nodes of a nodelist change.
* Start by suspending rpc.mdcommd (which drains it of
* all messages), then change the nodelist followed by
* a reinit and resume.
*/
/* All nodes are guaranteed to be ALIVE */
while (nd) {
MD_MSCF_NO_FLAGS, &xep)) {
mdclrerror(&xep);
}
suspendall_flag_rb = 1;
}
}
}
/* level 6 */
if (rb_level > 5) {
if (MD_MNSET_DESC(sd)) {
int join_flags = 0;
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/* Only rejoin nodes that were joined before */
continue;
}
/*
* Rejoin nodes to same state as before -
* either STALE or non-STALE.
*/
if (stale_bool == TRUE)
join_flags, &xep))
mdclrerror(&xep);
/* Sets OWN flag on all nodes in list */
mdclrerror(&xep);
}
}
} else {
mdclrerror(&xep);
/* No special flag for traditional diskset */
mdclrerror(&xep);
}
}
/* level 1 */
if (rb_level > 0) {
/*
* Mark the drives as OK.
*/
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/*
* Must be last action before unlock.
* In case of panic, recovery code checks
* for MD_DR_OK to know that drive
* and possible master are fully added back.
*/
mdclrerror(&xep);
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
mdclrerror(&xep);
}
}
max_genid += 2;
}
/*
* Notify rpc.mdcommd on all nodes of a nodelist change.
* Send a reinit command to mdcommd which forces it to get
* fresh set description.
*/
if (suspendall_flag_rb) {
/* Send reinit */
/* All nodes are guaranteed to be ALIVE */
while (nd) {
/* Class is ignored for REINIT */
"Unable to reinit rpc.mdcommd.\n"));
mdclrerror(&xep);
}
}
}
/*
* Just resume all classes so that resume is the same whether
* just one class was locked or all classes were locked.
*/
/* Send resume */
/* All nodes are guaranteed to be ALIVE */
while (nd) {
"Unable to resume rpc.mdcommd.\n"));
mdclrerror(&xep);
}
}
}
/* level 0 */
/* Don't test lock flag since guaranteed to be set if in rollback */
if (MD_MNSET_DESC(sd)) {
/* All nodes are guaranteed to be ALIVE */
while (nd) {
mdclrerror(&xep);
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
mdclrerror(&xep);
}
}
/* release signals back to what they were on entry */
mdclrerror(&xep);
if (flush_set_onerr) {
if (!(MD_MNSET_DESC(sd))) {
}
}
return (rval);
}