/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* Metadevice diskset interfaces
*/
#include "meta_set_prv.h"
#include <strings.h>
extern char *blkname(char *);
/*
* NOTE(review): the signature and a number of statements of this static
* routine are not visible in this view.  The summary below is drawn only
* from the surviving comments and control flow and should be confirmed
* against the complete source.
*
* Visible shape: a replica count (dbcnt) is accumulated, then three passes
* run over the set members - a lock pass, a middle pass whose call is not
* visible here, and an unlock pass at the "out" label.  Each pass has one
* branch taken when MD_MNSET_DESC(sd) is true (a while loop over nd) and
* one that walks the MD_MAXSIDES slots.  Per the comments, a forced call
* (forceflg) leaves the other sides alone, and this side is locked and
* unlocked by the caller rather than here.
*
* Returns rval: initialised to 0 and set to -1 when a step fails.
*/
static int
int forceflg,
)
{
md_drive_desc *p;
md_replica_t *r;
int i;
int dbcnt;
int rval = 0;
/* find the smallest existing replica */
}
if (nblks <= 0)
dbcnt = 0;
/*
* Before we bump up the dbcnt: if we're running with device ids
* in disksets, compare the device ids; otherwise compare the
* ctd names.
*
* There is a possibility the device ids might
* have changed. To account for that case, we
* fall back to comparing the ctd names if the
* device id comparison fails. If we aren't running
* in device id mode and a disk has moved, the ctd's
* won't match.
*/
dbcnt++;
} else {
dbcnt++;
}
}
}
/* Lock the set on current set members */
if (MD_MNSET_DESC(sd)) {
while (nd) {
/* If this is forced, don't lock other sides */
!= 0) {
continue;
}
/* We already locked this side in the caller */
continue;
}
continue;
}
/* a failed lock aborts the whole operation via the unlock path */
rval = -1;
goto out;
}
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* If this is forced, don't lock other sides */
continue;
/* We already locked this side in the caller */
continue;
rval = -1;
goto out;
}
}
}
/*
* Middle pass.  NOTE(review): the call made per node is not visible
* here; a -1 result records the failure in rval and stops the loop.
*/
if (MD_MNSET_DESC(sd)) {
while (nd) {
/* If this is forced, then only care about this node */
!= 0) {
continue;
}
continue;
}
ep) == -1) {
rval = -1;
break;
}
mdclrerror(ep);
}
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* If this is forced, then only care about this node */
continue;
ep) == -1) {
rval = -1;
break;
}
mdclrerror(ep);
}
}
}
out:
/*
* Unlock pass: reached on success and on failure alike.  An error here
* only overrides rval when no earlier failure was recorded.
*/
if (MD_MNSET_DESC(sd)) {
while (nd) {
/* If this is forced, don't unlock other sides */
!= 0) {
continue;
}
/* We will unlock this side in the caller */
continue;
}
continue;
}
if (rval == 0)
rval = -1;
}
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* If this is forced, don't unlock other sides */
continue;
/* We will unlock this side in the caller */
continue;
if (rval == 0)
rval = -1;
}
}
}
/* Do not clear the key, via cl_set_setkey(NULL) this is nested */
return (rval);
}
/*
* NOTE(review): the name, parameters and most of the body of this static
* routine are not visible in this view; the only surviving statement is
* the success return below.
*/
static int
{
/* success */
return (0);
}
/*
* NOTE(review): the name, parameters and most of the body of this static
* routine are not visible in this view; the only surviving statement is
* the success return below.
*/
static int
{
/* success */
return (0);
}
/*
* Update the master block with the device id information for the disks
* in the diskset. The device id information will be consumed by the
* diskset import code in case of remotely replicated disksets.
*
* For the drives that have a valid diskset mddb on them, we add the
* device id for the drive to the unused portion of the mddb.
*
* For the drives that don't have a diskset mddb on them, we add a dummy
* master block that contains the device id for the drive. A dummy master
* block is signified by changing the master block magic number, mb_magic,
* to MDDB_MAGIC_DU.
*
* This code is responsible primarily for adding the appropriate device id
* information to diskset disks that didn't have the information. This would
* typically occur when the OS has been upgraded from an OS release prior to
* Solaris 10.
*
* The error path in this routine is defined as follows: if an error occurs
* while updating the mddb for one disk in the diskset, don't bother updating
* *any* of the mddbs, because it's game over anyway as far as disaster
* recovery for that diskset is concerned.
*
* This code will need to be revisited if and when support for importing
* partial disksets is added.
*
* NOTE: This code relies heavily on meta_repartition() working correctly
* and reformatting a drive, so that there's enough room for a dummy master
* block, every time a drive is added to a diskset. Should
* the meta_repartition() code change in future, this code will have to be
* revisited.
*
* Returns 0 on success and -1 on failure.
*
* NOTE(review): the signature, the "cleanup" label and most calls of this
* routine are not visible in this view.
*/
int
{
void *mb;
int dbcnt;
int dbsize;
/*
* For every drive in the drive descriptor, iterate through all
* the mddbs present on it and check to see if mb_devid_magic is
* set. If it isn't, then update the master block with the correct
* device id information.
*/
int i = 0;
/*
* When the import support for remotely replicated
* disksets gets implemented, we probably want to
* inform the user that the disks won't be self
* identifying if any of these calls fails.
*/
return (-1);
return (-1);
goto cleanup;
/* if devid_str_decode fails, make sure devid is null */
}
/*
* The loop body runs at least once per drive, so a drive with
* dbcnt == 0 is still visited (see the dummy master block case below).
*/
do {
/* set when the master block was changed and must be written back */
int push = 0;
++i;
goto cleanup;
goto cleanup;
goto cleanup;
/*
* If the disk is one of the ones that doesn't
* have a shared mddb on it, we put a dummy
* master block on it.
*/
if (dbcnt == 0) {
break;
}
}
/*
* if mb_setcreatetime is 0, this field was never
* filled in so do it now.
*/
push = 1;
}
/*
* If MDDB_MAGIC_DE is set in the
* mb_devid_magic field then we know we
* have a valid device id and we don't
* need to add it to the master block.
*
* This would have to be revisited if device
* ids change as a result of device id
* algorithms changing or somesuch.
*/
sizeof (mddb_mb_t))) {
/*
* there's enough space to
* store the devid
*/
push = 1;
}
}
}
/*
* write out (push) any changes we have to the mb
*/
if (push) {
< 0)
goto cleanup;
goto cleanup;
}
if (devid)
} while (i < dbcnt);
}
/* success */
return (0);
/*
* NOTE(review): presumably the "cleanup" error path targeted by the
* gotos above (label not visible here); it ends in the failure return.
*/
if (fd != -1)
if (devid)
return (-1);
}
/*
* State defined in another translation unit.  NOTE(review): judging by the
* names, these track whether the replicated disk list has been built -
* confirm against the defining file.  Note that the first is a pointer to
* int while the second is a plain int.
*/
extern int *replicated_disk_list_built;
extern int replicated_disk_list_built_pass1;
/*
* Exported Entry Points
*/
/*
* Take ownership of a diskset.
*
* NOTE(review): the signature, the "rollback" label and most calls of this
* routine are not visible in this view; this summary is drawn from the
* surviving comments and control flow and should be confirmed against the
* complete source.
*
* Visible parameters: flags (TAKE_USETAG and TAKE_FORCE are tested) and
* usetag.
*
* Outline, per the comments below: ownership checks; lock the set on this
* side; resolve devids of a partially imported replicated diskset; check
* whether disks have moved; take the set; compare the set with the other
* hosts; fix up namespaces and master blocks; update the sr_flags on the
* active hosts; unlock; resync.
*
* Error handling comes in two forms: "goto out" only records the failure and
* unlocks, whereas the rollback tail undoes work in descending rb_level
* order (level 4 down to level 0).
*
* Returns 0 on success (including when the set is already owned), -1 on
* failure, and 2 when the caller should release the diskset and re-take it
* (see the comment ahead of "rval = 2").
*/
int
int flags,
int usetag,
)
{
md_drive_desc *d = NULL;
int rval = 0;
int pathname_return = 0;
int i;
int has_set;
int matches = 0;
int numsides = 0;
int rb_level = 0;
int ret = 0;
int unrslv_replicated = 0;
int retake_flag = 0;
if (flags & TAKE_USETAG) {
return (-1);
} else {
return (-1);
}
mdclrerror(ep);
}
/* Do we own the set? */
return (-1);
}
if (i == MD_SETOWNER_NO) {
return (-1);
}
}
/* We already own it, we are done. */
if (i == MD_SETOWNER_YES)
return (0);
return (-1);
/* You can not take ownership of a set that has no drives */
else
return (-1);
return (0);
}
/* END CHECK CODE */
/* Lock the set on our side */
rval = -1;
goto out;
}
/*
* Find the "side" value so that it can be used to deal with
* the devids.
*/
if (side == MD_SIDEWILD) {
rval = -1;
goto out;
}
/*
* A local set's side 0 references records associated with
* that node's local set. As this is a non-local set, "side"
* must be modified (by adding a SKEW) before we reference
* records in the local set [setno = 0] for the non-local set
* [setno = 1..n].
*/
/*
* If this set had been previously imported as a partial replicated
* diskset, then we must attempt to update any unresolved drive
* records in the diskset with new devid information. We must set
* flags in the drivedesc list before loading up the set so that the
* md driver will fix up names and devids correctly in the
* locator block.
*/
int indx;
rval = -1;
goto out;
}
char *cdevidp;
int len;
char *fp;
/*
* We may have name collision here so we need to get
* the dnp using the devid and not the name.
*/
rval = -1;
goto out;
}
rval = -1;
goto out;
}
/* if no device id, what error? */
rval = -1;
goto out;
}
rval = -1;
goto out;
}
rval = -1;
goto out;
}
/*
* Assuming we're interested in knowing about
* whatever error occurred, but not in stopping.
*/
mdclrerror(ep);
continue;
}
}
/* Reget sd and dd since freed by meta_prune_cnames. */
rval = -1;
goto out;
}
else
/* If ep has error, then there was a failure, set rval */
rval = -1;
goto out;
}
/* Builds global replicated disk list */
/* If success, then clear error structure */
mdclrerror(ep);
/* If ep has error, then there was a failure, set rval */
rval = -1;
goto out;
}
if (d->dd_flags & MD_DR_UNRSLV_REPLICATED) {
/* Get old devid from drive record */
/*
* If the devid stored in the drive record
* (old_devid) matches a devid known by
* the system, then this disk has already
* been partially resolved. This situation
* could occur if a panic happened during a
* previous take of this diskset.
* Set flag to later handle fixing the master
* block on disk and turning off the unresolved
* replicated flag.
*/
&nmlist) == 0) {
d->dd_flags |= MD_DR_FIX_MB_DID;
retake_flag = 1;
continue;
}
/*
* If the devid stored in the drive record
* is on the list of replicated disks found
* during a system scan then set both flags
* so that the locator block, namespaces
* (diskset and local set), master block
* and unresolved replicated flag are updated.
*/
/*
* If devid stored in the drive record is
* not found then set flag to mark
* that set is still unresolved and
* continue to next drive record.
*/
unrslv_replicated = 1;
continue;
}
/*
* Set flags to fix up the master block,
* locator block of the diskset, diskset
* namespace and the local set namespace.
*/
d->dd_flags |= (MD_DR_FIX_MB_DID |
retake_flag = 1;
}
}
}
/*
* Check the local devid namespace to see if the disks
* have been moved. Use the local set first of all as this contains
* entries for the disks in the set.
*
* This is being done before the tk_own_bydd because the disks
* in the dd list could be wrong! But it should be done with the lock
* held for the set.
*/
/*
* Actually do the check of the disks.
*/
if ((ret == METADEVADM_ERR) ||
(ret == METADEVADM_DSKNAME_ERR)) {
/* check failed in some unknown manner */
rval = -1;
goto out;
} else if (ret == METADEVADM_DISKMOVE) {
/*
* Update the dd namelist so that the rpc.metamhd
* gets the correct disks to reserve - it is the rname
* we are interested in.
*/
char *save_devid;
/*
* Need to save the side names key as this
* points to the namespace entry that will
* need to be updated. In addition the call
* to meta_make_sidenmlist does not actually
* set the namespace key.
*/
/*
* There is the possibility that there
* will be multiple disks with the same
* name but different devids in the
* drivelist. Because of this, we need
* to look for a new dnp based on devid
* and not name.
*/
metafreedrivename(d->dd_dnp);
/*
* null newname so we are reset for next time
* through
*/
if (ret == -1) {
rval = -1;
goto out;
}
}
}
}
/* from here on, failures go through the staged rollback */
if (!MD_ATSET_DESC(sd)) {
goto rollback;
}
goto rollback;
goto rollback;
mdclrerror(ep);
}
/* Snarf set of traditional diskset doesn't use stale information */
rval = -1;
goto out;
}
goto rollback;
/*
* Look at the set on all other hosts, if every other host
* has the same set with a larger genid, then we destroy this
* copy.
*/
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* Skip this node */
continue;
/* numsides counts the other hosts; matches counts those that agree */
numsides++;
NHS_NST_EQ_G_GT, &xep);
if (has_set < 0) {
goto rollback;
matches++;
mdclrerror(&xep);
continue;
}
if (has_set)
matches++;
}
/* Destroy the set */
mdclrerror(&xep);
rval = -1;
}
goto rollback;
}
/*
* If an unresolved replicated diskset, fix up diskset
* and local namespaces, master block and drive record
* with the new devid. If all drives in diskset are
* now resolved, then clear set unresolved replicated flag.
* If an error is encountered, don't fail the take, but
* don't proceed any further in resolving the replicated disks.
*/
/* Fix up diskset and local namespaces with new devids */
/* Fix up master block with new devids */
}
/* If all drives are resolved, set OK flag in set record. */
/* Ignore failure since no bad effect. */
}
mdclrerror(ep);
}
/*
* meta_getalldevs() will ultimately force devfsadmd to create
* the /dev links for all the configured metadevices if they
* do not already exist. This ensures that once the set is
* owned all the metadevices are accessible as opposed to waiting
* for devfsadmd to create them.
*/
goto rollback;
}
if ((pathname_return == METADEVADM_ERR) ||
goto rollback;
}
goto rollback;
goto rollback;
}
/*
* If the set doesn't have the MD_SR_MB_DEVID bit set, i.e.
* the drives in the set don't have the device id information,
* then stick it in if possible.
*
* If updating the master block fails for whatever reason, it's
* okay. It just means the disk(s) in the diskset won't be self
* identifying.
*/
/* Lock the set on current set members */
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* We already locked this side */
continue;
/*
* Ignore any RPC errors on a force
* take. The set will have been taken
* above and we still need to continue.
*/
if (flags & TAKE_FORCE)
continue;
rval = -1;
goto out;
}
/* remember which sides were locked; later loops only touch those */
BT_SET(node_active, i);
}
/* update the sr_flags on all hosts */
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/*
* Only update those nodes that
* are active (ie those that the
* set is locked on).
*/
if (!BT_TEST(node_active, i))
continue;
goto rollback;
}
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* Unlocking of this side is done later */
continue;
/* no point calling dead nodes */
if (!BT_TEST(node_active, i))
continue;
if (rval == 0)
rval = -1;
}
}
}
/*
* If we get here, we need to unlock the set before the resync
* gets called, otherwise the "daemon" will hold the set lock
* until the resync is done!
*/
if (rval == 0)
rval = -1;
}
/* We try to get things resync'ed, but this can fail */
mdclrerror(&xep);
if (rval == 0)
rval = -1;
}
/*
* In order to resolve the namespace major driver names and
* to have the subdrivers attempt to re-associate devts from
* the newly resolved replicated device ids, return a '2'.
* This instructs metaset to release the diskset and re-take.
*
* Return a 2 if
* - no error was detected on the take
* - a replicated unresolved devid was resolved during take
* - take isn't being called during an import
* - this isn't already a re-take situation
*/
rval = 2;
}
return (rval);
out:
/*
* Simple exit path: unlock without rolling anything back.  An unlock
* failure only overrides rval when nothing failed earlier.
*/
if (rval == 0)
rval = -1;
}
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* We already unlocked this side */
continue;
/* no point calling dead nodes */
if (!BT_TEST(node_active, i))
continue;
if (rval == 0)
rval = -1;
}
}
}
return (rval);
/*
* NOTE(review): presumably the "rollback" path targeted by the gotos
* above (label not visible here).  Errors raised while rolling back are
* discarded via mdclrerror(&xep); the routine always fails from here.
*/
/* Make sure we are blocking all signals */
mdclrerror(&xep);
rval = -1;
/* level 4 */
if (rb_level > 3) {
/* update the sr_flags on all hosts */
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* no point calling dead nodes */
if (!BT_TEST(node_active, i))
continue;
mdclrerror(&xep);
}
}
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* We will unlock this side below */
continue;
/* no point calling dead nodes */
if (!BT_TEST(node_active, i))
continue;
mdclrerror(&xep);
}
}
/* level 3 */
if (rb_level > 2) {
mdclrerror(&xep);
}
/* level 2 */
if (rb_level > 1) {
mdclrerror(&xep);
}
/* level 1 */
if (rb_level > 0) {
if (!MD_ATSET_DESC(sd)) {
mdclrerror(&xep);
}
}
/* level 0 */
mdclrerror(&xep);
/* release signals back to what they were on entry */
mdclrerror(&xep);
return (rval);
}
/*
* Release ownership of a diskset.
*
* NOTE(review): the signature, the "rollback" label and most calls of this
* routine are not visible in this view; this summary is drawn from the
* surviving comments and control flow and should be confirmed against the
* complete source.
*
* Outline, per the comments below: verify that we own the set, fetch the
* drive descriptors and the timeout values (kept in case the release has to
* be rolled back), lock the set on this side, then perform the release
* steps.  A failing step jumps to the rollback tail, which undoes work in
* descending rb_level order (level 3 down to level 0).
*
* Returns 0 on success and -1 on failure.
*/
int
)
{
int rval = 0;
int rb_level = 0;
/* Make sure we own the set */
return (-1);
/* Get the drive descriptors */
return (-1);
/* Get timeout values in case we need to roll back this release */
return (-1);
/* END CHECK CODE */
/* Lock the set on our side */
rval = -1;
goto out;
}
goto rollback;
goto rollback;
goto rollback;
out:
/* an error on this exit path only overrides rval if nothing failed earlier */
if (rval == 0)
rval = -1;
}
return (rval);
/*
* NOTE(review): presumably the "rollback" path targeted by the gotos
* above (label not visible here).  Errors raised while rolling back are
* discarded via mdclrerror(&xep); the routine always fails from here.
*/
/* Make sure we are blocking all signals */
mdclrerror(&xep);
rval = -1;
/* level 3 */
if (rb_level > 2) {
mdclrerror(&xep);
}
/* level 2 */
if (rb_level > 1) {
mdclrerror(&xep);
}
/* level 1 */
if (rb_level > 0) {
mdclrerror(&xep);
/* Snarf set of trad diskset doesn't use stale information */
mdclrerror(&xep);
}
/* level 0 */
mdclrerror(&xep);
/* release signals back to what they were on entry */
mdclrerror(&xep);
return (rval);
}