meta_metad_subr.c revision 6b1acda1706fa79a5b975e20b643e546749e83d7
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Just in case we're not in a build environment, make sure that
* TEXT_DOMAIN gets set to something.
*/
#if !defined(TEXT_DOMAIN)
#define TEXT_DOMAIN "SYS_TEST"
#endif
/*
* interface between user land and the set records
*/
#include <meta.h>
#include <metad.h>
#include <sdssc.h>
#include <syslog.h>
#include "meta_set_prv.h"
static int setsnarfdone = 0;
typedef struct key_lst_t {
} key_lst_t;
typedef struct ur_recid_lst {
struct ur_recid_lst *url_nx;
static void
{
/* Run to the end of the list */
return;
/* Add the new member */
return;
}
static int
{
return (1);
}
return (0);
}
static void
{
}
}
static int
{
int recs[3];
return (0);
if (! md_in_daemon) {
return (0);
(void) memset(
return (0);
}
/*
* If here, then the daemon is calling, and so the automatic
* conversion will be performed.
*/
/* shorthand */
return (0);
/* Leave multi-node set records alone */
if (MD_MNSET_REC(sr)) {
return (0);
}
/* Mark the old record as converted */
/* Create space for the new record */
/* Allocate the new record */
/* copy all the data from the record being converted */
/* adjust the selfid to point to the new record */
}
/* Commit the old and the new */
recs[2] = 0;
METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
}
/* Add the old record to the list of records to delete */
/* Free the old record's space */
/* Adjust the reqp structure to point to the new record and size */
return (0);
}
)
{
int ureq;
return (NULL);
}
if (metaislocalset(sp)) {
} else {
return (NULL);
}
}
switch (cmd) {
case MD_UR_GET_NEXT:
!= 0) {
return (NULL);
}
break;
case MD_UR_GET_WKEY:
break;
}
if (*idp <= 0) {
return (NULL);
}
*idp = 0;
return (NULL);
}
*idp = 0;
return (NULL);
}
case MDDB_USER:
case MDDB_UR_SR:
return (NULL);
}
break;
}
break;
}
return (reqp);
}
void *
)
{
void *ret_val;
return (NULL);
return (ret_val);
}
/*
* Called by rpc.metad on startup of disksets to cleanup
* the host entries associated with a diskset. This is needed if
* a node failed or the metaset command was killed during the addition
* of a node to a diskset.
*
* This is called for all traditional disksets.
* This is only called for MNdisksets when there is only one node
* in all of the MN disksets and this node is not running SunCluster.
* (Otherwise, the cleanup of the host entries is handled by a
* reconfig cycle that the SunCluster software calls).
*/
static int
{
int i,
nid = 0,
self_in_set = FALSE;
if (MD_MNSET_REC(sr)) {
/*
* Already guaranteed to be only 1 node in set which
* is mynode (done in sr_validate).
* Now, check if node is in the OK state. If not in
* the OK state, leave self_in_set FALSE so that
* set will be removed.
*/
self_in_set = TRUE;
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
/* Make sure we are in the set and skip this node */
self_in_set = TRUE;
break;
}
}
}
/*
* missing) it is possible for the call to _cladm() to
* return 0 and a nid of 0. In this instance do not remove
* the set as it is a Sun Cluster error that needs to be fixed.
*/
/*
* See if we've got a node which has been booted in
* non-cluster mode. If true the nodeid will match
* one of the sr_nodes values because the conversion
* from nodeid to hostname failed to occur.
*/
for (i = 0; i < MD_MAXSIDES; i++) {
continue;
self_in_set = TRUE;
}
/* If we aren't in the set, delete the set */
if (self_in_set == FALSE) {
"Removing set %s from database\n"),
sr->sr_setname);
mdclrerror(&xep);
return (1);
}
} else {
/*
* Send a message to syslog and return without
* deleting any sets
*/
"Call to _cladm failed for set %s nodeid %d\n"),
return (1);
}
}
return (0);
}
void
{
mdclrerror(&xep);
goto skip;
}
/* delete the replicas? */
/* release ownership of the drive? */
/* NOTE: We may not have a name, so both of the above are ugly! */
skip:
}
static void
{
int i;
int modified = 0;
int sidesok;
static char device_name[MAXPATHLEN];
/* If we were mid-add, cleanup */
modified++;
continue;
}
if (MD_MNSET_REC(sr)) {
/*
* MultiNode disksets only have entries for
* their side in the local set. Verify
* that drive has a name associated with
* this node's side.
*/
while (nr) {
/* Find my node */
continue;
}
NULL) != 0) {
return;
}
}
/*
* If entry is found for this node, then
* break out of loop walking through
* node list. For a multi-node diskset,
* there should only be an entry for
* this node.
*/
break;
}
/*
* If entry is not found for this node,
* then delete the drive. No need to
* continue through the node loop since
* our node has already been found.
*/
/* If we are missing a sidename, cleanup */
modified++;
break;
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
NULL) != 0) {
return;
}
}
continue;
/* If we are missing a sidename, cleanup */
modified++;
break;
}
}
continue;
/*
* If we got this far, the drive record is either in the OK
* or DEL state, if it is in the DEL state and the sidenames
* all checked out, then we will make it OK.
*/
continue;
modified++;
}
if (modified) {
mdclrerror(&xep);
}
}
static void
{
/* void */;
/* allocate new list element */
}
#ifdef DUMPKEYLST
static void
{
}
#endif /* DUMPKEYLST */
static int
{
return (1);
return (0);
}
static void
{
}
}
/*
 * Clean up stray sidenames.
 *
 * Per the in-line comments this works in two phases: the set and drive
 * records are walked first to build a list, and then the non-local
 * sidenames are checked against that list; any that are not in use are
 * deleted.  The check is done once for this node's multi-node side
 * (myside, when non-zero) and once for each of the MD_MAXSIDES sides
 * used by non-multinode disksets.
 *
 * NOTE(review): a number of statements of this routine are not visible
 * in this view of the file; the summary above is taken from the
 * comments in the body and should be confirmed against the full source.
 */
static void
sr_sidenms(void)
{
int i;
/*
* We now go through the list of set and drive records collecting
*/
/*
* To handle the multi-node diskset case, get the sideno
* associated with this node. This sideno will be the
* same across all multi-node disksets.
*/
while (nr) {
break;
}
}
/*
* If this node is not in this MNset -
* then skip this set.
*/
if (!nr) {
continue;
}
}
if (MD_MNSET_REC(sr)) {
/*
* There are no non-local sidenames in the
* local set for a multi-node diskset.
*/
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
}
}
}
}
#ifdef DUMPKEYLST
#endif /* DUMPKEYLST */
/*
* We take the list above and get all non-local sidenames, checking
* each to see if they are in use, if they are not used, we delete them.
* Do the check for myside to cover multinode disksets.
* Then do the check for MD_MAXSIDES to cover non-multinode disksets.
* If any multi-node disksets were present, myside would be non-zero.
* myside is the same for all multi-node disksets for this node.
*/
if (myside) {
/*CONSTCOND*/
while (1) {
NULL) != 0) {
break;
}
break;
NULL) != 0) {
continue;
}
}
}
}
/* Now handle the non-multinode disksets */
for (i = 0; i < MD_MAXSIDES; i++) {
/*CONSTCOND*/
while (1) {
NULL) != 0) {
break;
}
break;
NULL) != 0) {
continue;
}
}
}
}
/* Cleanup */
}
/*
 * Validate the set records that were gathered by the snarf.
 *
 * A snarf must already have been done: setsnarfdone != 0 is asserted.
 * If the records have already been validated (setsnarfdone == 3) the
 * routine returns immediately.  Otherwise the per-set checks described
 * by the comments below are run, stray sidenames are cleaned up with
 * sr_sidenms(), and setsnarfdone is set to 3 so the work is not
 * repeated.
 *
 * NOTE(review): parts of the body are not visible in this view of the
 * file; confirm the individual checks against the full source.
 */
void
sr_validate(void)
{
int mnset_single_node;
assert(setsnarfdone != 0);
/* We have validated the records already */
if (setsnarfdone == 3)
return;
/*
* Check if we are in a single node non-SC3.x environment
*/
/*
* If a possible single_node situation, verify that all
* MN disksets have only one node (which is mynode()).
*/
if (mnset_single_node) {
if (MD_MNSET_REC(sr)) {
/*
* If next pointer is non-null (more than
* one node in list) or if the single node
* isn't my node - reset single node flag.
*/
mnset_single_node = 0;
break;
}
}
}
}
/*
* If a MN diskset and not in the single node
* situation, then don't validate the MN set.
* This is done during a reconfig cycle since all
* nodes must take the same action.
*/
continue;
/* Since we do "partial" snarf's, we only check new entries */
continue;
/* If we were mid-add, cleanup */
mdclrerror(&xep);
continue;
}
/* Make sure we are in the set. */
continue;
/* Check has been done, clear the flag */
/*
* If we got here, we are in the set, make sure the flags make
* sense.
*/
mdclrerror(&xep);
}
/* Make sure all the drives are in a stable state. */
}
/* Cleanup any stray sidenames */
sr_sidenms();
/* Record that validation is complete (checked at the top) */
setsnarfdone = 3;
}
static md_set_record *
{
return (tsr);
return ((md_set_record *)NULL);
}
int
{
mddb_recid_t *p;
int nodecnt;
/* We have done the snarf call */
if (setsnarfdone != 0)
return (0);
if (meta_setup_db_locations(ep) != 0) {
return (-1);
mdclrerror(ep);
}
/*
* Get membershiplist from API routine.
* If there's an error, just use a NULL
* nodelist.
*/
nodecnt = 0; /* no nodes are alive */
mdclrerror(ep);
}
/* Let sr_cache_add and dr_cache_add know we are doing the snarf */
setsnarfdone = 1;
/* Go get the set records */
id = 0;
/*
* Cluster nodename support
* Convert nodeid -> nodename
* Don't do this for MN disksets since we've already stored
* both the nodeid and name.
*/
if (!(MD_MNSET_REC(sr)))
/* If we were mid-cvt, cleanup */
/* If the daemon is calling, cleanup */
if (md_in_daemon)
continue;
}
if (md_in_daemon)
/* Skip cached records */
if (MD_MNSET_REC(sr)) {
} else {
}
if (md_in_daemon)
continue;
}
/* Mark the record as one to be checked */
/* If MNdiskset, go get the node records */
if (MD_MNSET_REC(sr)) {
p = &mnsr->sr_noderec;
if (md_in_daemon)
/*
* Turn off ALIVE node flag based on member
* list.
* If ALIVE flag is not set, reset OWN flag.
* If this node is mynode, set the OWN flag
* to match the ownership of the diskset.
*/
if (md_in_daemon) {
while (nl2) {
/*
* If in member list,
* set alive.
*/
if (nl2->msl_node_id ==
break;
}
}
/*
* If mynode is in member list, then
* check to see if set is snarfed.
* If set snarfed, set own flag;
* otherwise reset it.
* Don't change master even if
* node isn't an owner node, since
* node may be master, but hasn't
* joined the set yet.
*/
mynode()) == 0) {
if (s_ownset(
} else {
}
}
} else {
mynode()) == 0) {
/*
* If my node isn't in member
* list then reset master.
*/
mnsr = (struct
mnsr->sr_master_nodenm[0] =
'\0';
}
}
}
/*
* Must grab nr_nextrec now since
* mnnr_cache_add may change it
* (mnnr_cache_add is storing the nodes in
* an ascending nodeid order list in order
* to support reconfig).
*/
if (nr->nr_nextrec != 0)
p = &nr->nr_nextrec;
else
p = NULL;
nr);
if ((md_in_daemon) &&
(void) mdstealerror(ep,
}
}
if (p == NULL)
break;
}
goto out;
mdclrerror(ep);
}
}
if (sr->sr_driverec == 0)
continue;
/* Go get the drive records */
p = &sr->sr_driverec;
if (md_in_daemon)
if (dr->dr_nextrec == 0)
break;
p = &dr->dr_nextrec;
}
goto out;
mdclrerror(ep);
/*
* If dr_nextrec was not valid, or we had some
* problem getting the record, we end up here.
* get_ur_rec() zeroes the recid we passed in,
* if we had a failure getting a record using a key,
* so we simply commit the set record and valid
* drive records, if this fails, we hand an error
* back to the caller.
*/
goto out;
}
}
goto out;
mdclrerror(ep);
}
/*
* If the daemon called, go through the USER records and cleanup
* any that are not used by valid sets.
*/
if (md_in_daemon) {
id = 0;
/* Make a list of records to delete */
continue;
}
}
goto out;
mdclrerror(ep);
}
/* Delete all the delete listed records */
mdclrerror(&xep);
}
}
if (nodecnt)
/* Mark the snarf complete */
setsnarfdone = 2;
return (0);
out:
sr_cache_flush(1);
if (nodecnt)
/* Snarf failed, reset state */
setsnarfdone = 0;
return (-1);
}
void
{
assert(setsnarfdone != 0);
if (setrecords == NULL) {
setrecords = sr;
return;
}
/* void */;
}
void
{
assert(setsnarfdone != 0);
continue;
if (sr == setrecords)
else
if (MD_MNSET_REC(sr)) {
} else {
}
break;
}
if (setrecords == NULL)
setsnarfdone = 0;
}
void
{
assert(setsnarfdone != 0);
return;
}
/* void */;
}
void
{
assert(setsnarfdone != 0);
continue;
} else {
}
break;
}
}
/*
* Nodes must be kept in ascending node id order in order to
* support reconfig.
*
* This routine may change nr->nr_next and nr->nr_nextrec.
*/
void
{
return;
}
/*
* If new_record->nodeid < first_record->nodeid,
* put new_record at beginning of list.
*/
return;
}
/*
* Walk list looking for place to insert record.
*/
while (tnr) {
/* Insert new record between tnr_prev and tnr */
return;
}
}
/*
* Add record to end of list.
*/
}
void
{
tnr = 0;
while (nr) {
continue;
}
} else {
}
break;
}
}
/*
 * Query made by set name; returns 1 or 0 (an int used as a boolean).
 *
 * NOTE(review): presumably reports whether the set called setname has
 * auto-take enabled - the lookup and the test that select between the
 * return values are not visible in this view of the file; confirm
 * against the full source.
 */
int
metad_isautotakebyname(char *setname)
{
/* In the daemon, the set records must already have been snarfed */
if (md_in_daemon)
assert(setsnarfdone != 0);
mdclrerror(&error);
return (0);
}
return (1);
return (0);
}
}
return (0);
}
int
{
if (md_in_daemon)
assert(setsnarfdone != 0);
mdclrerror(&error);
return (0);
}
return (1);
return (0);
}
}
return (0);
}
{
char buf[100];
assert(setsnarfdone != 0);
return (sr);
return (NULL);
}
{
char buf[100];
if (md_in_daemon)
assert(setsnarfdone != 0);
return (NULL);
return (sr);
return (NULL);
}
/*
* Commit the set record and all of its associated records
* (drive records, node records for a MNset) to the local mddb.
*/
void
{
int *recs;
assert(setsnarfdone != 0);
/*
* Cluster nodename support
* Convert nodename -> nodeid
* Don't do this for MN disksets since we've already stored
* both the nodeid and name.
*/
if (!(MD_MNSET_REC(sr)))
/* Send down to kernel the data in mddb USER set record */
if (inc_genid)
if (MD_MNSET_REC(sr)) {
} else {
}
return;
}
/*
* Walk through the drive records associated with this set record
* and send down to kernel the data in mddb USER drive record.
*/
drc = 0;
while (dr) {
if (inc_genid)
return;
}
drc++;
}
/*
* If this set is a multi-node set -
* walk through the node records associated with this set record
* and send down to kernel the data in mddb USER node record.
*/
nrc = 0;
if (MD_MNSET_REC(sr)) {
while (nr) {
if (inc_genid)
!= 0) {
return;
}
nrc++;
}
}
/*
* Set up list of mddb USER recids containing set and drive records
* and node records if a MNset.
*/
rc = 0;
/* First recid in list is the set record's id */
rc++;
while (dr) {
/* Now, fill in the drive record ids */
rc++;
}
if (MD_MNSET_REC(sr)) {
while (nr) {
/* If a MNset, fill in the node record ids */
rc++;
}
}
/* Set last record to null recid */
/* Write out the set and drive and node records to the local mddb */
METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
return;
}
/*
* Cluster nodename support
* Convert nodeid -> nodename
* Don't do this for MN disksets since we've already stored
* both the nodeid and name.
*/
if (!(MD_MNSET_REC(sr)))
}
/*
* This routine only returns a md_set_record structure, even
* if the set record describes a MN set. This will allow pre-MN
* SVM RPC code to access a MN set record and to display it.
*
* The MN SVM RPC code detects if the set record returned describes
* a MN set and then will copy it using mnsetdup.
*/
{
while (*tdrpp) {
}
}
return (tsr);
}
/*
* This routine only copies MN set records. If a non-MN set
* record is passed in, a NULL pointer will be returned.
*/
{
if (!MD_MNSET_REC(mnsr)) {
return (NULL);
}
while (*tdrpp) {
}
while (*tnrpp) {
}
}
return (tmnsr);
}
{
return (tdr);
}
{
return (tnr);
}
/*
* Duplicate parts of the drive descriptor list for this node.
* Only duplicate the drive name string in the mddrivename structure, don't
* need to copy any other pointers since only interested in the flags and
* the drive name (i.e. other pointers will be set to NULL).
* Returns NULL if failure due to Malloc failure.
* Returns pointer (non-NULL) to dup'd list if successful.
*/
{
char *copy_cname;
char *copy_devid;
return (NULL);
while (orig_dd) {
} else {
copy_devid = NULL;
}
(copy_cname == NULL)) {
while (copy_dd_head) {
}
if (copy_dnp)
if (copy_dd)
if (copy_cname)
if (copy_devid)
return (NULL);
}
if (copy_devid) {
}
if (copy_dd_prev == NULL) {
} else {
}
}
return (copy_dd_head);
}
/*
 * Flush the set record cache.
 *
 * On return the setrecords list head is NULL and the snarf is marked
 * as not done (setsnarfdone = 0), so the next user has to snarf again.
 * If flushnames is non-zero, metaflushnames(0) is also called so that
 * the other caches are cleared as well.
 *
 * NOTE(review): the per-record handling (multi-node vs. traditional
 * set records) is not visible in this view of the file.
 */
void
sr_cache_flush(int flushnames)
{
}
if (MD_MNSET_REC(tsr)) {
}
} else {
}
}
setrecords = NULL;
setsnarfdone = 0;
/* This will cause the other caches to be cleared */
if (flushnames)
metaflushnames(0);
}
void
{
assert(setsnarfdone != 0);
continue;
}
if (sr == setrecords)
else
if (MD_MNSET_REC(sr)) {
} else {
}
break;
}
setsnarfdone = 0;
/* This will cause the other caches to be cleared */
metaflushnames(0);
}
int
{
return (0);
return (ownset_arg.owns_set);
}
void
{
char stringbuf[100];
int i;
int num_sets = 0;
int num_mn_sets = 0;
return;
mdclrerror(&xep);
if (MD_MNSET_REC(sr)) {
/*
* If this node is a set owner, halt the set before
* deleting the set records. Ignore any errors since
* s_ownset and halt_set could fail if panic had occurred
*/
mdclrerror(&xep);
mdclrerror(&xep);
}
}
if (MD_MNSET_REC(sr)) {
while (nr) {
/* Setting myside for later use */
NULL) != 0) {
return;
}
}
} else {
for (i = 0; i < MD_MAXSIDES; i++) {
/* Skip empty slots */
continue;
myside = i;
}
}
while (dr) {
return;
}
&xep) == 0) {
}
else
mdclrerror(&xep);
} else
mdclrerror(&xep);
} else
mdclrerror(&xep);
MD_LOCAL_SET, dev);
}
return;
}
NODEV64);
continue;
num_sets++;
if (MD_MNSET_REC(tsr))
num_mn_sets++;
}
if (num_mn_sets == 0)
/* The set we just deleted is the only one left */
if (num_sets == 0)
}
void
{
}
/*
* resnarf the imported set
*/
int
)
{
mddb_recid_t id, *p;
if (meta_setup_db_locations(ep) != 0) {
return (-1);
mdclrerror(ep);
}
setsnarfdone = 1;
id = 0;
continue;
/* Don't allow resnarf of a multi-node diskset */
if (MD_MNSET_REC(sr))
goto out;
if (md_in_daemon)
if (sr->sr_driverec == 0)
break;
p = &sr->sr_driverec;
if (md_in_daemon)
if (dr->dr_nextrec == 0)
break;
p = &dr->dr_nextrec;
}
goto out;
mdclrerror(ep);
goto out;
}
}
goto out;
mdclrerror(ep);
}
setsnarfdone = 2;
return (0);
out:
sr_cache_flush(1);
setsnarfdone = 0;
return (-1);
}