/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Just in case we're not in a build environment, make sure that
* TEXT_DOMAIN gets set to something.
*/
#if !defined(TEXT_DOMAIN)
#define TEXT_DOMAIN "SYS_TEST"
#endif
/*
* Metadevice database interfaces.
*/
#define MDDB
#include <meta.h>
#include <sys/lvm/md_mddb.h>
#include <sys/lvm/md_crc.h>
#include <sys/lvm/mdio.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
struct svm_daemon {
char *svmd_name;
char *svmd_kill_val;
};
/*
 * This is a list of the daemons that are not stopped by the SVM smf(5)
 * services. The mdmonitord is started via svc:/system/mdmonitor:default
 * but no contract(4) is constructed and so it is not stopped by smf(5).
 *
 * Each entry pairs a daemon name with the value stored in svmd_kill_val.
 */
struct svm_daemon svmd_kill_list[] = {
{"mdmonitord", "HUP"},
{"mddoors", "KILL"},
};
/* Number of entries in svmd_kill_list. */
#define DAEMON_COUNT (sizeof (svmd_kill_list)/ sizeof (struct svm_daemon))
/*
 * Defined outside this file.  update_dbinfo_on_drives() calls it with
 * block == TRUE before locking the nodes of a multi-node set.
 */
extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep);
/*
 * Are the locator blocks for the replicas using devids
 *
 * Starts out FALSE.  meta_db_addsidenms() consults it: a disk name that
 * splitname() reports as too long is only treated as an error while this
 * flag is FALSE.
 */
static int devid_in_use = FALSE;
/*
 * Resolve the replica described by c to a device name under /dev/dsk by
 * way of its device id.
 *
 * The device id is fetched from the kernel in two MD_DB_ENDDEV calls: the
 * first (MDDB_DEVID_GETSZ) reports the size, the second fills a buffer of
 * that size that is allocated here.  MD_DB_GETDEV is then issued and the
 * device id together with l_minor_name is mapped to a name list.
 *
 * Returns a Strdup()ed copy of the first name found, or NULL.  On the
 * failures detected here the reason is recorded in ep.  If the very first
 * ioctl fails the function returns at once; on every other path the
 * buffer referenced by c->c_locator.l_devid is freed before returning
 * (the field itself is left untouched).
 */
static char *
getlongname(
	struct mddb_config	*c,
	md_error_t		*ep
)
{
	devid_nmlist_t	*names = NULL;
	char		*longname = NULL;
	char		*encoded;

	/* Pass one: how big is the device id? */
	c->c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
	if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
		(void) mdstealerror(ep, &c->c_mde);
		return (NULL);
	}

	if (!(c->c_locator.l_devid_flags & MDDB_DEVID_SZ)) {
		/* the kernel did not report a size: no device id */
		(void) mderror(ep, MDE_NODEVID, "");
		goto done;
	}

	/* Pass two: hand the kernel a buffer of the reported size. */
	c->c_locator.l_devid = (uintptr_t)Malloc(c->c_locator.l_devid_sz);
	c->c_locator.l_devid_flags = MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
	if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
		(void) mdstealerror(ep, &c->c_mde);
		goto done;
	}
	if (c->c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
		(void) mderror(ep, MDE_NODEVID, "");
		goto done;
	}

	if (metaioctl(MD_DB_GETDEV, c, &c->c_mde, NULL) != 0) {
		(void) mdstealerror(ep, &c->c_mde);
		goto done;
	}

	if (c->c_locator.l_devid == 0)
		goto done;

	if (meta_deviceid_to_nmlist("/dev/dsk",
	    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
	    c->c_locator.l_minor_name, &names) != 0) {
		/* report the encoded device id along with the error */
		encoded = devid_str_encode(
		    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid, NULL);
		(void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
		mderrorextra(ep, encoded);
		if (encoded != NULL)
			devid_str_free(encoded);
		goto done;
	}
	longname = Strdup(names[0].devname);

done:
	if (names != NULL)
		devid_free_nmlist(names);
	if (c->c_locator.l_devid != 0)
		Free((void *)(uintptr_t)c->c_locator.l_devid);
	return (longname);
}
/*
 * meta_get_lb_inittime asks the kernel, through the MD_DB_LBINITTIME
 * ioctl, for the lb_inittime of set sp.
 *
 * The request structure is zeroed and only the set number is filled in.
 * The c_timestamp field of that structure is returned whether or not the
 * ioctl succeeds; on failure the kernel error is moved into ep first.
 */
md_timeval32_t
meta_get_lb_inittime(
	mdsetname_t	*sp,
	md_error_t	*ep
)
{
	mddb_config_t	cfg;

	(void) memset(&cfg, 0, sizeof (cfg));
	cfg.c_setno = sp->setno;

	if (metaioctl(MD_DB_LBINITTIME, &cfg, &cfg.c_mde, NULL) == 0)
		return (cfg.c_timestamp);

	(void) mdstealerror(ep, &cfg.c_mde);
	return (cfg.c_timestamp);
}
/*
 * mkmasterblks writes out the master blocks of the mddb to the replica.
 *
 * In a MN diskset, this is called by the node that is adding this replica
 * to the diskset.
 *
 * sp       - set the replica belongs to
 * np       - name of the device; np->rname is used in error reports
 * fd       - open descriptor for the device
 * firstblk - first block of the replica region
 * dbsize   - size of the region in DEV_BSIZE blocks
 * inittime - stored as mb_setcreatetime when sp is not the local set
 * ep       - receives the error on failure
 *
 * The whole region is first written with zeroes and read back in chunks
 * of at most MDDB_VERIFY_SIZE blocks, then the master block is built,
 * checksummed, written to firstblk, read back and its checksum verified.
 *
 * Returns 0 on success; otherwise the value of the md*error() call that
 * recorded the failure in ep (-1 if the set descriptor lookup failed).
 */
#define	MDDB_VERIFY_SIZE	8192
static int
mkmasterblks(
	mdsetname_t	*sp,
	mdname_t	*np,
	int		fd,
	daddr_t		firstblk,
	int		dbsize,
	md_timeval32_t	inittime,
	md_error_t	*ep
)
{
	int		consecutive;
	md_timeval32_t	tp;
	struct mddb_mb	*mb;
	char		*buffer;
	int		iosize;
	md_set_desc	*sd;
	int		mn_set = 0;
	daddr_t		startblk;
	int		cnt;
	ddi_devid_t	devid;
	off_t		offset;
	int		rval;

	if (! metaislocalset(sp)) {
		if ((sd = metaget_setdesc(sp, ep)) == NULL)
			return (-1);
		if (MD_MNSET_DESC(sd)) {
			mn_set = 1;		/* Used later */
		}
	}

	/*
	 * Loop to verify the entire mddb region on disk is read/writable.
	 * buffer is used to write/read in at most MDDB_VERIFY_SIZE block
	 * chunks.
	 *
	 * A side-effect of this loop is to zero out the entire mddb region
	 */
	if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL)
		return (mdsyserror(ep, ENOMEM, np->rname));

	startblk = firstblk;
	for (cnt = dbsize; cnt > 0; cnt -= consecutive) {
		consecutive = (cnt > MDDB_VERIFY_SIZE) ?
		    MDDB_VERIFY_SIZE : cnt;
		iosize = DEV_BSIZE * consecutive;
		/*
		 * Widen to off_t before multiplying so the byte offset is
		 * not computed (and possibly truncated) in daddr_t.
		 */
		offset = (off_t)startblk * DEV_BSIZE;

		if (lseek(fd, offset, SEEK_SET) < 0 ||
		    write(fd, buffer, iosize) != iosize ||
		    lseek(fd, offset, SEEK_SET) < 0 ||
		    read(fd, buffer, iosize) != iosize) {
			/* record errno before Free() can disturb it */
			rval = mdsyserror(ep, errno, np->rname);
			Free(buffer);
			return (rval);
		}
		startblk += consecutive;
	}
	Free(buffer);

	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
		return (mdsyserror(ep, ENOMEM, np->rname));

	if (meta_gettimeofday(&tp) == -1) {
		rval = mdsyserror(ep, errno, np->rname);
		goto out;
	}

	mb->mb_magic = MDDB_MAGIC_MB;
	/*
	 * If a MN diskset, set master block revision for a MN set.
	 * Even though the master block structure is no different
	 * for a MN set, setting the revision field to a different
	 * number keeps any pre-MN_diskset code from accessing
	 * this diskset.  It also allows for an early determination
	 * of a MN diskset when reading in from disk so that the
	 * proper size locator block and locator names structure
	 * can be read in thus saving time on diskset startup.
	 */
	if (mn_set)
		mb->mb_revision = MDDB_REV_MNMB;
	else
		mb->mb_revision = MDDB_REV_MB;
	mb->mb_timestamp = tp;
	mb->mb_setno = sp->setno;
	mb->mb_blkcnt = dbsize - 1;
	mb->mb_blkno = firstblk;
	mb->mb_nextblk = 0;
	mb->mb_blkmap.m_firstblk = firstblk + 1;
	mb->mb_blkmap.m_consecutive = dbsize - 1;
	if (! metaislocalset(sp)) {
		mb->mb_setcreatetime = inittime;
	}

	/*
	 * We try to save the disks device ID into the remaining bytes in
	 * the master block.  The saved devid is used to provide a mapping
	 * between this disk's devid and the devid stored into the master
	 * block.  This allows the disk image to be self-identifying
	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
	 * when we try to import these disks on the remote copied image.
	 * If we cannot save the disks device ID onto the master block that is
	 * ok.  The disk is just not self-identifying and won't be importable
	 * in the remote copy scenario.
	 */
	if (devid_get(fd, &devid) == 0) {
		size_t len;

		len = devid_sizeof(devid);
		if (len <= DEV_BSIZE - sizeof (*mb)) {
			/* there is enough space to store the devid */
			mb->mb_devid_magic = MDDB_MAGIC_DE;
			mb->mb_devid_len = len;
			(void) memcpy(mb->mb_devid, devid, len);
		}
		devid_free(devid);
	}

	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
	    (crc_skip_t *)NULL);

	/* write the master block, then read it back for verification */
	offset = (off_t)firstblk * DEV_BSIZE;
	if (lseek(fd, offset, SEEK_SET) < 0 ||
	    write(fd, mb, DEV_BSIZE) != DEV_BSIZE ||
	    lseek(fd, offset, SEEK_SET) < 0 ||
	    read(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
		rval = mdsyserror(ep, errno, np->rname);
		goto out;
	}

	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) {
		rval = mdmddberror(ep, MDE_NOTVERIFIED,
		    meta_getminor(np->dev), sp->setno, 0, np->rname);
		goto out;
	}

	rval = 0;

out:
	Free(mb);
	return (rval);
}
/*
 * Write a dummy master block (magic MDDB_MAGIC_DU) at block firstblk of
 * the device open on fd.
 *
 * Nothing is written for a multi-node diskset.  The block carries the
 * set number, the set's lb_inittime as mb_setcreatetime, the current time
 * when it can be obtained and, if it fits, the device id of the disk.
 *
 * This is best effort: the function returns nothing, and every failure
 * (set lookup, allocation, I/O, checksum verification) is silently
 * ignored.  Errors collected in the local error structure are cleared
 * before returning so that they are not left behind.
 */
void
meta_mkdummymaster(
	mdsetname_t	*sp,
	int		fd,
	daddr_t		firstblk
)
{
	md_timeval32_t	tp;
	struct mddb_mb	*mb;
	ddi_devid_t	devid;
	md_set_desc	*sd;
	md_error_t	ep = mdnullerror;
	md_timeval32_t	inittime;
	off_t		offset;

	/*
	 * No dummy master blocks are written for a MN diskset since devids
	 * are not supported in MN disksets.
	 */
	if (! metaislocalset(sp)) {
		if ((sd = metaget_setdesc(sp, &ep)) == NULL) {
			/* nobody sees this error; release what it holds */
			mdclrerror(&ep);
			return;
		}
		if (MD_MNSET_DESC(sd))
			return;
	}

	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
		return;

	mb->mb_magic = MDDB_MAGIC_DU;
	mb->mb_revision = MDDB_REV_MB;
	mb->mb_setno = sp->setno;
	inittime = meta_get_lb_inittime(sp, &ep);
	/* a failed lookup is tolerated; just drop the recorded error */
	mdclrerror(&ep);
	mb->mb_setcreatetime = inittime;
	if (meta_gettimeofday(&tp) != -1)
		mb->mb_timestamp = tp;

	/*
	 * We try to save the disks device ID into the remaining bytes in
	 * the master block.  This allows the disk image to be self-identifying
	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
	 * when we try to import these disks on the remote copied image.
	 * If we cannot save the disks device ID onto the master block that is
	 * ok.  The disk is just not self-identifying and won't be importable
	 * in the remote copy scenario.
	 */
	if (devid_get(fd, &devid) == 0) {
		/* size_t, as in mkmasterblks: compared against a sizeof */
		size_t len;

		len = devid_sizeof(devid);
		if (len <= DEV_BSIZE - sizeof (*mb)) {
			/* there is enough space to store the devid */
			mb->mb_devid_magic = MDDB_MAGIC_DE;
			mb->mb_devid_len = len;
			(void) memcpy(mb->mb_devid, (char *)devid, len);
		}
		devid_free(devid);
	}

	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
	    (crc_skip_t *)NULL);

	/*
	 * If any of these operations fail, we need to inform the
	 * user that the disk won't be self identifying.  When support
	 * for importing remotely replicated disksets is added, we
	 * want to add the error messages here.
	 *
	 * The offset is widened to off_t before multiplying so that it is
	 * not computed in daddr_t.
	 */
	offset = (off_t)firstblk * DEV_BSIZE;
	if (lseek(fd, offset, SEEK_SET) >= 0 &&
	    write(fd, mb, DEV_BSIZE) == DEV_BSIZE &&
	    lseek(fd, offset, SEEK_SET) >= 0 &&
	    read(fd, mb, DEV_BSIZE) == DEV_BSIZE) {
		/* result deliberately unused, see above */
		(void) crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
		    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL);
	}

	Free(mb);
}
/*
 * Rebuild the replica location file META_DBCONF from the current list of
 * replicas of the local set.  Does nothing (returns 0) for any other set.
 *
 * The new contents are written to a temporary file first.  Normally that
 * is META_DBCONFTMP, which is then renamed over META_DBCONF.  If
 * META_DBCONFTMP cannot be created because of EROFS we assume we are in
 * the miniroot: the temporary file is created under /var/tmp instead and
 * its contents are copied into META_DBCONF line by line.
 *
 * Each replica contributes one line: driver name, minor number, block
 * number, encoded device id and a checksum over those fields.  Replicas
 * whose device id cannot be encoded are skipped.
 *
 * Returns 0 on success, -1 if the replica list cannot be obtained, and
 * otherwise the value of mdsyserror() after recording errno in ep.
 */
static int
buildconf(mdsetname_t *sp, md_error_t *ep)
{
	md_replicalist_t	*rlp = NULL;
	md_replicalist_t	*rl;
	FILE			*cfp = NULL;
	FILE			*mfp = NULL;
	struct stat		sbuf;
	int			rval = 0;
	int			in_miniroot = 0;
	char			line[MDDB_BOOTLIST_MAX_LEN];
	char			*tname = NULL;

	/* get list of local replicas */
	if (! metaislocalset(sp))
		return (0);
	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
		return (-1);

	/* open tempfile, copy permissions of original file */
	if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) {
		/*
		 * On the miniroot tmp files must be created in /var/tmp.
		 * If we get a EROFS error, we assume that we are in the
		 * miniroot.
		 */
		if (errno != EROFS)
			goto error;
		in_miniroot = 1;
		errno = 0;
		tname = tempnam("/var/tmp", "slvm_");
		if (tname == NULL) {
			if (errno == EROFS) {
				/*
				 * If we are booted on a read-only root
				 * because of mddb quorum problems we don't
				 * want to emit any scary error messages.
				 */
				errno = 0;
				goto out;
			}
			/* never hand a NULL path to fopen() */
			if (errno == 0)
				errno = ENOMEM;
			goto error;
		}
		/* open tempfile, copy permissions of original file */
		if ((cfp = fopen(tname, "w+")) == NULL)
			goto error;
	}
	if (stat(META_DBCONF, &sbuf) == 0) {
		if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0)
			goto error;
		if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0)
			goto error;
	}

	/* print header */
	if (fprintf(cfp, "#metadevice database location file ") < 0)
		goto error;
	if (fprintf(cfp, "do not hand edit\n") < 0)
		goto error;
	if (fprintf(cfp,
	    "#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0)
		goto error;

	/* dump replicas */
	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
		md_replica_t	*r = rl->rl_repp;
		int		checksum = 42;
		size_t		i;
		size_t		devidlen;
		char		*devidp;
		minor_t		min;

		devidp = devid_str_encode(r->r_devid, r->r_minor_name);
		/* If devid code can't encode devidp - skip entry */
		if (devidp == NULL) {
			continue;
		}

		/* compute checksum; test the bound before reading a byte */
		for (i = 0; ((i < sizeof (r->r_driver_name)) &&
		    (r->r_driver_name[i] != '\0')); i++) {
			checksum -= r->r_driver_name[i];
		}
		min = meta_getminor(r->r_namep->dev);
		checksum -= min;
		checksum -= r->r_blkno;

		devidlen = strlen(devidp);
		for (i = 0; i < devidlen; i++) {
			checksum -= devidp[i];
		}

		/* print info */
		if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n",
		    r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) {
			/* free the string without losing the fprintf errno */
			int	saved_errno = errno;

			devid_str_free(devidp);
			errno = saved_errno;
			goto error;
		}

		devid_str_free(devidp);
	}

	/* close and rename to real file */
	if (fflush(cfp) != 0)
		goto error;
	if (fsync(fileno(cfp)) != 0)
		goto error;
	if (fclose(cfp) != 0) {
		cfp = NULL;
		goto error;
	}
	cfp = NULL;

	/*
	 * Renames don't work in the miniroot since tmpfiles are
	 * created in /var/tmp. Hence we copy the data out.
	 */
	if (! in_miniroot) {
		if (rename(META_DBCONFTMP, META_DBCONF) != 0)
			goto error;
	} else {
		if ((cfp = fopen(tname, "r")) == NULL)
			goto error;
		if ((mfp = fopen(META_DBCONF, "w+")) == NULL)
			goto error;
		while (fgets(line, sizeof (line), cfp) != NULL) {
			/* fputs() reports failure with EOF, not NULL */
			if (fputs(line, mfp) == EOF)
				goto error;
		}
		(void) fclose(cfp);
		cfp = NULL;
		if (fflush(mfp) != 0)
			goto error;
		if (fsync(fileno(mfp)) != 0)
			goto error;
		if (fclose(mfp) != 0) {
			mfp = NULL;
			goto error;
		}
		mfp = NULL;
		/* delete the tempfile */
		(void) unlink(tname);
	}

	/* success */
	rval = 0;
	goto out;

	/* tempfile error */
error:
	rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
	    mdsyserror(ep, errno, META_DBCONFTMP);

	/* cleanup, return success */
out:
	if (rlp != NULL)
		metafreereplicalist(rlp);
	/* only still open here when the miniroot copy failed part way */
	if (mfp != NULL)
		(void) fclose(mfp);
	if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) {
		rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
		    mdsyserror(ep, errno, META_DBCONFTMP);
	}
	free(tname);
	return (rval);
}
/*
 * Check whether the block range [slblk, slblk + nblks) of device np
 * overlaps the single replica rp.
 *
 * When either the start or the length recorded for the replica is
 * MD_DISKADDR_ERROR (error in the master block) the replica is taken to
 * start at block 0 with length MD_DISKADDR_ERROR, i.e. the whole
 * partition is assumed.
 *
 * Returns 0 if meta_check_overlap() reports no problem, -1 otherwise
 * (ep is handed to meta_check_overlap() for the error).
 */
static int
in_replica(
	mdsetname_t	*sp,
	md_replica_t	*rp,
	mdname_t	*np,
	diskaddr_t	slblk,
	diskaddr_t	nblks,
	md_error_t	*ep
)
{
	mdname_t	*rep_name = rp->r_namep;
	diskaddr_t	rep_start = rp->r_blkno;
	diskaddr_t	rep_len = rp->r_nblk;
	int		overlap;

	/* should be in the same set */
	assert(sp != NULL);

	if (rep_start == MD_DISKADDR_ERROR || rep_len == MD_DISKADDR_ERROR) {
		rep_start = 0;
		rep_len = MD_DISKADDR_ERROR;
	}

	overlap = meta_check_overlap(MDB_STR, np, slblk, nblks,
	    rep_name, rep_start, rep_len, ep);

	return ((overlap != 0) ? -1 : 0);
}
/*
 * Check the block range [slblk, slblk + nblks) of device np against every
 * replica of set sp.
 *
 * Returns 0 if no replica reports a conflict.  Returns -1 if the replica
 * list cannot be obtained or as soon as one replica check fails; the
 * remaining replicas are not examined in that case.
 */
int
meta_check_inreplica(
	mdsetname_t	*sp,
	mdname_t	*np,
	diskaddr_t	slblk,
	diskaddr_t	nblks,
	md_error_t	*ep
)
{
	md_replicalist_t	*replicas = NULL;
	md_replicalist_t	*cur;
	int			status = 0;

	/* should have a set */
	assert(sp != NULL);

	if (metareplicalist(sp, MD_BASICNAME_OK, &replicas, ep) < 0)
		return (-1);

	/* walk the list until it ends or a check fails */
	for (cur = replicas; cur != NULL && status == 0;
	    cur = cur->rl_next) {
		if (in_replica(sp, cur->rl_repp, np, slblk, nblks, ep) != 0)
			status = -1;
	}

	metafreereplicalist(replicas);
	return (status);
}
/*
 * Check whether component np may be used for a replica.
 *
 * The checks run in this order and the first failure returns -1:
 *   1. metachkcomp()      - np must be a component
 *   2. meta_check_inuse() - np must not already be in use
 *   3. if MDCHK_ALLOW_NODBS or MDCHK_DRVINSET is set in options, stop
 *      here and return 0
 *   4. meta_check_inset() - np must be in set sp
 *   5. meta_check_inmeta() - the range must not be in a metadevice
 *      (called with MDCHK_ALLOW_REPSLICE)
 *
 * Returns 0 when every applicable check passes.
 */
int
meta_check_replica(
	mdsetname_t	*sp,		/* set to check against */
	mdname_t	*np,		/* component to check against */
	mdchkopts_t	options,	/* option flags */
	diskaddr_t	slblk,		/* start logical block */
	diskaddr_t	nblks,		/* number of blocks (-1,rest of them) */
	md_error_t	*ep		/* error packet */
)
{
	/* must be a disk component that nothing else is using */
	if (metachkcomp(np, ep) != 0 ||
	    meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0)
		return (-1);

	/* either option ends the checking here */
	if (options & (MDCHK_ALLOW_NODBS | MDCHK_DRVINSET))
		return (0);

	/* must belong to the set and must not be part of a metadevice */
	if (meta_check_inset(sp, np, ep) != 0 ||
	    meta_check_inmeta(sp, np, MDCHK_ALLOW_REPSLICE, slblk, nblks,
	    ep) != 0)
		return (-1);

	return (0);
}
/*
 * Call clnt_upd_dr_dbinfo() for the drives dd on the nodes of set sp.
 *
 * The work is done in up to three passes over the nodes of the set:
 *   1. unless set_locked is non-zero, lock the set on each node;
 *   2. call clnt_upd_dr_dbinfo() on each node;
 *   3. unless set_locked is non-zero, unlock the set on each node.
 *
 * For a multi-node set the node list is walked and nodes without
 * MD_MN_NODE_ALIVE are skipped; otherwise the sd_nodes table is walked
 * and empty slots are skipped.  When force is non-zero only this node
 * (mynode()) is contacted.
 *
 * Returns 0 on success and -1 on failure.  A failed lock returns at once,
 * without running the unlock pass; a failure in pass 2 stops that pass
 * but pass 3 still runs.
 */
static int
update_dbinfo_on_drives(
	mdsetname_t	*sp,
	md_drive_desc	*dd,
	int		set_locked,
	int		force,
	md_error_t	*ep
)
{
	md_set_desc	*sd;
	md_mnnode_desc	*nd;
	md_setkey_t	*key;
	int		side;
	int		status = 0;

	if ((sd = metaget_setdesc(sp, ep)) == NULL)
		return (-1);

	/* Pass 1: take the set lock where the caller has not done so. */
	if (! set_locked) {
		if (MD_MNSET_DESC(sd)) {
			md_error_t	xep = mdnullerror;
			sigset_t	sigs;

			/* Make sure we are blocking all signals */
			if (procsigs(TRUE, &sigs, &xep) < 0)
				mdclrerror(&xep);

			for (nd = sd->sd_nodelist; nd != NULL;
			    nd = nd->nd_next) {
				if (force &&
				    strcmp(nd->nd_nodename, mynode()) != 0)
					continue;
				if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
					continue;
				if (clnt_lock_set(nd->nd_nodename, sp, ep))
					return (-1);
			}
		} else {
			for (side = 0; side < MD_MAXSIDES; side++) {
				/* Skip empty slots */
				if (sd->sd_nodes[side][0] == '\0')
					continue;
				if (force &&
				    strcmp(sd->sd_nodes[side], mynode()) != 0)
					continue;
				if (clnt_lock_set(sd->sd_nodes[side], sp, ep))
					return (-1);
			}
		}
	}

	/* Pass 2: push the drive database information to each node. */
	if (MD_MNSET_DESC(sd)) {
		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
			if (force && strcmp(nd->nd_nodename, mynode()) != 0)
				continue;
			if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
				continue;
			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep)
			    == -1) {
				status = -1;
				break;
			}
		}
	} else {
		for (side = 0; side < MD_MAXSIDES; side++) {
			/* Skip empty slots */
			if (sd->sd_nodes[side][0] == '\0')
				continue;
			if (force && strcmp(sd->sd_nodes[side], mynode()) != 0)
				continue;
			if (clnt_upd_dr_dbinfo(sd->sd_nodes[side], sp, dd, ep)
			    == -1) {
				status = -1;
				break;
			}
		}
	}

	/* Pass 3: release the locks taken in pass 1. */
	if (! set_locked) {
		key = cl_get_setkey(sp->setno, sp->setname);
		if (MD_MNSET_DESC(sd)) {
			for (nd = sd->sd_nodelist; nd != NULL;
			    nd = nd->nd_next) {
				if (force &&
				    strcmp(nd->nd_nodename, mynode()) != 0)
					continue;
				if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
					continue;
				if (clnt_unlock_set(nd->nd_nodename, key,
				    ep)) {
					status = -1;
					break;
				}
			}
		} else {
			for (side = 0; side < MD_MAXSIDES; side++) {
				/* Skip empty slots */
				if (sd->sd_nodes[side][0] == '\0')
					continue;
				if (force &&
				    strcmp(sd->sd_nodes[side], mynode()) != 0)
					continue;
				if (clnt_unlock_set(sd->sd_nodes[side], key,
				    ep)) {
					status = -1;
					break;
				}
			}
		}
		cl_set_setkey(NULL);
	}

	return (status);
}
/*
 * Make the side names of the replica at block blkno of device np known,
 * one side at a time as enumerated by meta_getnextside_devinfo().
 *
 * For each side:
 *   - if sp is a multi-node diskset and bcast is DB_ADDSIDENMS_BCAST, an
 *     MD_MN_MSG_META_DB_NEWSIDE message is sent via mdmn_send_message();
 *   - otherwise the MD_DB_NEWSIDE ioctl is issued on this node.
 *
 * Returns 0 when all sides were handled.  Processing stops at the first
 * failure; the error is recorded in ep and a non-zero value is returned
 * (-1, or the value returned by the md*error() call that recorded it).
 */
int
meta_db_addsidenms(
	mdsetname_t	*sp,
	mdname_t	*np,
	daddr_t		blkno,
	int		bcast,
	md_error_t	*ep
)
{
	side_t		sideno = MD_SIDEWILD;
	char		*bname = NULL;
	char		*dname = NULL;
	minor_t		mnum;
	mddb_config_t	c;
	md_set_desc	*sd;
	int		more;
	int		rval = 0;

	for (;;) {
		/* release the names handed out for the previous side */
		if (bname != NULL) {
			Free(bname);
			bname = NULL;
		}
		if (dname != NULL) {
			Free(dname);
			dname = NULL;
		}

		more = meta_getnextside_devinfo(sp, np->bname,
		    &sideno, &bname, &dname, &mnum, ep);
		if (more == -1) {
			rval = -1;
			break;
		}
		if (more == 0)
			break;

		if (! metaislocalset(sp)) {
			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
				rval = -1;
				break;
			}
		}

		/*
		 * Send addsidenms to all nodes using rpc.mdcommd if
		 * sidename is being added to MN diskset.
		 *
		 * It's ok to broadcast this call to other nodes.
		 *
		 * Note: The broadcast to other nodes isn't needed during
		 * the addition of the first mddbs to the set since the
		 * other nodes haven't been joined to the set yet.  All
		 * nodes in a MN diskset are (implicitly) joined to the set
		 * on the addition of the first mddb.
		 */
		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
		    (bcast == DB_ADDSIDENMS_BCAST)) {
			md_mn_result_t			*resultp = NULL;
			md_mn_msg_meta_db_newside_t	db_ns;
			int				send_rval;

			db_ns.msg_l_dev = np->dev;
			db_ns.msg_sideno = sideno;
			db_ns.msg_blkno = blkno;
			(void) strncpy(db_ns.msg_dname, dname,
			    sizeof (db_ns.msg_dname));
			(void) splitname(np->bname, &db_ns.msg_splitname);
			db_ns.msg_mnum = mnum;
			/* Set devid to NULL until devids are supported */
			db_ns.msg_devid[0] = NULL;

			/*
			 * If reconfig cycle has been started, this node is
			 * stuck in in the return step until this command has
			 * completed.  If mdcommd is suspended, ask
			 * send_message to fail (instead of retrying)
			 * so that metaset can finish allowing the reconfig
			 * cycle to proceed.
			 */
			send_rval = mdmn_send_message(sp->setno,
			    MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
			    MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, (char *)&db_ns,
			    sizeof (md_mn_msg_meta_db_newside_t),
			    &resultp, ep);
			if (send_rval != 0) {
				rval = -1;
				if (resultp == NULL) {
					(void) mddserror(ep,
					    MDE_DS_COMMD_SEND_FAIL,
					    sp->setno, NULL, NULL,
					    sp->setname);
				} else {
					/* prefer the error in the result */
					(void) mdstealerror(ep,
					    &(resultp->mmr_ep));
					if (mdisok(ep)) {
						(void) mddserror(ep,
						    MDE_DS_COMMD_SEND_FAIL,
						    sp->setno, NULL, NULL,
						    sp->setname);
					}
					free_result(resultp);
				}
				break;
			}
			if (resultp)
				free_result(resultp);
			continue;
		}

		/*
		 * Let this side's device name, minor # and driver name
		 * be known to the database replica.
		 */
		(void) memset(&c, 0, sizeof (c));

		/* Fill in device/replica info */
		c.c_locator.l_dev = meta_cmpldev(np->dev);
		c.c_locator.l_blkno = blkno;
		(void) strncpy(c.c_locator.l_driver, dname,
		    sizeof (c.c_locator.l_driver));
		if (splitname(np->bname, &c.c_devname) ==
		    METASPLIT_LONGDISKNAME && devid_in_use == FALSE) {
			rval = mddeverror(ep, MDE_DISKNAMETOOLONG,
			    NODEV64, np->rname);
			break;
		}
		c.c_locator.l_mnum = mnum;

		/* Fill in setno, setname, and sideno */
		c.c_setno = sp->setno;
		(void) strncpy(c.c_setname, sp->setname,
		    sizeof (c.c_setname));
		c.c_sideno = sideno;

		/*
		 * Don't need device id information from this ioctl
		 * Kernel determines device id from dev_t, which
		 * is just what this code would do.
		 */
		c.c_locator.l_devid = (uint64_t)0;
		c.c_locator.l_devid_flags = 0;

		if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
			rval = mdstealerror(ep, &c.c_mde);
			break;
		}
	}

	/* cleanup, return success */
	if (bname != NULL) {
		Free(bname);
		bname = NULL;
	}
	if (dname != NULL) {
		Free(dname);
		dname = NULL;
	}
	return (rval);
}
/*
 * Remove the side name of side sideno for the replica at block blkno of
 * device np.
 *
 * If sp is a multi-node diskset and this node has MD_MN_NODE_OWN set, an
 * MD_MN_MSG_META_DB_DELSIDE message is sent via mdmn_send_message();
 * otherwise the MD_DB_DELSIDE ioctl is issued on this node.
 *
 * Returns 0 on success.  On failure the error is recorded in ep and a
 * non-zero value is returned (-1, or the value of mdstealerror() for a
 * failed ioctl).
 */
int
meta_db_delsidenm(
	mdsetname_t	*sp,
	side_t		sideno,
	mdname_t	*np,
	daddr_t		blkno,
	md_error_t	*ep
)
{
	mddb_config_t	c;
	md_set_desc	*sd;

	if (! metaislocalset(sp)) {
		if ((sd = metaget_setdesc(sp, ep)) == NULL)
			return (-1);
	}

	/* Use rpc.mdcommd to delete mddb side from all nodes */
	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
		md_mn_result_t			*resultp = NULL;
		md_mn_msg_meta_db_delside_t	db_ds;
		int				send_rval;

		db_ds.msg_l_dev = np->dev;
		db_ds.msg_blkno = blkno;
		db_ds.msg_sideno = sideno;
		/* Leave the devid empty until devids are supported */
		db_ds.msg_devid[0] = '\0';

		/*
		 * If reconfig cycle has been started, this node is
		 * stuck in in the return step until this command has
		 * completed.  If mdcommd is suspended, ask
		 * send_message to fail (instead of retrying)
		 * so that metaset can finish allowing the reconfig
		 * cycle to proceed.
		 */
		send_rval = mdmn_send_message(sp->setno,
		    MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
		    MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, (char *)&db_ds,
		    sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
		if (send_rval != 0) {
			if (resultp == NULL) {
				(void) mddserror(ep,
				    MDE_DS_COMMD_SEND_FAIL,
				    sp->setno, NULL, NULL,
				    sp->setname);
			} else {
				/* prefer the error carried in the result */
				(void) mdstealerror(ep, &(resultp->mmr_ep));
				if (mdisok(ep)) {
					(void) mddserror(ep,
					    MDE_DS_COMMD_SEND_FAIL,
					    sp->setno, NULL, NULL,
					    sp->setname);
				}
				free_result(resultp);
			}
			return (-1);
		}
		if (resultp)
			free_result(resultp);
	} else {
		/*
		 * Identify the replica and the side whose name is to be
		 * removed from it.
		 */
		(void) memset(&c, 0, sizeof (c));

		/* Fill in device/replica info */
		c.c_locator.l_dev = meta_cmpldev(np->dev);
		c.c_locator.l_blkno = blkno;

		/* Fill in setno, setname, and sideno */
		c.c_setno = sp->setno;
		/*
		 * Bounded copy, as in meta_db_addsidenms(): never write
		 * past c_setname.  The structure was zeroed above, so
		 * stopping one byte short keeps the name NUL-terminated.
		 */
		(void) strncpy(c.c_setname, sp->setname,
		    sizeof (c.c_setname) - 1);
		c.c_sideno = sideno;

		/*
		 * Don't need device id information from this ioctl
		 * Kernel determines device id from dev_t, which
		 * is just what this code would do.
		 */
		c.c_locator.l_devid = (uint64_t)0;
		c.c_locator.l_devid_flags = 0;

		if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
			return (mdstealerror(ep, &c.c_mde));
	}
	return (0);
}
/*
 * Verify that no two entries of the name list nlp have the same cname.
 *
 * Every entry is compared with each entry that follows it.  Returns 0 if
 * all names differ (or the list is empty); on the first duplicate found
 * MDE_DUPDRIVE is recorded in ep for that name and the value of
 * mderror() is returned.
 */
static int
mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
{
	mdnamelist_t	*outer;
	mdnamelist_t	*inner;

	outer = nlp;
	while (outer != NULL) {
		inner = outer->next;
		while (inner != NULL) {
			if (strcmp(outer->namep->cname,
			    inner->namep->cname) == 0) {
				return (mderror(ep, MDE_DUPDRIVE,
				    outer->namep->cname));
			}
			inner = inner->next;
		}
		outer = outer->next;
	}
	return (0);
}
/*
 * Return 1 if files are different, else return 0
 *
 * The two files are considered identical only if both can be stat()ed,
 * have the same size and the same contents.  Any failure along the way
 * (stat, allocation, open, short or failed read) is reported as
 * "different".  Two empty files are identical; this is decided before
 * allocating so the answer does not depend on what malloc(0) returns.
 */
static int
filediff(char *tsname, char *sname)
{
	int		ret = 1;
	int		fd;
	size_t		tsz, sz;
	ssize_t		nread;
	struct stat	sbuf;
	char		*tbuf = NULL;
	char		*buf = NULL;

	if (stat(tsname, &sbuf) != 0)
		return (1);
	tsz = sbuf.st_size;

	if (stat(sname, &sbuf) != 0)
		return (1);
	sz = sbuf.st_size;

	if (tsz != sz)
		return (1);

	/* nothing to compare */
	if (tsz == 0)
		return (0);

	/* allocate memory and read both files into buffer */
	tbuf = malloc(tsz);
	buf = malloc(sz);
	if (tbuf == NULL || buf == NULL)
		goto out;

	fd = open(tsname, O_RDONLY);
	if (fd == -1)
		goto out;
	nread = read(fd, tbuf, tsz);
	(void) close(fd);
	if (nread < 0 || (size_t)nread != tsz)
		goto out;

	fd = open(sname, O_RDONLY);
	if (fd == -1)
		goto out;
	nread = read(fd, buf, tsz);
	(void) close(fd);
	if (nread < 0 || (size_t)nread != tsz)
		goto out;

	/* compare content; normalise to the documented 0/1 */
	ret = (memcmp(tbuf, buf, tsz) != 0) ? 1 : 0;

out:
	free(tbuf);
	free(buf);
	return (ret);
}
/*
 * patch md.conf file with mddb locations
 *
 * sname - system file to patch; NULL selects "md.conf" when patch is
 *         non-zero and "/kernel/drv/md.conf" otherwise
 * cname - replica location file; NULL selects META_DBCONF
 * patch - patching locally (only used to pick the default sname)
 * ep    - receives the error on failure
 *
 * A temporary copy of sname is produced with meta_systemfile_copy() and
 * the mddb information from cname is appended to it.  If the result is
 * byte-for-byte identical to sname nothing else is done.  Otherwise the
 * temporary file is renamed over sname; if the rename fails with EXDEV
 * (we are in the miniroot) the contents are copied instead and the
 * temporary file is removed.
 *
 * Returns 0 on success and -1 on failure.  A meta_systemfile_copy()
 * failure with EROFS is not treated as an error: ep is cleared and 0 is
 * returned.
 */
int
meta_db_patch(
	char		*sname,		/* system file name */
	char		*cname,		/* mddb.cf file name */
	int		patch,		/* patching locally */
	md_error_t	*ep
)
{
	char		*tsname = NULL;
	char		line[MDDB_BOOTLIST_MAX_LEN];
	FILE		*tsfp = NULL;
	FILE		*mfp = NULL;
	int		rval = -1;

	/* check names */
	if (sname == NULL) {
		if (patch)
			sname = "md.conf";
		else
			sname = "/kernel/drv/md.conf";
	}
	if (cname == NULL)
		cname = META_DBCONF;

	/*
	 * edit file
	 */
	if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
		if (mdissyserror(ep, EROFS)) {
			/*
			 * If we are booted on a read-only root because
			 * of mddb quorum problems we don't want to emit
			 * any scary error messages.
			 */
			mdclrerror(ep);
			rval = 0;
		}
		goto out;
	}

	if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0, 0,
	    ep) != 0)
		goto out;

	/*
	 * Push buffered data out first: filediff() looks at the file by
	 * name, so it must see everything written through tsfp.
	 */
	if (fflush(tsfp) != 0) {
		(void) mdsyserror(ep, errno, tsname);
		goto out;
	}

	/* if file content is identical, skip rename */
	if (filediff(tsname, sname) == 0) {
		rval = 0;
		goto out;
	}

	if (fsync(fileno(tsfp)) != 0) {
		(void) mdsyserror(ep, errno, tsname);
		goto out;
	}
	if (fclose(tsfp) != 0) {
		/* the stream is gone even on failure; don't close it twice */
		tsfp = NULL;
		(void) mdsyserror(ep, errno, tsname);
		goto out;
	}
	tsfp = NULL;

	/*
	 * rename file.  If we get a Cross Device error then it
	 * is because we are in the miniroot.  errno is only examined
	 * when rename() actually failed.
	 */
	if (rename(tsname, sname) != 0) {
		if (errno != EXDEV) {
			(void) mdsyserror(ep, errno, sname);
			goto out;
		}

		/* cross device: copy the contents line by line */
		if ((tsfp = fopen(tsname, "r")) == NULL) {
			(void) mdsyserror(ep, errno, tsname);
			goto out;
		}
		if ((mfp = fopen(sname, "w+")) == NULL) {
			(void) mdsyserror(ep, errno, sname);
			goto out;
		}
		while (fgets(line, sizeof (line), tsfp) != NULL) {
			/* fputs() reports failure with EOF, not NULL */
			if (fputs(line, mfp) == EOF) {
				(void) mdsyserror(ep, errno, sname);
				goto out;
			}
		}
		(void) fclose(tsfp);
		tsfp = NULL;

		if ((fflush(mfp) != 0) || (fsync(fileno(mfp)) != 0)) {
			(void) mdsyserror(ep, errno, sname);
			goto out;
		}
		if (fclose(mfp) != 0) {
			mfp = NULL;
			(void) mdsyserror(ep, errno, sname);
			goto out;
		}
		mfp = NULL;

		/* the copy is complete; the tempfile is no longer needed */
		(void) unlink(tsname);
	}

	Free(tsname);
	tsname = NULL;
	rval = 0;

	/* cleanup, return error */
out:
	if (tsfp != NULL)
		(void) fclose(tsfp);
	if (mfp != NULL)
		(void) fclose(mfp);
	if (tsname != NULL) {
		(void) unlink(tsname);
		Free(tsname);
	}
	return (rval);
}
/*
* Add replicas to set. This happens as a result of:
* - metadb [-s set_name] -a
* - metaset -s set_name -a disk
* - metaset -s set_name -d disk (causes a rebalance of mddbs)
* - metaset -s set_name -b
*
* For a local set, this routine is run on the local set host.
*
* For a traditional diskset, this routine is run on the node that
* is running the metaset command.
*
* For a multinode diskset, this routine is run by the node that is
* running the metaset command. If this is the first mddb added to
* the MN diskset, then no communication is made to other nodes via commd
* since the other nodes will be in-sync with respect to the mddbs when
* those other nodes join the set and snarf in the newly created mddb.
* If this is not the first mddb added to the MN diskset, then this
* attach command is sent to all of the nodes using commd. This keeps
* the nodes in-sync.
*/
int
meta_db_attach(
mdsetname_t *sp,
mdnamelist_t *db_nlp,
mdchkopts_t options,
md_timeval32_t *timeval,
int dbcnt,
int dbsize,
char *sysfilename,
md_error_t *ep
)
{
struct mddb_config c;
mdnamelist_t *nlp;
mdname_t *np;
md_drive_desc *dd = NULL;
md_drive_desc *p;
int i;
int fd;
side_t sideno;
daddr_t blkno;
int replicacount = 0;
int start_svmdaemons = 0;
int rval = 0;
md_error_t status = mdnullerror;
md_set_desc *sd;
int stale_bool = FALSE;
int flags;
int firstmddb = 1;
md_timeval32_t inittime = {0, 0};
/*
* Error if we don't get some work to do.
*/
if (db_nlp == NULL)
return (mdsyserror(ep, EINVAL, NULL));
if (mdnamesareunique(db_nlp, ep) != 0)
return (-1);
(void) memset(&c, 0, sizeof (c));
c.c_id = 0;
c.c_setno = sp->setno;
/* Don't need device id information from this ioctl */
c.c_locator.l_devid = (uint64_t)0;
c.c_locator.l_devid_flags = 0;
if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
if (metaislocalset(sp)) {
if (mdismddberror(&c.c_mde, MDE_DB_INVALID))
mdclrerror(&c.c_mde);
else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) ||
(! (options & MDCHK_ALLOW_NODBS)))
return (mdstealerror(ep, &c.c_mde));
} else {
if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER))
return (mdstealerror(ep, &c.c_mde));
}
mdclrerror(&c.c_mde);
}
/*
* Is current set STALE?
*/
if (c.c_flags & MDDB_C_STALE) {
stale_bool = TRUE;
}
assert(db_nlp != NULL);
/* if these are the first replicas then the SVM daemons need to run */
if (c.c_dbcnt == 0)
start_svmdaemons = 1;
/*
* check to see if we will go over the total possible number
* of data bases
*/
nlp = db_nlp;
while (nlp) {
replicacount += dbcnt;
nlp = nlp->next;
}
if ((replicacount + c.c_dbcnt) > c.c_dbmax)
return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32,
sp->setno, c.c_dbcnt + replicacount, NULL));
/*
* go through and check to make sure all locations specified
* are legal also pick out driver name;
*/
for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
diskaddr_t devsize;
np = nlp->namep;
if (! metaislocalset(sp)) {
uint_t partno;
uint_t rep_partno;
mddrivename_t *dnp = np->drivenamep;
/*
* make sure that non-local database replicas
* are always on the replica slice.
*/
if (meta_replicaslice(dnp,
&rep_partno, ep) != 0)
return (-1);
if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
return (-1);
if (partno != rep_partno)
return (mddeverror(ep, MDE_REPCOMP_ONLY,
np->dev, sp->setname));
}
if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize),
ep)) {
return (-1);
}
if ((devsize = metagetsize(np, ep)) == -1)
return (-1);
if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16))
return (mdmddberror(ep, MDE_REPLICA_TOOSMALL,
meta_getminor(np->dev), sp->setno, devsize,
np->cname));
}
/*
* If first disk in set we don't have lb_inittime yet for use as
* mb_setcreatetime so don't go looking for it. WE'll come back
* later and update after the locator block has been created.
* If this isn't the first disk in the set, we have a locator
* block and thus we have lb_inittime. Set mb_setcreatetime to
* lb_inittime.
*/
if (! metaislocalset(sp)) {
if (c.c_dbcnt != 0) {
firstmddb = 0;
inittime = meta_get_lb_inittime(sp, ep);
}
}
/*
* go through and write all master blocks
*/
for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
np = nlp->namep;
if ((fd = open(np->rname, O_RDWR)) < 0)
return (mdsyserror(ep, errno, np->rname));
for (i = 0; i < dbcnt; i++) {
if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize,
inittime, ep)) {
(void) close(fd);
return (-1);
}
}
(void) close(fd);
}
if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD)
return (-1);
if (! metaislocalset(sp)) {
dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
if (! mdisok(ep))
return (-1);
if ((sd = metaget_setdesc(sp, ep)) == NULL)
return (-1);
}
/*
* go through and tell kernel to add them
*/
for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
mdcinfo_t *cinfo;
np = nlp->namep;
if ((cinfo = metagetcinfo(np, ep)) == NULL) {
rval = -1;
goto out;
}
/*
* If mddb is being added to MN diskset and there already
* exists a valid mddb in the set (which equates to this
* node being an owner of the set) then use rpc.mdcommd
* mechanism to add mddb(s) so that all nodes stay in sync.
* If set is stale, don't log the message since rpc.mdcommd
* can't write the message to the mddb.
*
* Otherwise, just add mddb to this node.
*/
if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
md_mn_result_t *resultp = NULL;
md_mn_msg_meta_db_attach_t attach;
int send_rval;
/*
* In a scenario where new replicas had been added on
* the master, and then all of the old replicas failed
* before the slaves had knowledge of the new replicas,
* the slaves are unable to re-parse in the mddb
* from the new replicas since the slaves have no
* knowledge of the new replicas. The following
* algorithm solves this problem:
* - META_DB_ATTACH message generates submsgs
* - BLOCK parse (master)
* - MDDB_ATTACH new replicas
* - UNBLOCK parse (master) causing parse
* information to be sent from master
* to slaves at a higher class than the
* unblock so the parse message will
* reach slaves before unblock message.
*/
attach.msg_l_dev = np->dev;
attach.msg_cnt = dbcnt;
attach.msg_dbsize = dbsize;
(void) strncpy(attach.msg_dname, cinfo->dname,
sizeof (attach.msg_dname));
(void) splitname(np->bname, &attach.msg_splitname);
attach.msg_options = options;
/* Set devid to NULL until devids are supported */
attach.msg_devid[0] = NULL;
/*
* If reconfig cycle has been started, this node is
* stuck in the return step until this command has
* completed. If mdcommd is suspended, ask
* send_message to fail (instead of retrying)
* so that metaset can finish allowing the reconfig
* cycle to proceed.
*/
flags = MD_MSGF_FAIL_ON_SUSPEND;
if (stale_bool == TRUE)
flags |= MD_MSGF_NO_LOG;
send_rval = mdmn_send_message(sp->setno,
MD_MN_MSG_META_DB_ATTACH,
flags, 0, (char *)&attach,
sizeof (md_mn_msg_meta_db_attach_t),
&resultp, ep);
if (send_rval != 0) {
rval = -1;
if (resultp == NULL)
(void) mddserror(ep,
MDE_DS_COMMD_SEND_FAIL,
sp->setno, NULL, NULL,
sp->setname);
else {
(void) mdstealerror(ep,
&(resultp->mmr_ep));
if (mdisok(ep)) {
(void) mddserror(ep,
MDE_DS_COMMD_SEND_FAIL,
sp->setno, NULL, NULL,
sp->setname);
}
free_result(resultp);
}
goto out;
}
if (resultp)
free_result(resultp);
} else {
/* Adding mddb(s) to just this node */
for (i = 0; i < dbcnt; i++) {
(void) memset(&c, 0, sizeof (c));
/* Fill in device/replica info */
c.c_locator.l_dev = meta_cmpldev(np->dev);
c.c_locator.l_blkno = i * dbsize + 16;
blkno = c.c_locator.l_blkno;
(void) strncpy(c.c_locator.l_driver,
cinfo->dname,
sizeof (c.c_locator.l_driver));
if (splitname(np->bname, &c.c_devname) ==
METASPLIT_LONGDISKNAME && devid_in_use ==
FALSE) {
rval = mddeverror(ep,
MDE_DISKNAMETOOLONG,
NODEV64, np->rname);
goto out;
}
c.c_locator.l_mnum = meta_getminor(np->dev);
/* Fill in setno, setname, and sideno */
c.c_setno = sp->setno;
if (! metaislocalset(sp)) {
if (MD_MNSET_DESC(sd)) {
c.c_multi_node = 1;
}
}
(void) strcpy(c.c_setname, sp->setname);
c.c_sideno = sideno;
/*
* Don't need device id information from this
* ioctl Kernel determines device id from
* dev_t, which is just what this code would do.
*/
c.c_locator.l_devid = (uint64_t)0;
c.c_locator.l_devid_flags = 0;
if (timeval != NULL)
c.c_timestamp = *timeval;
if (setup_med_cfg(sp, &c,
(options & MDCHK_SET_FORCE), ep)) {
rval = -1;
goto out;
}
if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde,
NULL) != 0) {
rval = mdstealerror(ep, &c.c_mde);
goto out;
}
/*
* This is either a traditional diskset OR this
* is the first replica added to a MN diskset.
* In either case, set broadcast to NO_BCAST so
* that message won't go through rpc.mdcommd.
* If this is a traditional diskset, the bcast
* flag is ignored since traditional disksets
* don't use the rpc.mdcommd.
*/
if (meta_db_addsidenms(sp, np, blkno,
DB_ADDSIDENMS_NO_BCAST, ep))
goto out;
}
}
if (! metaislocalset(sp)) {
/* update the dbcnt and size in dd */
for (p = dd; p != NULL; p = p->dd_next)
if (p->dd_dnp == np->drivenamep) {
p->dd_dbcnt = dbcnt;
p->dd_dbsize = dbsize;
break;
}
}
/*
* If this was the first addition of disks to the
* diskset you now need to update the mb_setcreatetime
* which needed lb_inittime which wasn't there until now.
*/
if (firstmddb) {
if (meta_update_mb(sp, dd, ep) != 0) {
return (-1);
}
}
(void) close(fd);
}
out:
if (metaislocalset(sp)) {
/* everything looks fine. Start mdmonitord */
if (rval == 0 && start_svmdaemons == 1) {
if (meta_smf_enable(META_SMF_CORE, &status) == -1) {
mde_perror(&status, "");
mdclrerror(&status);
}
}
if (buildconf(sp, &status)) {
/* Don't mask any previous errors */
if (rval == 0)
rval = mdstealerror(ep, &status);
return (rval);
}
if (meta_db_patch(sysfilename, NULL, 0, &status)) {
/* Don't mask any previous errors */
if (rval == 0)
rval = mdstealerror(ep, &status);
}
} else {
if (update_dbinfo_on_drives(sp, dd,
(options & MDCHK_SET_LOCKED),
(options & MDCHK_SET_FORCE),
&status)) {
/* Don't mask any previous errors */
if (rval == 0)
rval = mdstealerror(ep, &status);
else
mdclrerror(&status);
}
metafreedrivedesc(&dd);
}
/*
* For MN disksets that already had nodes joined
* before the attach of this mddb(s), the name invalidation is
* done by the commd handler routine. Otherwise, if this
* is the first attach of a MN diskset mddb, the invalidation
* must be done here since the first attach cannot be sent
* via the commd since there are no nodes joined to the set yet.
*/
if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) ||
(MD_MNSET_DESC(sd) &&
(!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) {
for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
meta_invalidate_name(nlp->namep);
}
}
return (rval);
}
/*
 * deletelist_length
 *
 * Count the entries of the delete list, i.e. the number of slices
 * named for deletion on the metadb command line.  This is a slice
 * count, not a replica count: one slice may hold several replicas.
 */
static int
deletelist_length(mdnamelist_t *db_nlp)
{
	int		nslices = 0;

	while (db_nlp != NULL) {
		nslices++;
		db_nlp = db_nlp->next;
	}
	return (nslices);
}
/*
 * in_deletelist
 *
 * Look up devname in the delete list by comparing it against the
 * bname of every entry.  Returns the zero-based position of the first
 * matching entry, or -1 when no entry matches.
 */
static int
in_deletelist(char *devname, mdnamelist_t *db_nlp)
{
	mdnamelist_t	*cur = db_nlp;
	int		pos;

	for (pos = 0; cur != NULL; pos++, cur = cur->next) {
		if (strcmp(devname, cur->namep->bname) == 0)
			return (pos);
	}
	return (-1);
}
/*
 * Delete replicas from set.  This happens as a result of:
 *	- metadb [-s set_name] -d
 *	- metaset -s set_name -a disk	(causes a rebalance of mddbs)
 *	- metaset -s set_name -d disk
 *	- metaset -s set_name -b
 *
 * For a local set, this routine is run on the local set host.
 *
 * For a traditional diskset, this routine is run on the node that
 * is running the metaset command.
 *
 * For a multinode diskset, this routine is run by the node that is
 * running the metaset command.  This detach routine is sent to all
 * of the joined nodes in the diskset using commd.  This keeps
 * the nodes in-sync.
 *
 * Arguments:
 *	sp		set the replicas belong to
 *	db_nlp		list of slices whose replicas are to be deleted;
 *			must be non-empty and free of duplicates
 *	force_option	MDFORCE_* bits controlling the safety checks
 *	sysfilename	handed to meta_db_patch() for the local set
 *	ep		receives the error description on failure
 *
 * Returns 0 on success; on failure returns a non-zero value and ep
 * describes the error.
 */
int
meta_db_detach(
	mdsetname_t	*sp,
	mdnamelist_t	*db_nlp,
	mdforceopts_t	force_option,
	char		*sysfilename,
	md_error_t	*ep
)
{
	struct mddb_config	c;
	mdnamelist_t		*nlp;
	mdname_t		*np;
	md_drive_desc		*dd = NULL;
	md_drive_desc		*p;
	int			replicacount;
	int			replica_delete_count;
	int			nr_replica_slices;
	int			i;
	int			stop_svmdaemons = 0;
	int			rval = 0;
	int			index;
	int			valid_replicas_nottodelete = 0;
	int			invalid_replicas_nottodelete = 0;
	int			invalid_replicas_todelete = 0;
	int			errored = 0;
	int			*tag_array;
	int			fd = -1;
	md_error_t		status = mdnullerror;
	md_set_desc		*sd = NULL;
	int			stale_bool = FALSE;
	int			flags;

	/*
	 * Error if we don't get some work to do.
	 */
	if (db_nlp == NULL)
		return (mdsyserror(ep, EINVAL, NULL));

	if (mdnamesareunique(db_nlp, ep) != 0)
		return (-1);

	(void) memset(&c, 0, sizeof (c));
	c.c_id = 0;
	c.c_setno = sp->setno;

	/* Don't need device id information from this ioctl */
	c.c_locator.l_devid = (uint64_t)0;
	c.c_locator.l_devid_flags = 0;
	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
		return (mdstealerror(ep, &c.c_mde));

	/*
	 * Is current set STALE?
	 */
	if (c.c_flags & MDDB_C_STALE) {
		stale_bool = TRUE;
	}

	replicacount = c.c_dbcnt;

	assert(db_nlp != NULL);

	/*
	 * go through and gather how many data bases are on each
	 * device specified.
	 */
	nr_replica_slices = deletelist_length(db_nlp);
	tag_array = (int *)calloc(nr_replica_slices, sizeof (int));
	if (tag_array == NULL)
		return (mdsyserror(ep, ENOMEM, NULL));

	replica_delete_count = 0;
	for (i = 0; i < replicacount; i++) {
		char	*devname;
		int	found = 0;

		c.c_id = i;

		/* Don't need device id information from this ioctl */
		c.c_locator.l_devid = (uint64_t)0;
		c.c_locator.l_devid_flags = 0;
		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
			/* tag_array is still ours; release it before leaving */
			Free(tag_array);
			return (mdstealerror(ep, &c.c_mde));
		}

		devname = splicename(&c.c_devname);

		if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
			Free(devname);
			devname = getlongname(&c, ep);
			if (devname == NULL) {
				Free(tag_array);
				return (-1);
			}
		}

		if ((index = in_deletelist(devname, db_nlp)) != -1) {
			found = 1;
			tag_array[index] = 1;
			replica_delete_count++;
		}

		errored = c.c_locator.l_flags & (MDDB_F_EREAD |
		    MDDB_F_EWRITE | MDDB_F_TOOSMALL | MDDB_F_EFMT |
		    MDDB_F_EDATA | MDDB_F_EMASTER);

		/*
		 * There are four combinations of "errored" and "found"
		 * and they are used to find the number of
		 * (a) valid/invalid replicas that are not in the delete
		 * list and are available in the system.
		 * (b) valid/invalid replicas that are to be deleted.
		 */
		if (errored && !found)		/* errored and !found */
			invalid_replicas_nottodelete++;
		else if (!found)		/* !errored and !found */
			valid_replicas_nottodelete++;
		else if (errored)		/* errored and found */
			invalid_replicas_todelete++;
		/*
		 * else it is !errored and found. This means
		 * valid_replicas_todelete++; But this variable will not
		 * be used anywhere
		 */

		Free(devname);
	}

	/* every slice named for deletion must actually hold a replica */
	index = 0;
	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
		np = nlp->namep;
		if (tag_array[index++] != 1) {
			Free(tag_array);
			return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname));
		}
	}

	Free(tag_array);

	/* if all replicas are deleted stop mdmonitord */
	if ((replicacount - replica_delete_count) == 0)
		stop_svmdaemons = 1;

	if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) {
		if (force_option & MDFORCE_NONE)
			return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname));
		if (! metaislocalset(sp) && ! (force_option & MDFORCE_DS))
			return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname));
	}

	/*
	 * The following algorithms are followed to check for deletion:
	 * (a) If the delete list(db_nlp) has all invalid replicas and no valid
	 * replicas, then deletion should be allowed.
	 * (b) Deletion should be allowed only if valid replicas that are "not"
	 * to be deleted is always greater than the invalid replicas that
	 * are "not" to be deleted.
	 * (c) If the user uses -f option, then deletion should be allowed.
	 */
	if ((invalid_replicas_todelete != replica_delete_count) &&
	    (invalid_replicas_nottodelete > valid_replicas_nottodelete) &&
	    (force_option != MDFORCE_LOCAL))
		return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname));

	/*
	 * go through and tell kernel to delete them
	 */

	/* Don't need device id information from this ioctl */
	c.c_locator.l_devid = (uint64_t)0;
	c.c_locator.l_devid_flags = 0;
	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
		return (mdstealerror(ep, &c.c_mde));

	if (! metaislocalset(sp)) {
		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
		if (! mdisok(ep))
			return (-1);
		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
			/* the drive descriptor list was built above; drop it */
			metafreedrivedesc(&dd);
			return (-1);
		}
	}

	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
		np = nlp->namep;

		/*
		 * If mddb is being deleted from MN diskset and node is
		 * an owner of the diskset then use rpc.mdcommd
		 * mechanism to delete mddb(s) so that all nodes stay in sync.
		 * If set is stale, don't log the message since rpc.mdcommd
		 * can't write the message to the mddb.
		 *
		 * When mddbs are first being added to set, a detach can
		 * be called before any node has joined the diskset, so
		 * must check to see if node is an owner of the diskset.
		 *
		 * Otherwise, just delete mddb from this node.
		 */
		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
			/*
			 * resultp starts out NULL so the failure path below
			 * can tell whether the send routine handed back a
			 * result.
			 */
			md_mn_result_t			*resultp = NULL;
			md_mn_msg_meta_db_detach_t	detach;
			int				send_rval;

			/*
			 * The following algorithm is used to detach replicas.
			 * - META_DB_DETACH message generates submsgs
			 *	- BLOCK parse (master)
			 *	- MDDB_DETACH replicas
			 *	- UNBLOCK parse (master) causing parse
			 *	  information to be sent from master
			 *	  to slaves at a higher class than the
			 *	  unblock so the parse message will
			 *	  reach slaves before unblock message.
			 */

			/* don't ship uninitialized stack bytes in the message */
			(void) memset(&detach, 0, sizeof (detach));
			(void) splitname(np->bname, &detach.msg_splitname);

			/* Set devid to NULL until devids are supported */
			detach.msg_devid[0] = NULL;

			/*
			 * If reconfig cycle has been started, this node is
			 * stuck in the return step until this command has
			 * completed.  If mdcommd is suspended, ask
			 * send_message to fail (instead of retrying)
			 * so that metaset can finish allowing the reconfig
			 * cycle to proceed.
			 */
			flags = MD_MSGF_FAIL_ON_SUSPEND;
			if (stale_bool == TRUE)
				flags |= MD_MSGF_NO_LOG;
			send_rval = mdmn_send_message(sp->setno,
			    MD_MN_MSG_META_DB_DETACH,
			    flags, 0, (char *)&detach,
			    sizeof (md_mn_msg_meta_db_detach_t),
			    &resultp, ep);
			if (send_rval != 0) {
				rval = -1;
				if (resultp == NULL)
					(void) mddserror(ep,
					    MDE_DS_COMMD_SEND_FAIL,
					    sp->setno, NULL, NULL,
					    sp->setname);
				else {
					(void) mdstealerror(ep,
					    &(resultp->mmr_ep));
					if (mdisok(ep)) {
						(void) mddserror(ep,
						    MDE_DS_COMMD_SEND_FAIL,
						    sp->setno, NULL, NULL,
						    sp->setname);
					}
					free_result(resultp);
				}
				goto out;
			}
			if (resultp)
				free_result(resultp);
		} else {
			i = 0;
			while (i < c.c_dbcnt) {
				char	*devname;

				c.c_id = i;

				/* Don't need devid info from this ioctl */
				c.c_locator.l_devid = (uint64_t)0;
				c.c_locator.l_devid_flags = 0;
				if (metaioctl(MD_DB_GETDEV, &c,
				    &c.c_mde, NULL)) {
					rval = mdstealerror(ep, &c.c_mde);
					goto out;
				}

				devname = splicename(&c.c_devname);

				if (strstr(devname, META_LONGDISKNAME_STR)
				    != NULL) {
					Free(devname);
					devname = getlongname(&c, ep);
					if (devname == NULL) {
						/*
						 * Take the common exit like
						 * the other failures in this
						 * loop so the cleanup at
						 * "out" still runs.
						 */
						rval = -1;
						goto out;
					}
				}

				if (strcmp(devname, np->bname) != 0) {
					Free(devname);
					i++;
					continue;
				}
				Free(devname);

				/* Don't need devid info from this ioctl */
				c.c_locator.l_devid = (uint64_t)0;
				c.c_locator.l_devid_flags = 0;
				if (metaioctl(MD_DB_DELDEV, &c,
				    &c.c_mde, NULL) != 0) {
					rval = mdstealerror(ep, &c.c_mde);
					goto out;
				}

				/* Not incrementing "i" intentionally */
			}
		}

		if (! metaislocalset(sp)) {
			/* update the dbcnt and size in dd */
			for (p = dd; p != NULL; p = p->dd_next) {
				if (p->dd_dnp == np->drivenamep) {
					p->dd_dbcnt = 0;
					p->dd_dbsize = 0;
					break;
				}
			}

			/*
			 * Slam a dummy master block and make it self
			 * identifying
			 */
			if ((fd = open(np->rname, O_RDWR)) >= 0) {
				meta_mkdummymaster(sp, fd, 16);
				(void) close(fd);
			}
		}
	}

out:
	if (metaislocalset(sp)) {
		/*
		 * Stop all the daemons if there are
		 * no more replicas so that the module can be
		 * unloaded.
		 */
		if (rval == 0 && stop_svmdaemons == 1) {
			char	buf[MAXPATHLEN];
			int	d;

			for (d = 0; d < DAEMON_COUNT; d++) {
				FILE	*pfp;

				(void) snprintf(buf, MAXPATHLEN,
				    "/usr/bin/pkill -%s -x %s",
				    svmd_kill_list[d].svmd_kill_val,
				    svmd_kill_list[d].svmd_name);
				/* never hand a failed popen() to pclose() */
				pfp = popen(buf, "w");
				if (pfp == NULL || pclose(pfp) == -1)
					md_perror(buf);
			}

			if (meta_smf_disable(META_SMF_ALL, &status) == -1) {
				mde_perror(&status, "");
				mdclrerror(&status);
			}
		}
		if (buildconf(sp, &status)) {
			/* Don't mask any previous errors */
			if (rval == 0)
				rval = mdstealerror(ep, &status);
			else
				mdclrerror(&status);
			return (rval);
		}

		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
			/* Don't mask any previous errors */
			if (rval == 0)
				rval = mdstealerror(ep, &status);
			else
				mdclrerror(&status);
		}
	} else {
		if (update_dbinfo_on_drives(sp, dd,
		    (force_option & MDFORCE_SET_LOCKED),
		    ((force_option & MDFORCE_LOCAL) |
		    (force_option & MDFORCE_DS)), &status)) {
			/* Don't mask any previous errors */
			if (rval == 0)
				rval = mdstealerror(ep, &status);
			else
				mdclrerror(&status);
		}
		metafreedrivedesc(&dd);
	}

	/* sd is only consulted for disksets, where it was set above */
	if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) {
		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
			meta_invalidate_name(nlp->namep);
		}
	}

	return (rval);
}
/*
 * metareplicaname
 *
 * Build a newly allocated md_replica_t describing the replica whose
 * kernel configuration is in *c.
 *
 *	sp	set the replica belongs to
 *	flags	PRINT_FAST selects metaname_fast() over metaname();
 *		MD_BASICNAME_OK skips the metachkcomp() check
 *	c	replica configuration as filled in by the kernel
 *	ep	receives the error description on failure
 *
 * Returns the replica, or NULL on failure with ep set.
 */
static md_replica_t *
metareplicaname(
	mdsetname_t		*sp,
	int			flags,
	struct mddb_config	*c,
	md_error_t		*ep
)
{
	md_replica_t	*rp;
	char		*devname;
	size_t		sz;
	devid_nmlist_t	*disklist = NULL;
	char		*devid_str;

	/* allocate replicaname */
	rp = Zalloc(sizeof (*rp));

	/* get device name */
	devname = splicename(&c->c_devname);

	/*
	 * Check if the device has a long name (>40 characters) and
	 * if so then we have to use devids to get the device name.
	 * If this cannot be done then we have to fail the request.
	 */
	if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
		if (c->c_locator.l_devid != 0) {
			if (meta_deviceid_to_nmlist("/dev/dsk",
			    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
			    c->c_locator.l_minor_name, &disklist) != 0) {
				devid_str = devid_str_encode(
				    (ddi_devid_t)(uintptr_t)
				    c->c_locator.l_devid, NULL);
				(void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
				mderrorextra(ep, devid_str);
				if (devid_str != NULL)
					devid_str_free(devid_str);
				Free(rp);
				Free(devname);
				return (NULL);
			}
		} else {
			(void) mderror(ep, MDE_NODEVID, "");
			Free(rp);
			Free(devname);
			return (NULL);
		}
		/* from here on use the name found through the devid */
		Free(devname);
		devname = disklist[0].devname;
	}

	if (flags & PRINT_FAST) {
		if ((rp->r_namep = metaname_fast(&sp, devname,
		    LOGICAL_DEVICE, ep)) == NULL) {
			Free(devname);
			Free(rp);
			return (NULL);
		}
	} else {
		if ((rp->r_namep = metaname(&sp, devname,
		    LOGICAL_DEVICE, ep)) == NULL) {
			Free(devname);
			Free(rp);
			return (NULL);
		}
	}
	Free(devname);

	/* make sure it's OK */
	if ((! (flags & MD_BASICNAME_OK)) &&
	    (metachkcomp(rp->r_namep, ep) != 0)) {
		Free(rp);
		return (NULL);
	}

	rp->r_blkno = (daddr_t)MD_DISKADDR_ERROR;
	rp->r_nblk = (daddr_t)MD_DISKADDR_ERROR;
	rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID;
	if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
		sz = devid_sizeof((ddi_devid_t)(uintptr_t)
		    (c->c_locator.l_devid));
		if ((rp->r_devid = (ddi_devid_t)malloc(sz)) ==
		    (ddi_devid_t)NULL) {
			/*
			 * Record why we are failing; a NULL return with
			 * a clean ep would leave the caller reporting
			 * an error that has no description.
			 */
			(void) mdsyserror(ep, ENOMEM, NULL);
			Free(rp);
			return (NULL);
		}
		(void) memcpy((void *)rp->r_devid,
		    (void *)(uintptr_t)c->c_locator.l_devid, sz);
		(void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name);
		rp->r_flags &= ~MDDB_F_NODEVID;
		/* Overwrite dev derived from name with dev from devid */
		rp->r_namep->dev = meta_expldev(c->c_locator.l_dev);
	}
	(void) strcpy(rp->r_driver_name, c->c_locator.l_driver);

	rp->r_blkno = c->c_locator.l_blkno;
	if (c->c_dbend != 0)
		rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1;

	/* return replica */
	return (rp);
}
/*
 * Release a replica list: for every node, free the replica's devid
 * copy when one is attached, then the replica, then the node itself.
 * A NULL list is a no-op.
 */
void
metafreereplicalist(
	md_replicalist_t	*rlp
)
{
	while (rlp != NULL) {
		md_replicalist_t	*following = rlp->rl_next;
		md_replica_t		*rep = rlp->rl_repp;

		if (rep->r_devid != (ddi_devid_t)0) {
			free(rep->r_devid);
		}
		Free(rep);
		Free(rlp);

		rlp = following;
	}
}
/*
 * return list of all replicas in set
 *
 *	sp	set to query
 *	flags	passed through to metareplicaname()
 *	rlpp	list head; replicas are appended starting at *rlpp
 *	ep	receives the error description on failure
 *
 * Returns the number of replicas appended.  On failure returns -1,
 * frees whatever *rlpp points to and resets *rlpp to NULL.
 */
int
metareplicalist(
	mdsetname_t		*sp,
	int			flags,
	md_replicalist_t	**rlpp,
	md_error_t		*ep
)
{
	md_replicalist_t	**tail = rlpp;
	int			count = 0;
	struct mddb_config	c;
	int			i;
	/*
	 * Scratch buffer for the devid of the replica being examined.
	 * It is NULL whenever no buffer is held, so every exit can
	 * simply free() it instead of trusting the flag bits echoed
	 * back by the kernel.
	 */
	char			*devid = NULL;

	/* for each replica */
	i = 0;
	do {
		md_replica_t	*rp;

		/* get next replica; first pass only asks for the devid size */
		(void) memset(&c, 0, sizeof (c));
		c.c_id = i;
		c.c_setno = sp->setno;
		c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
				mdclrerror(&c.c_mde);
				break;	/* handle none at all */
			}
			(void) mdstealerror(ep, &c.c_mde);
			goto out;
		}

		if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) {
			if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) {
				(void) mdsyserror(ep, ENOMEM, META_DBCONF);
				goto out;
			}
			c.c_locator.l_devid = (uintptr_t)devid;
			/*
			 * Turn on space and sz flags since 'sz' amount of
			 * space has been alloc'd.
			 */
			c.c_locator.l_devid_flags =
			    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
		}

		/* second pass fetches the devid into the buffer, if any */
		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
				mdclrerror(&c.c_mde);
				break;	/* handle none at all */
			}
			(void) mdstealerror(ep, &c.c_mde);
			goto out;
		}

		/*
		 * Paranoid check - shouldn't happen, but is left as
		 * a place holder for changes that will be needed after
		 * dynamic reconfiguration changes are added to SVM (to
		 * support movement of disks at any point in time).
		 */
		if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
			(void) fprintf(stderr,
			    dgettext(TEXT_DOMAIN,
			    "Error: Relocation Information "
			    "(drvnm=%s, mnum=0x%lx) \n"
			    "relocation information size changed - \n"
			    "rerun command\n"),
			    c.c_locator.l_driver, c.c_locator.l_mnum);
			(void) mderror(ep, MDE_DEVID_TOOBIG, NULL);
			goto out;
		}

		if (c.c_dbcnt == 0)
			break;		/* handle none at all */

		/* get info */
		if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL)
			goto out;

		/* append to list */
		*tail = Zalloc(sizeof (**tail));
		(*tail)->rl_repp = rp;
		tail = &(*tail)->rl_next;
		++count;

		/* metareplicaname() made its own copy of the devid */
		free(devid);
		devid = NULL;
	} while (++i < c.c_dbcnt);

	/* a break out of the loop may still hold a buffer */
	free(devid);

	/* return count */
	return (count);

	/* cleanup, return error */
out:
	free(devid);
	metafreereplicalist(*rlpp);
	*rlpp = NULL;
	return (-1);
}
/*
 * meta_sync_db_locations - bring the configuration files in line with
 *	the replica list for the local set.  First buildconf() rewrites
 *	the backup configuration file (aka mddb.cf); if that succeeds,
 *	meta_db_patch() updates the system configuration file (aka
 *	md.conf).  Nothing is done for any set other than the local set.
 *	Failures are reported only through ep.
 */
void
meta_sync_db_locations(
	mdsetname_t	*sp,
	md_error_t	*ep
)
{
	char	*sysfile = NULL;	/* system file name */
	char	*conffile = NULL;	/* config file name */

	if (metaislocalset(sp) && buildconf(sp, ep) == 0) {
		(void) meta_db_patch(sysfile, conffile, 0, ep);
	}
}
/*
 * setup_db_locations - parse the mddb.cf file and
 *	tells the driver which db locations to use.
 *
 * Each non-comment line of META_DBCONF is read as
 *	driver  minor  blkno  devid-string  checksum
 * Lines whose devid cannot be decoded or has no minor name are
 * skipped.  A line whose checksum does not add up is skipped and
 * remembered as MDE_MDDB_CKSUM.  All other lines are handed to the
 * driver with MD_DB_USEDEV.
 *
 * Afterwards the driver is asked about the local set: a stale
 * database is reported as MDE_DB_STALE, and devid_in_use is set when
 * the kernel reports a non-zero devid size.
 *
 * A missing META_DBCONF is not an error.  Returns 0 on success, a
 * non-zero value with ep set otherwise.
 */
int
meta_setup_db_locations(
	md_error_t	*ep
)
{
	mddb_config_t	c;
	FILE		*fp;
	char		inbuff[1024];
	char		*buff;
	uint_t		i;
	size_t		sz;
	int		rval = 0;
	char		*devidp;
	uint_t		devid_size;
	char		*minor_name = NULL;
	ddi_devid_t	devid_decode;
	int		checksum;

	/* do mddb.cf file */
	(void) memset(&c, '\0', sizeof (c));
	if ((fp = fopen(META_DBCONF, "r")) == NULL) {
		if (errno != ENOENT)
			return (mdsyserror(ep, errno, META_DBCONF));
	}
	while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1),
	    fp)) != NULL)) {

		/* ignore comments */
		if (*buff == '#')
			continue;

		/* parse locator */
		(void) memset(&c, 0, sizeof (c));
		c.c_setno = MD_LOCAL_SET;
		i = strcspn(buff, " \t");
		/*
		 * Copy at most sizeof - 1 bytes so that the zeroed
		 * l_driver always stays NUL terminated; the checksum
		 * loop below depends on the terminator.  The whole
		 * token is skipped in the input either way.
		 */
		(void) strncpy(c.c_locator.l_driver, buff,
		    (i < sizeof (c.c_locator.l_driver)) ?
		    i : sizeof (c.c_locator.l_driver) - 1);
		buff += i;
		c.c_locator.l_dev =
		    makedev((major_t)0, (minor_t)strtol(buff, &buff, 10));
		c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10);
		c.c_locator.l_mnum = minor(c.c_locator.l_dev);

		/* parse out devid */
		while (isspace((unsigned char)(*buff)))
			buff += 1;
		i = strcspn(buff, " \t");
		if ((devidp = (char *)malloc(i+1)) == NULL) {
			(void) fclose(fp);
			return (mdsyserror(ep, ENOMEM, META_DBCONF));
		}

		(void) strncpy(devidp, buff, i);
		devidp[i] = '\0';
		minor_name = NULL;
		if (devid_str_decode(devidp, &devid_decode,
		    &minor_name) == -1) {
			free(devidp);
			continue;
		}

		/* Conf file must have minor name associated with devid */
		if (minor_name == NULL) {
			free(devidp);
			devid_free(devid_decode);
			continue;
		}

		sz = devid_sizeof(devid_decode);
		/* Copy to devid size buffer that ioctl expects */
		if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) {
			devid_free(devid_decode);
			free(minor_name);
			free(devidp);
			(void) fclose(fp);
			return (mdsyserror(ep, ENOMEM, META_DBCONF));
		}

		(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
		    (void *)devid_decode, sz);

		devid_free(devid_decode);

		/*
		 * The name plus its terminator must fit in the
		 * destination that strcpy() writes below.
		 */
		if (strlen(minor_name) >= sizeof (c.c_locator.l_minor_name)) {
			free(minor_name);
			free(devidp);
			free((void *)(uintptr_t)c.c_locator.l_devid);
			(void) fclose(fp);
			return (mdsyserror(ep, ENOMEM, META_DBCONF));
		}
		(void) strcpy(c.c_locator.l_minor_name, minor_name);
		free(minor_name);
		c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
		    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
		c.c_locator.l_devid_sz = sz;

		devid_size = strlen(devidp);
		buff += devid_size;

		/*
		 * The stored checksum plus the driver name, devid
		 * string, minor number and block number must sum to 42.
		 */
		checksum = strtol(buff, &buff, 10);
		for (i = 0; c.c_locator.l_driver[i] != 0; i++)
			checksum += c.c_locator.l_driver[i];
		for (i = 0; i < devid_size; i++) {
			checksum += devidp[i];
		}
		free(devidp);

		checksum += minor(c.c_locator.l_dev);
		checksum += c.c_locator.l_blkno;
		if (checksum != 42) {
			/* overwritten later for more serious problems */
			rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF);
			free((void *)(uintptr_t)c.c_locator.l_devid);
			continue;
		}
		c.c_locator.l_flags = 0;

		/* use db location */
		if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
			free((void *)(uintptr_t)c.c_locator.l_devid);
			(void) fclose(fp);
			return (mdstealerror(ep, &c.c_mde));
		}

		/* free up devid if in use */
		free((void *)(uintptr_t)c.c_locator.l_devid);
		c.c_locator.l_devid = (uint64_t)0;
		c.c_locator.l_devid_flags = 0;
	}
	if ((fp) && (fclose(fp) != 0))
		return (mdsyserror(ep, errno, META_DBCONF));

	/* check for stale database */
	(void) memset((char *)&c, 0, sizeof (struct mddb_config));
	c.c_id = 0;
	c.c_setno = MD_LOCAL_SET;

	/*
	 * While we do not need the devid here we may need to
	 * know if devid's are being used by the kernel for
	 * the replicas. This is because under some circumstances
	 * we can only manipulate the SVM configuration if the
	 * kernel is using devid's.
	 */
	c.c_locator.l_devid = (uint64_t)0;
	c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
	c.c_locator.l_devid_sz = 0;

	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
		if (! mdismddberror(&c.c_mde, MDE_DB_INVALID))
			return (mdstealerror(ep, &c.c_mde));
		mdclrerror(&c.c_mde);
	}

	if (c.c_flags & MDDB_C_STALE)
		return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET,
		    0, NULL));

	if (c.c_locator.l_devid_sz != 0) {
		/*
		 * Devid's are being used to track the replicas because
		 * there is space for a devid.
		 */
		devid_in_use = TRUE;
	}

	/* success */
	return (rval);
}
/*
 * meta_db_minreplica - returns the block count (r_nblk) of the
 *	smallest replica currently in the set.  Returns -1 when the
 *	replica list cannot be obtained or is empty.
 */
daddr_t
meta_db_minreplica(
	mdsetname_t	*sp,
	md_error_t	*ep
)
{
	md_replicalist_t	*head = NULL;
	md_replicalist_t	*cur;
	daddr_t			smallest = 0;

	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &head, ep) < 0)
		return (-1);

	if (head == NULL)
		return (-1);

	/*
	 * Walk the list keeping the lowest r_nblk seen so far; a
	 * running value of 0 means nothing has been recorded yet.
	 */
	for (cur = head; cur != NULL; cur = cur->rl_next) {
		if (smallest == 0 || cur->rl_repp->r_nblk < smallest)
			smallest = cur->rl_repp->r_nblk;
	}

	metafreereplicalist(head);
	return (smallest);
}
/*
 * meta_get_replica_names
 *	Appends the name of every replica slice of the local set to
 *	the namelist at *nlpp.  Returns the number of names appended,
 *	0 when sp is not the local set, or -1 when the replica list
 *	cannot be obtained.  The options argument is not used.
 */
/*ARGSUSED*/
int
meta_get_replica_names(
	mdsetname_t	*sp,
	mdnamelist_t	**nlpp,
	int		options,
	md_error_t	*ep
)
{
	md_replicalist_t	*replicas = NULL;
	md_replicalist_t	*cur;
	mdnamelist_t		**tailpp = nlpp;
	int			cnt = 0;

	assert(nlpp != NULL);

	if (metaislocalset(sp)) {
		if (metareplicalist(sp, MD_BASICNAME_OK, &replicas, ep) < 0) {
			cnt = -1;
		} else {
			/*
			 * The append wrapper hands back the new tail, so
			 * each name is added without rewalking the list.
			 */
			for (cur = replicas; cur != NULL; cur = cur->rl_next) {
				tailpp = meta_namelist_append_wrapper(
				    tailpp, cur->rl_repp->r_namep);
				++cnt;
			}
		}
	}

	/* cleanup, return count or error */
	metafreereplicalist(replicas);
	return (cnt);
}