meta_import.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <assert.h>
#include <ctype.h>
#include <libdevinfo.h>
#include <mdiox.h>
#include <meta.h>
#include "meta_repartition.h"
#include "meta_set_prv.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/lvm/md_mddb.h>
#include <sys/lvm/md_names.h>
#include <sys/lvm/md_crc.h>
typedef struct did_list {
void *rdid; /* real did if replicated set */
void *did; /* did stored in lb */
char *devname;
dev_t dev;
uint_t did_index;
char *minor_name;
struct did_list *next;
} did_list_t;
typedef struct replicated_disk {
void *old_devid;
void *new_devid;
struct replicated_disk *next;
} replicated_disk_t;
/*
* The current implementation limits the max device id length to 256 bytes.
* Should the max device id length be increased, this define would have to
* be bumped up accordingly
*/
#define MAX_DEVID_LEN 256
/*
* We store a global list of all the replicated disks in the system. In
* order to prevent us from performing a linear search on this list, we
* store the disks in a two dimensional sparse array. The disks are bucketed
* based on the length of their device ids.
*/
static replicated_disk_t *replicated_disk_list[MAX_DEVID_LEN + 1] = {NULL};
/*
* The list of replicated disks is built just once and this flag is set
* once it's done
*/
static int replicated_disk_list_built = 0;
/*
* Map logical blk to physical
*
* This is based on the routine of the same name in the md kernel module (see
* file md_mddb.c), with the following caveats:
*
* - The kernel routine works on in core master blocks, or mddb_mb_ic_t; this
* routine works instead on the mddb_mb_t read directly from the disk
*/
static daddr_t
getphysblk(
mddb_block_t blk,
mddb_mb_t *mbp
)
{
/*
* Sanity check: is the block within range? If so, we then assume
* that the block range map in the master block is valid and
* consistent with the block count. Unfortunately, there is no
* reliable way to validate this assumption.
*/
if (blk >= mbp->mb_blkcnt || blk >= mbp->mb_blkmap.m_consecutive)
return ((daddr_t)-1);
return (mbp->mb_blkmap.m_firstblk + blk);
}
/*
* drive_append()
*
* Append to tail of linked list of md_im_drive_info_t.
*
* Will allocate space for new node and copy args into new space.
*
* Returns pointer to new node.
*/
static md_im_drive_info_t *
drive_append(
md_im_drive_info_t **midpp,
mddrivename_t *dnp,
void *devid,
void *rdevid,
int devid_sz,
char *minor_name,
md_timeval32_t timestamp,
md_im_replica_info_t *mirp
)
{
md_im_drive_info_t *midp;
int o_devid_sz;
for (; (*midpp != NULL); midpp = &((*midpp)->mid_next))
;
midp = *midpp = Zalloc(sizeof (md_im_drive_info_t));
midp->mid_dnp = dnp;
/*
* If rdevid is not NULL then we know we are dealing with
* replicated diskset case. 'devid_sz' will always be the
* size of a valid devid which can be 'devid' or 'rdevid'
*/
midp->mid_devid = (void *)Malloc(devid_sz);
if (rdevid) {
(void) memcpy(midp->mid_devid, rdevid, devid_sz);
/*
* Also need to store the 'other' devid
*/
o_devid_sz = devid_sizeof((ddi_devid_t)devid);
midp->mid_o_devid = (void *)Malloc(o_devid_sz);
(void) memcpy(midp->mid_o_devid, devid, o_devid_sz);
midp->mid_o_devid_sz = o_devid_sz;
} else {
/*
* In the case of regular diskset, midp->mid_o_devid
* will be a NULL pointer
*/
(void) memcpy(midp->mid_devid, devid, devid_sz);
}
midp->mid_devid_sz = devid_sz;
midp->mid_setcreatetimestamp = timestamp;
(void) strlcpy(midp->mid_minor_name, minor_name, MDDB_MINOR_NAME_MAX);
midp->mid_replicas = mirp;
return (midp);
}
/*
* drive_append_wrapper()
*
* Constant time append wrapper; the append function will always walk the list,
* this will take a tail argument and use the append function on just the tail
* node, doing the appropriate old-tail-next-pointer bookkeeping.
*/
static md_im_drive_info_t **
drive_append_wrapper(
md_im_drive_info_t **tailpp,
mddrivename_t *dnp,
void *devid,
void *rdevid,
int devid_sz,
char *minor_name,
md_timeval32_t timestamp,
md_im_replica_info_t *mirp
)
{
(void) drive_append(tailpp, dnp, devid, rdevid, devid_sz, minor_name,
timestamp, mirp);
if ((*tailpp)->mid_next == NULL)
return (tailpp);
return (&((*tailpp)->mid_next));
}
/*
* replica_append()
*
* Append to tail of linked list of md_im_replica_info_t.
*
* Will allocate space for new node and copy args into new space.
*
* Returns pointer to new node.
*/
static md_im_replica_info_t *
replica_append(
md_im_replica_info_t **mirpp,
int flags,
daddr32_t offset,
daddr32_t length,
md_timeval32_t timestamp
)
{
md_im_replica_info_t *mirp;
for (; (*mirpp != NULL); mirpp = &((*mirpp)->mir_next))
;
mirp = *mirpp = Zalloc(sizeof (md_im_replica_info_t));
mirp->mir_flags = flags;
mirp->mir_offset = offset;
mirp->mir_length = length;
mirp->mir_timestamp = timestamp;
return (mirp);
}
/*
* replica_append_wrapper()
*
* Constant time append wrapper; the append function will always walk the list,
* this will take a tail argument and use the append function on just the tail
* node, doing the appropriate old-tail-next-pointer bookkeeping.
*/
static md_im_replica_info_t **
replica_append_wrapper(
md_im_replica_info_t **tailpp,
int flags,
daddr32_t offset,
daddr32_t length,
md_timeval32_t timestamp
)
{
(void) replica_append(tailpp, flags, offset, length, timestamp);
if ((*tailpp)->mir_next == NULL)
return (tailpp);
return (&(*tailpp)->mir_next);
}
/*
* map_replica_disk()
*
* Searches the device id list for a specific
* disk based on the locator block device id array index.
*
* Returns a pointer to the did_list node if a match was
* found or NULL otherwise.
*/
static did_list_t *
map_replica_disk(
did_list_t *did_listp,
int did_index
)
{
did_list_t *tailp = did_listp;
while (tailp != NULL) {
if (tailp->did_index == did_index)
return (tailp);
tailp = tailp->next;
}
/* not found, return failure */
return (NULL);
}
/*
* replicated_list_lookup()
*
* looks up a replicated disk entry in the global replicated disk list
* based upon the length of that disk's device id. returns the new device id
* for the disk.
* If you store the returned devid you must create a local copy.
*/
static void *
replicated_list_lookup(
uint_t devid_len,
void *old_devid
)
{
replicated_disk_t *head = NULL;
assert(devid_len <= MAX_DEVID_LEN);
head = replicated_disk_list[devid_len];
if (head == NULL)
return (NULL);
do {
if (devid_compare((ddi_devid_t)old_devid,
(ddi_devid_t)head->old_devid) == 0)
return (head->new_devid);
head = head->next;
} while (head != NULL);
return (NULL);
}
/*
* replicated_list_insert()
*
* inserts a replicated disk entry into the global replicated disk list
*/
static void
replicated_list_insert(
size_t old_devid_len,
void *old_devid,
void *new_devid
)
{
replicated_disk_t *repl_disk, **first_entry;
void *repl_old_devid = NULL;
assert(old_devid_len <= MAX_DEVID_LEN);
repl_disk = Zalloc(sizeof (replicated_disk_t));
repl_old_devid = Zalloc(old_devid_len);
(void) memcpy(repl_old_devid, (void *)old_devid, old_devid_len);
repl_disk->old_devid = repl_old_devid;
repl_disk->new_devid = new_devid;
first_entry = &replicated_disk_list[old_devid_len];
if (*first_entry == NULL) {
*first_entry = repl_disk;
return;
}
repl_disk->next = *first_entry;
replicated_disk_list[old_devid_len] = repl_disk;
}
/*
* get_replica_disks()
*
* Will step through the locator records in the supplied locator block, and add
* each one with an active replica to a supplied list of md_im_drive_info_t, and
* add the appropriate replicas to the md_im_replica_info_t contained therein.
*/
static void
get_replica_disks(
md_im_set_desc_t *misp,
did_list_t *did_listp,
mddb_mb_t *mb,
mddb_lb_t *lbp,
md_error_t *ep,
int replicated
)
{
mddrivename_t *dnp;
int indx, on_list;
mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep);
int flags;
int devid_sz;
char *minor_name;
did_list_t *replica_disk;
daddr32_t offset;
daddr32_t length;
md_timeval32_t timestamp;
md_im_replica_info_t **mirpp = NULL;
md_im_drive_info_t **midpp = &misp->mis_drives;
md_im_drive_info_t *midp;
void *did;
for (indx = 0; indx < lbp->lb_loccnt; indx++) {
on_list = 0;
if (lbp->lb_locators[indx].l_flags & MDDB_F_ACTIVE) {
/*
* search the device id list for a
* specific ctds based on the locator
* block device id array index.
*/
replica_disk = map_replica_disk(did_listp, indx);
assert(replica_disk != NULL);
/*
* metadrivename() can fail for a slice name
* if there is not an existing mddrivename_t.
* So we use metadiskname() to strip the slice
* number.
*/
dnp = metadrivename(&sp,
metadiskname(replica_disk->devname), ep);
for (midp = misp->mis_drives; midp != NULL;
midp = midp->mid_next) {
if (dnp == midp->mid_dnp) {
on_list = 1;
mirpp = &midp->mid_replicas;
break;
}
}
/*
* Get the correct devid_sz
*/
if (replicated)
did = replica_disk->rdid;
else
did = replica_disk->did;
devid_sz = devid_sizeof((ddi_devid_t)did);
minor_name = replica_disk->minor_name;
/*
* New on the list so add it
*/
if (!on_list) {
mddb_mb_t *mbp;
uint_t sliceno;
mdname_t *rsp;
int fd = -1;
mbp = Malloc(DEV_BSIZE);
/* determine the replica slice */
if (meta_replicaslice(dnp, &sliceno,
ep) != 0) {
Free(mbp);
continue;
}
/*
* if the replica slice size is zero,
* don't bother opening
*/
if (dnp->vtoc.parts[sliceno].size == 0) {
Free(mbp);
continue;
}
if ((rsp = metaslicename(dnp, sliceno,
ep)) == NULL) {
Free(mbp);
continue;
}
if ((fd = open(rsp->rname,
O_RDONLY| O_NDELAY)) < 0) {
Free(mbp);
continue;
}
/*
* a drive may not have a master block
*/
if (read_master_block(ep, fd, mbp,
DEV_BSIZE) <= 0) {
mdclrerror(ep);
Free(mbp);
(void) close(fd);
continue;
}
(void) close(fd);
midpp = drive_append_wrapper(midpp, dnp,
replica_disk->did, replica_disk->rdid,
devid_sz, minor_name, mbp->mb_setcreatetime,
NULL);
mirpp = &((*midpp)->mid_replicas);
Free(mbp);
}
/*
* For either of these assertions to fail, it implies
* a NULL return from metadrivename() above. Since
* the args came from a presumed valid locator block,
* that's Bad.
*/
assert(midpp != NULL);
assert(mirpp != NULL);
/*
* Extract the parameters describing this replica.
*
* The magic "1" in the length calculation accounts
* for the length of the master block, in addition to
* the block count it describes. (The master block
* will always take up one block on the disk, and
* there will always only be one master block per
* replica, even though much of the code is structured
* to handle noncontiguous replicas.)
*/
flags = lbp->lb_locators[indx].l_flags;
offset = lbp->lb_locators[indx].l_blkno;
length = mb->mb_blkcnt + 1;
timestamp = mb->mb_setcreatetime;
mirpp = replica_append_wrapper(mirpp, flags,
offset, length, timestamp);
/*
* If we're here it means -
*
* a) we had an active copy of the replica, and
* b) we've added the disk to the list of
* disks as well.
*
* We need to bump up the number of active
* replica count for each such replica so that it
* can be used later for replica quorum check.
*/
misp->mis_active_replicas++;
}
}
}
/*
* get_nonreplica_disks()
*
* Extracts the disks without replicas from the locator name space and adds them
* to the supplied list of md_im_drive_info_t.
*/
static void
get_nonreplica_disks(
md_im_set_desc_t *misp,
mddb_rb_t *did_nm,
mddb_rb_t *did_shrnm,
md_error_t *ep,
int replicated
)
{
char *search_path = "/dev";
devid_nmlist_t *nmlist;
md_im_drive_info_t *midp, **midpp = &misp->mis_drives;
mddrivename_t *dnp;
mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep);
mddb_rb_t *rbp_did = did_nm;
mddb_rb_t *rbp_did_shr = did_shrnm;
int on_list = 0;
int devid_sz;
struct devid_min_rec *did_rec;
struct devid_shr_rec *did_shr_rec;
struct did_shr_name *did;
struct did_min_name *min;
void *r_did; /* NULL if not a replicated diskset */
void *valid_did;
/*
* We got a pointer to an mddb record, which we expect to contain a
* name record; extract the pointer thereto.
*/
/* LINTED */
did_rec = (struct devid_min_rec *)((caddr_t)(&rbp_did->rb_data));
/* LINTED */
did_shr_rec = (struct devid_shr_rec *)
((caddr_t)(&rbp_did_shr->rb_data));
/*
* Skip the nm_rec_hdr and iterate on the array of struct minor_name
* at the end of the devid_min_rec
*/
for (min = &did_rec->minor_name[0]; min->min_devid_key != 0;
/* LINTED */
min = (struct did_min_name *)((char *)min + DID_NAMSIZ(min))) {
on_list = 0;
r_did = NULL;
/*
* For a give DID_NM key, locate the corresponding device
* id from DID_NM_SHR
*/
for (did = &did_shr_rec->device_id[0]; did->did_key != 0;
/* LINTED */
did = (struct did_shr_name *)
((char *)did + DID_SHR_NAMSIZ(did))) {
/*
* We got a match, this is the device id we're
* looking for
*/
if (min->min_devid_key == did->did_key)
break;
}
if (did->did_key == 0) {
/* we didn't find a match */
assert(did->did_key != 0);
md_exit(NULL, 1);
}
/*
* If replicated diskset
*/
if (replicated) {
size_t new_devid_len;
char *temp;
/*
* In this case, did->did_devid will
* be invalid so lookup the real one
*/
temp = replicated_list_lookup(did->did_size,
did->did_devid);
new_devid_len = devid_sizeof((ddi_devid_t)temp);
r_did = Zalloc(new_devid_len);
(void) memcpy(r_did, temp, new_devid_len);
valid_did = r_did;
} else {
valid_did = did->did_devid;
}
/* Get the ctds mapping for that device id */
if (meta_deviceid_to_nmlist(search_path,
(ddi_devid_t)valid_did,
&min->min_name[0], &nmlist) == 0) {
assert(nmlist->devname != NULL);
/* Don't bother with metadevices, but track disks */
if (!is_metaname(nmlist->devname)) {
dnp = metadrivename(&sp,
metadiskname(nmlist->devname), ep);
assert(dnp != NULL);
/* Is it already on the list? */
for (midp = misp->mis_drives; midp != NULL;
midp = midp->mid_next) {
if (midp->mid_dnp == dnp) {
on_list = 1;
break;
}
}
devid_sz = devid_sizeof(
(ddi_devid_t)valid_did);
if (!on_list) {
mddb_mb_t *mbp;
uint_t sliceno;
mdname_t *rsp;
int fd = -1;
mbp = Malloc(DEV_BSIZE);
/* determine the replica slice */
if (meta_replicaslice(dnp, &sliceno,
ep) != 0) {
Free(mbp);
continue;
}
/*
* if the replica slice size is zero,
* don't bother opening
*/
if (dnp->vtoc.parts[sliceno].size
== 0) {
Free(mbp);
continue;
}
if ((rsp = metaslicename(dnp, sliceno,
ep)) == NULL) {
Free(mbp);
continue;
}
if ((fd = open(rsp->rname,
O_RDONLY| O_NDELAY)) < 0) {
Free(mbp);
continue;
}
/*
* a drive may not have a master block
*/
if (read_master_block(ep, fd, mbp,
DEV_BSIZE) <= 0) {
mdclrerror(ep);
Free(mbp);
(void) close(fd);
continue;
}
(void) close(fd);
/*
* If it is replicated diskset,
* r_did will be non-NULL and
* devid_sz will be its size
*/
midpp = drive_append_wrapper(midpp,
dnp, &did->did_devid, r_did,
devid_sz, &min->min_name[0],
mbp->mb_setcreatetime, NULL);
Free(mbp);
}
}
devid_free_nmlist(nmlist);
}
}
}
/*
* set_append()
*
* Append to tail of linked list of md_im_set_desc_t.
*
* Will allocate space for new node AND populate it by extracting disks with
* and without replicas from the locator blocks and locator namespace.
*
* Returns pointer to new node.
*/
static md_im_set_desc_t *
set_append(
md_im_set_desc_t **mispp,
did_list_t *did_listp,
mddb_mb_t *mb,
mddb_lb_t *lbp,
mddb_rb_t *nm,
mddb_rb_t *did_nm,
mddb_rb_t *did_shrnm,
md_error_t *ep,
int replicated
)
{
md_im_set_desc_t *misp;
set_t setno = mb->mb_setno;
/* run to end of list */
for (; (*mispp != NULL); mispp = &((*mispp)->mis_next))
;
/* allocate new list element */
misp = *mispp = Zalloc(sizeof (md_im_set_desc_t));
if (replicated)
misp->mis_flags = MD_IM_SET_REPLICATED;
misp->mis_oldsetno = setno;
/* Get the disks with and without replicas */
get_replica_disks(misp, did_listp, mb, lbp, ep, replicated);
if (nm != NULL && did_nm != NULL && did_shrnm != NULL) {
get_nonreplica_disks(misp, did_nm, did_shrnm, ep, replicated);
}
/*
* An error in this struct could come from either of the above routines;
* in both cases, we want to pass it back on up.
*/
return (misp);
}
/*
* set_append_wrapper()
*
* Constant time append wrapper; the append function will always walk the list,
* this will take a tail argument and use the append function on just the tail
* node, doing the appropriate old-tail-next-pointer bookkeeping.
*/
static md_im_set_desc_t **
set_append_wrapper(
md_im_set_desc_t **tailpp,
did_list_t *did_listp,
mddb_mb_t *mb,
mddb_lb_t *lbp,
mddb_rb_t *nm,
mddb_rb_t *did_nm,
mddb_rb_t *did_shrnm,
md_error_t *ep,
int replicated
)
{
(void) set_append(tailpp, did_listp, mb, lbp, nm, did_nm,
did_shrnm, ep, replicated);
/* it's the first item in the list, return it instead of the next */
return (((*tailpp)->mis_next == NULL) ? tailpp : &(*tailpp)->mis_next);
}
/*
* add_disk_names()
*
* Iterator to walk the minor node tree of the device snapshot, adding only the
* first non-block instance of each non-cdrom minor node to a list of disks.
*/
static int
add_disk_names(di_node_t node, di_minor_t minor, void *args)
{
char *search_path = "/dev";
ddi_devid_t devid = di_devid(node);
devid_nmlist_t *nm;
char *min = di_minor_name(minor);
md_im_names_t *cnames = (md_im_names_t *)args;
static di_node_t save_node = NULL;
/*
* skip CD devices
* If a device does not have a device id, we can't
* do anything with it so just exclude it from our
* list.
*
* This would also encompass CD devices and floppy
* devices that don't have a device id.
*/
if (devid == NULL) {
return (DI_WALK_CONTINUE);
}
/* char disk devices (as opposed to block) */
if (di_minor_spectype(minor) == S_IFCHR) {
/* only first occurrence (slice 0) of each instance */
if (save_node == NULL || node != save_node) {
save_node = node;
if (meta_deviceid_to_nmlist(search_path, devid,
min, &nm) == 0) {
int index = cnames->min_count++;
assert(nm->devname != NULL);
cnames->min_names =
Realloc(cnames->min_names,
cnames->min_count *
sizeof (char *));
assert(cnames->min_names != NULL);
cnames->min_names[index] =
metadiskname(nm->devname);
devid_free_nmlist(nm);
}
}
}
return (DI_WALK_CONTINUE);
}
/*
* meta_list_disks()
*
* Snapshots the device tree and extracts disk devices from the snapshot.
*/
int
meta_list_disks(md_error_t *ep, md_im_names_t *cnames)
{
di_node_t root_node;
assert(cnames != NULL);
cnames->min_count = 0;
cnames->min_names = NULL;
if ((root_node = di_init("/", DINFOCPYALL|DINFOFORCE))
== DI_NODE_NIL) {
return (mdsyserror(ep, errno, NULL));
}
(void) di_walk_minor(root_node, DDI_NT_BLOCK, 0, cnames,
add_disk_names);
di_fini(root_node);
return (0);
}
/*
* meta_imp_drvused
*
* Checks if given drive is mounted, swapped, part of disk configuration
* or in use by SVM. ep also has error code set up if drive is in use.
*
* Returns 1 if drive is in use.
* Returns 0 if drive is not in use.
*/
int
meta_imp_drvused(
mdsetname_t *sp,
mddrivename_t *dnp,
md_error_t *ep
)
{
md_error_t status = mdnullerror;
md_error_t *db_ep = &status;
/*
* We pass in db_ep to meta_setup_db_locations
* and never ever use the error contained therein
* because all we're interested in is a check to
* see whether any local metadbs are present.
*/
if ((meta_check_drivemounted(sp, dnp, ep) != 0) ||
(meta_check_driveswapped(sp, dnp, ep) != 0) ||
(((meta_setup_db_locations(db_ep) == 0) &&
((meta_check_drive_inuse(sp, dnp, 1, ep) != 0) ||
(meta_check_driveinset(sp, dnp, ep) != 0))))) {
return (1);
} else {
return (0);
}
}
/*
* meta_prune_cnames()
*
* Removes in-use disks from the list prior to further processing.
*
* Return value depends on err_on_prune flag: if set, and one or more disks
* are pruned, the return list will be the pruned disks. If not set, or if no
* disks are pruned, the return list will be the unpruned disks.
*/
mddrivenamelist_t *
meta_prune_cnames(
md_error_t *ep,
md_im_names_t *cnames,
int err_on_prune
)
{
int d;
int fcount = 0;
mddrivenamelist_t *dnlp = NULL;
mddrivenamelist_t **dnlpp = &dnlp;
mddrivenamelist_t *fdnlp = NULL;
mddrivenamelist_t **fdnlpp = &fdnlp;
mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep);
for (d = 0; d < cnames->min_count; ++d) {
mddrivename_t *dnp;
dnp = metadrivename(&sp, cnames->min_names[d], ep);
if (dnp == NULL) {
/*
* Assuming we're interested in knowing about
* whatever error occurred, but not in stopping.
*/
mde_perror(ep, cnames->min_names[d]);
mdclrerror(ep);
continue;
}
/*
* Check if the drive is inuse.
*/
if (meta_imp_drvused(sp, dnp, ep)) {
fdnlpp = meta_drivenamelist_append_wrapper(fdnlpp, dnp);
fcount++;
mdclrerror(ep);
} else {
dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp);
}
}
if (fcount) {
if (err_on_prune) {
(void) mddserror(ep, MDE_DS_DRIVEINUSE, 0,
NULL, fdnlp->drivenamep->cname, NULL);
metafreedrivenamelist(dnlp);
return (fdnlp);
}
metafreedrivenamelist(fdnlp);
}
return (dnlp);
}
/*
* read_master_block()
*
* Returns:
* < 0 for failure
* 0 for no valid master block
* 1 for valid master block
*
* The supplied buffer will be filled in for EITHER 0 or 1.
*/
int
read_master_block(
md_error_t *ep,
int fd,
void *bp,
int bsize
)
{
mddb_mb_t *mbp = bp;
int rval = 1;
assert(bp != NULL);
if (lseek(fd, (off_t)dbtob(16), SEEK_SET) < 0)
return (mdsyserror(ep, errno, NULL));
if (read(fd, bp, bsize) != bsize)
return (mdsyserror(ep, errno, NULL));
/*
* The master block magic number can either be MDDB_MAGIC_MB in
* the case of a real master block, or, it can be MDDB_MAGIC_DU
* in the case of a dummy master block
*/
if ((mbp->mb_magic != MDDB_MAGIC_MB) &&
(mbp->mb_magic != MDDB_MAGIC_DU)) {
rval = 0;
(void) mdmddberror(ep, MDE_DB_MASTER, 0, 0, 0, NULL);
}
if (mbp->mb_revision != MDDB_REV_MB) {
rval = 0;
}
return (rval);
}
/*
* read_locator_block()
*
* Returns:
* < 0 for failure
* 0 for no valid locator block
* 1 for valid locator block
*/
int
read_locator_block(
md_error_t *ep,
int fd,
mddb_mb_t *mbp,
void *bp,
int bsize
)
{
mddb_lb_t *lbp = bp;
assert(bp != NULL);
if (lseek(fd, (off_t)dbtob(mbp->mb_blkmap.m_firstblk), SEEK_SET) < 0)
return (mdsyserror(ep, errno, NULL));
if (read(fd, bp, bsize) != bsize)
return (mdsyserror(ep, errno, NULL));
return ((lbp->lb_magic == MDDB_MAGIC_LB) ? 1 : 0);
}
int
phys_read(
md_error_t *ep,
int fd,
mddb_mb_t *mbp,
daddr_t blk,
void *bp,
int bcount
)
{
daddr_t pblk;
if ((pblk = getphysblk(blk, mbp)) < 0)
return (mdmddberror(ep, MDE_DB_BLKRANGE, NODEV32,
MD_LOCAL_SET, blk, NULL));
if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0)
return (mdsyserror(ep, errno, NULL));
if (read(fd, bp, bcount) != bcount)
return (mdsyserror(ep, errno, NULL));
return (bcount);
}
/*
* read_locator_block_did()
*
* Returns:
* < 0 for failure
* 0 for no valid locator name struct
* 1 for valid locator name struct
*/
int
read_locator_block_did(
md_error_t *ep,
int fd,
mddb_mb_t *mbp,
mddb_lb_t *lbp,
void *bp,
int bsize
)
{
int lb_didfirstblk = lbp->lb_didfirstblk;
mddb_did_blk_t *lbdidp = bp;
int rval;
assert(bp != NULL);
if ((rval = phys_read(ep, fd, mbp, lb_didfirstblk, bp, bsize)) < 0)
return (rval);
return ((lbdidp->blk_magic == MDDB_MAGIC_DI) ? 1 : 0);
}
/*
* read_locator_names()
*
* Returns:
* < 0 for failure
* 0 for no valid locator name struct
* 1 for valid locator name struct
*/
int
read_locator_names(
md_error_t *ep,
int fd,
mddb_mb_t *mbp,
mddb_lb_t *lbp,
void *bp,
int bsize
)
{
int lnfirstblk = lbp->lb_lnfirstblk;
mddb_ln_t *lnp = bp;
int rval;
assert(bp != NULL);
if ((rval = phys_read(ep, fd, mbp, lnfirstblk, bp, bsize)) < 0)
return (rval);
return ((lnp->ln_magic == MDDB_MAGIC_LN) ? 1 : 0);
}
int
read_database_block(
md_error_t *ep,
int fd,
mddb_mb_t *mbp,
int dbblk,
void *bp,
int bsize
)
{
mddb_db_t *dbp = bp;
int rval;
assert(bp != NULL);
if ((rval = phys_read(ep, fd, mbp, dbblk, bp, bsize)) < 0)
return (rval);
return ((dbp->db_magic == MDDB_MAGIC_DB) ? 1 : 0);
}
int
read_loc_didblks(
md_error_t *ep,
int fd,
mddb_mb_t *mbp,
int didblk,
void *bp,
int bsize
)
{
mddb_did_blk_t *didbp = bp;
int rval;
assert(bp != NULL);
if ((rval = phys_read(ep, fd, mbp, didblk, bp, bsize)) < 0)
return (rval);
return ((didbp->blk_magic == MDDB_MAGIC_DI) ? 1 : 0);
}
int
read_loc_didinfo(
md_error_t *ep,
int fd,
mddb_mb_t *mbp,
int infoblk,
void *bp,
int bsize
)
{
int rval = 1;
mddb_did_info_t *infop = bp;
assert(bp != NULL);
if ((rval = phys_read(ep, fd, mbp, infoblk, bp, bsize)) < 0)
return (rval);
return ((infop->info_flags & MDDB_DID_EXISTS) ? 1 : 0);
}
/*
* meta_nm_rec()
*
* Return the DE corresponding to the requested namespace record type.
* Modifies dbp to have a firstentry if one isn't there.
*/
static mddb_de_t *
meta_nm_rec(mddb_db_t *dbp, mddb_type_t rectype)
{
mddb_de_t *dep;
int desize;
if (dbp->db_firstentry != NULL) {
/* LINTED */
dep = (mddb_de_t *)((caddr_t)(&dbp->db_firstentry)
+ sizeof (dbp->db_firstentry));
dbp->db_firstentry = dep;
while (dep && dep->de_next) {
desize = sizeof (*dep) - sizeof (dep->de_blks) +
sizeof (daddr_t) * dep->de_blkcount;
/* LINTED */
dep->de_next = (mddb_de_t *)
((caddr_t)dep + desize);
dep = dep->de_next;
}
}
for (dep = dbp->db_firstentry; dep != NULL; dep = dep->de_next) {
if (dep->de_type1 == rectype)
break;
}
return (dep);
}
/*
* read_nm_rec()
*
* Reads the NM, NM_DID or NM_DID_SHR record in the mddb and stores the
* configuration data in the buffer 'nm'
*
* Returns:
* < 0 for failure
* 0 for no valid NM/DID_NM/DID_NM_SHR record
* 1 for valid NM/DID_NM/DID_NM_SHR record
*
*/
static int
read_nm_rec(
md_error_t *ep,
int fd,
mddb_mb_t *mbp,
mddb_lb_t *lbp,
char **nm,
mddb_type_t rectype,
char *diskname
)
{
int cnt, dbblk, rval = 0;
char db[DEV_BSIZE];
mddb_de_t *dep;
/*LINTED*/
mddb_db_t *dbp = (mddb_db_t *)&db;
char *tmpnm = NULL;
daddr_t pblk;
for (dbblk = lbp->lb_dbfirstblk;
dbblk != 0;
dbblk = dbp->db_nextblk) {
if ((rval = read_database_block(ep, fd, mbp, dbblk, dbp,
sizeof (db))) <= 0)
return (rval);
/*
* Locate NM/DID_NM/DID_NM_SHR record. Normally there is
* only one record per mddb. There is a rare case when we
* can't expand the record. If this is the case then we
* will have multiple NM/DID_NM/DID_NM_SHR records linked
* with r_next_recid.
*
* For now assume the normal case and handle the extended
* namespace in Phase 2.
*/
if ((dep = meta_nm_rec(dbp, rectype)) != NULL)
break;
}
/* If meta_nm_rec() never succeeded, bail out */
if (dep == NULL)
return (0);
/* Read in the appropriate record and return configurations */
tmpnm = (char *)Zalloc(dbtob(dep->de_blkcount));
*nm = tmpnm;
for (cnt = 0; cnt < dep->de_blkcount; cnt++) {
if ((pblk = getphysblk(dep->de_blks[cnt], mbp)) < 0) {
rval = mdmddberror(ep, MDE_DB_BLKRANGE,
NODEV32, MD_LOCAL_SET,
dep->de_blks[cnt], diskname);
return (rval);
}
if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0) {
rval = mdsyserror(ep, errno, diskname);
return (rval);
}
if (read(fd, tmpnm, DEV_BSIZE) != DEV_BSIZE) {
rval = mdsyserror(ep, errno, diskname);
return (rval);
}
tmpnm += DEV_BSIZE;
}
return (1);
}
/*
* is_replicated
*
* Determines whether a disk has been replicated or not. It checks to see
* if the device id stored in the master block is the same as the device id
* registered for that disk on the current system. If the two device ids are
* different, then we know that the disk has been replicated.
*
* If need_devid is set and the disk is replicated, fill in the new_devid.
* Also, if need_devid is set, this routine allocates memory for the device
* ids; the caller of this routine is responsible for free'ing up the memory.
*
* Returns:
* 1 if it's a replicated disk
* 0 if it's not a replicated disk
*/
static int
is_replicated(
int fd,
mddb_mb_t *mbp,
int need_devid,
void **new_devid
)
{
ddi_devid_t current_devid;
int retval = 0;
size_t new_devid_len;
if (mbp->mb_devid_magic != MDDB_MAGIC_DE)
return (retval);
if (devid_get(fd, &current_devid) != 0)
return (retval);
if (devid_compare((ddi_devid_t)mbp->mb_devid, current_devid) != 0)
retval = 1;
if (retval && need_devid) {
new_devid_len = devid_sizeof(current_devid);
*new_devid = Zalloc(new_devid_len);
(void) memcpy(*new_devid, (void *)current_devid, new_devid_len);
}
devid_free(current_devid);
return (retval);
}
/*
* free_replicated_disks_list()
*
* this frees up all the memory allocated by build_replicated_disks_list
*/
static void
free_replicated_disks_list()
{
replicated_disk_t **repl_disk, *temp;
int index;
for (index = 0; index <= MAX_DEVID_LEN; index++) {
repl_disk = &replicated_disk_list[index];
while (*repl_disk != NULL) {
temp = *repl_disk;
*repl_disk = (*repl_disk)->next;
Free(temp->old_devid);
Free(temp->new_devid);
Free(temp);
}
}
}
/*
* build_replicated_disks_list()
*
* Builds a list of disks that have been replicated using either a
* remote replication or a point-in-time replication software. The
* list is stored as a two dimensional sparse array.
*
* Returns
* 1 on success
* 0 on failure
*/
static int
build_replicated_disks_list(
md_error_t *ep,
mddrivenamelist_t *dnlp
)
{
uint_t sliceno;
int fd = -1;
mddrivenamelist_t *dp;
mdname_t *rsp;
mddb_mb_t *mbp;
mbp = Malloc(DEV_BSIZE);
for (dp = dnlp; dp != NULL; dp = dp->next) {
mddrivename_t *dnp;
void *new_devid;
dnp = dp->drivenamep;
/* determine the replica slice */
if (meta_replicaslice(dnp, &sliceno, ep) != 0)
continue;
/*
* if the replica slice size is zero, don't bother opening
*/
if (dnp->vtoc.parts[sliceno].size == 0)
continue;
if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
continue;
if ((fd = open(rsp->rname, O_RDONLY| O_NDELAY)) < 0)
return (mdsyserror(ep, errno, rsp->rname));
/* a drive may not have a master block so we just continue */
if (read_master_block(ep, fd, mbp, DEV_BSIZE) <= 0) {
(void) close(fd);
mdclrerror(ep);
continue;
}
if (is_replicated(fd, mbp, 1, &new_devid)) {
replicated_list_insert(mbp->mb_devid_len,
mbp->mb_devid, new_devid);
}
(void) close(fd);
}
replicated_disk_list_built = 1;
Free(mbp);
return (1);
}
/*
* free_did_list()
*
* Frees the did_list allocated as part of build_did_list
*/
static void
free_did_list(
did_list_t *did_listp
)
{
did_list_t *temp, *head;
head = did_listp;
while (head != NULL) {
temp = head;
head = head->next;
if (temp->rdid)
Free(temp->rdid);
if (temp->did)
Free(temp->did);
if (temp->devname)
Free(temp->devname);
if (temp->minor_name)
Free(temp->minor_name);
Free(temp);
}
}
/*
* build_did_list()
*
* Build a list of device ids corresponding to disks in the locator block.
* Memory is allocated here for the nodes in the did_list. The callers of
* this routine must also call free_did_list to free up the memory after
* they're done.
*
* Returns:
* < 0 for failure
* 0 for no valid locator block device id array
* 1 for valid locator block device id array
* ENOTSUP partial diskset, not all disks in a diskset on the
* system where import is being executed
*/
static int
build_did_list(
md_error_t *ep,
int fd,
mddb_mb_t *mb,
mddb_did_blk_t *lbdidp,
did_list_t **did_listp,
int replicated
)
{
char *search_path = "/dev";
char *minor_name;
int rval, cnt;
devid_nmlist_t *nm;
uint_t did_info_length = 0;
uint_t did_info_firstblk = 0;
did_list_t *new, *head = NULL;
char *bp = NULL, *temp;
mddb_did_info_t *did_info = NULL;
void *did = NULL;
size_t new_devid_len;
for (cnt = 0; cnt < MDDB_NLB; cnt++) {
did_info = &lbdidp->blk_info[cnt];
if (!(did_info->info_flags & MDDB_DID_EXISTS))
continue;
new = Zalloc(sizeof (did_list_t));
new->did = Zalloc(did_info->info_length);
/*
* If we can re-use the buffer already has been
* read in then just use it. Otherwise free
* the previous one and alloc a new one
*/
if (dbtob(did_info->info_blkcnt) != did_info_length &&
did_info->info_firstblk != did_info_firstblk) {
did_info_length = dbtob(did_info->info_blkcnt);
did_info_firstblk = did_info->info_firstblk;
if (bp)
Free(bp);
bp = temp = Zalloc(did_info_length);
if ((rval = phys_read(ep, fd, mb, did_info_firstblk,
(void *)bp, did_info_length)) < 0)
return (rval);
} else {
temp = bp;
}
temp += did_info->info_offset;
(void) memcpy(new->did, temp, did_info->info_length);
new->did_index = cnt;
minor_name = did_info->info_minor_name;
/*
* If we are not able to find the ctd mapping corresponding
* to a given device id, it probably means the device id in
* question is not registered with the system.
*
* Highly likely that the only time this happens, we've hit
* a case where not all the disks that are a part of the
* diskset were moved before importing the diskset.
*
* If set is a replicated diskset, then the device id we get
* from 'lb' will be the 'other' did and we need to lookup
* the real one before we call this routine.
*/
if (replicated) {
temp = replicated_list_lookup(did_info->info_length,
new->did);
new_devid_len = devid_sizeof((ddi_devid_t)temp);
new->rdid = Zalloc(new_devid_len);
(void) memcpy(new->rdid, temp, new_devid_len);
did = new->rdid;
} else {
did = new->did;
}
if (devid_valid((ddi_devid_t)(did)) == 0) {
return (-1);
}
if ((rval = meta_deviceid_to_nmlist(search_path,
(ddi_devid_t)did, minor_name, &nm)) != 0) {
*did_listp = head;
free_did_list(*did_listp);
*did_listp = NULL;
(void) mddserror(ep, MDE_DS_PARTIALSET, MD_SET_BAD,
mynode(), NULL, NULL);
return (ENOTSUP);
}
assert(nm->devname != NULL);
new->devname = Strdup(nm->devname);
new->dev = nm->dev;
new->minor_name = Strdup(minor_name);
devid_free_nmlist(nm);
new->next = head;
head = new;
}
/* Free the last bp */
if (bp)
Free(bp);
*did_listp = head;
return (1);
}
/*
* meta_get_set_info
*
* Scans a given drive for set specific information. If the given drive
* has a shared metadb, scans the shared metadb for information pertaining
* to the set.
*
* Returns:
* <0 for failure
* 0 success but no replicas were found
* 1 success and a replica was found
* ENOTSUP for partial disksets detected
*/
int
meta_get_set_info(
mddrivenamelist_t *dp,
md_im_set_desc_t **mispp,
int local_mb_ok,
md_error_t *ep
)
{
uint_t s;
mdname_t *rsp;
int fd;
char mb[DEV_BSIZE];
/*LINTED*/
mddb_mb_t *mbp = (mddb_mb_t *)mb;
char lb[dbtob(MDDB_LBCNT)];
/*LINTED*/
mddb_lb_t *lbp = (mddb_lb_t *)lb;
mddb_did_blk_t *lbdidp = NULL;
mddb_ln_t *lnp = NULL;
int lnsize, lbdid_size;
int rval = 0;
char db[DEV_BSIZE];
/*LINTED*/
mddb_db_t *dbp = (mddb_db_t *)db;
did_list_t *did_listp = NULL;
mddrivenamelist_t *dnlp;
mddrivename_t *dnp;
md_im_names_t cnames = { 0, NULL};
char *nm = NULL;
char *did_nm = NULL, *did_shrnm = NULL;
struct nm_rec *nmp;
struct devid_shr_rec *did_shrnmp;
struct devid_min_rec *did_nmp;
int extended_namespace = 0;
int replicated = 0;
dnp = dp->drivenamep;
/*
* Determine and open the replica slice
*/
if (meta_replicaslice(dnp, &s, ep) != 0) {
return (-1);
}
/*
* Test for the size of replica slice in question. If
* the size is zero, we know that this is not a disk that was
* part of a set and it should be silently ignored for import.
*/
if (dnp->vtoc.parts[s].size == 0)
return (0);
if ((rsp = metaslicename(dnp, s, ep)) == NULL) {
return (-1);
}
if ((fd = open(rsp->rname, O_RDONLY|O_NDELAY)) < 0)
return (mdsyserror(ep, errno, rsp->cname));
/*
* After the open() succeeds, we should return via the "out"
* label to clean up after ourselves. (Up 'til now, we can
* just return directly, because there are no resources to
* give back.)
*/
if ((rval = read_master_block(ep, fd, mbp, sizeof (mb))) <= 0)
goto out;
replicated = is_replicated(fd, mbp, 0, NULL);
if (!local_mb_ok && mbp->mb_setno == 0) {
rval = 0;
goto out;
}
if ((rval = read_locator_block(ep, fd, mbp, lbp, sizeof (lb))) <= 0)
goto out;
/*
* Once the locator block has been read, we need to
* check if the locator block commit count is zero.
* If it is zero, we know that the replica we're dealing
* with is on a disk that was deleted from the disk set;
* and, it potentially has stale data. We need to quit
* in that case
*/
if (lbp->lb_commitcnt == 0) {
rval = 0;
goto out;
}
/*
* Make sure that the disk being imported has device id
* namespace present for disksets. If a disk doesn't have
* device id namespace, we skip reading the replica on that disk
*/
if (!(lbp->lb_flags & MDDB_DEVID_STYLE)) {
rval = 0;
goto out;
}
/*
* Grab the locator block device id array. Allocate memory for the
* array first.
*/
lbdid_size = dbtob(lbp->lb_didblkcnt);
lbdidp = Zalloc(lbdid_size);
if ((rval = read_locator_block_did(ep, fd, mbp, lbp, lbdidp,
lbdid_size)) <= 0)
goto out;
/*
* For a disk that has not been replicated, extract the device ids
* stored in the locator block device id array and store them in
* a list.
*
* If the disk has been replicated using replication software such
* as HDS Truecopy/ShadowImage or EMC SRDF/BCV, the device ids in
* the locator block are invalid and we need to build a list of
* replicated disks.
*/
if (replicated && !replicated_disk_list_built) {
/*
* if there's a replicated diskset involved, we need to
* scan the system one more time and build a list of all
* candidate disks that might be part of that replicated set
*/
if (meta_list_disks(ep, &cnames) != 0) {
rval = 0;
goto out;
}
dnlp = meta_prune_cnames(ep, &cnames, 0);
rval = build_replicated_disks_list(ep, dnlp);
if (rval == 0)
goto out;
}
rval = build_did_list(ep, fd, mbp, lbdidp, &did_listp, replicated);
if ((rval <= 0) || (rval == ENOTSUP))
goto out;
/*
* Until here, we've gotten away with fixed sizes for the
* master block and locator block. The locator names,
* however, are sized (and therefore allocated) dynamically
* according to information in the locator block.
*/
lnsize = dbtob(lbp->lb_lnblkcnt);
lnp = Zalloc(lnsize);
if ((rval = read_locator_names(ep, fd, mbp, lbp, lnp, lnsize)) <= 0)
goto out;
/*
* Read in the NM record
* If no NM record was found, it still is a valid configuration
* but it also means that we won't find any corresponding DID_NM
* or DID_SHR_NM.
*/
if ((rval = read_nm_rec(ep, fd, mbp, lbp, &nm, MDDB_NM, rsp->cname))
< 0)
goto out;
else if (rval == 0)
goto append;
/*
* At this point, we have read in all of the blocks that form
* the nm_rec. We should at least detect the corner case
* mentioned above, in which r_next_recid links to another
* nm_rec. Extended namespace handling is left for Phase 2.
*
* What this should really be is a loop, each iteration of
* which reads in a nm_rec and calls the set_append_wrapper().
*/
/*LINTED*/
nmp = (struct nm_rec *)(nm + sizeof (mddb_rb_t));
if (nmp->r_rec_hdr.r_next_recid != (mddb_recid_t)0) {
extended_namespace = 1;
rval = 0;
goto out;
}
if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_nm,
MDDB_DID_NM, rsp->cname)) < 0)
goto out;
else if (rval == 0)
goto append;
/*LINTED*/
did_nmp = (struct devid_min_rec *)(did_nm + sizeof (mddb_rb_t));
if (did_nmp->min_rec_hdr.r_next_recid != (mddb_recid_t)0) {
extended_namespace = 1;
rval = 0;
goto out;
}
if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_shrnm,
MDDB_DID_SHR_NM, rsp->cname)) < 0)
goto out;
else if (rval == 0)
goto append;
/*LINTED*/
did_shrnmp = (struct devid_shr_rec *)(did_shrnm + sizeof (mddb_rb_t));
if (did_shrnmp->did_rec_hdr.r_next_recid != (mddb_recid_t)0) {
extended_namespace = 1;
rval = 0;
goto out;
}
append:
/* Finally, we've got what we need to process this replica. */
mispp = set_append_wrapper(mispp, did_listp, mbp, lbp,
/*LINTED*/
(mddb_rb_t *)nm, (mddb_rb_t *)did_nm, (mddb_rb_t *)did_shrnm,
ep, replicated);
/* Return the fact that we found at least one set */
rval = 1;
out:
if (fd >= 0)
(void) close(fd);
if (did_listp != NULL)
free_did_list(did_listp);
if (lnp != NULL)
Free(lnp);
if (nm != NULL)
Free(nm);
if (did_nm != NULL)
Free(did_nm);
if (did_shrnm != NULL)
Free(did_shrnm);
/*
* If we are at the end of the list, we must free up
* the replicated list too
*/
if (dp->next == NULL)
free_replicated_disks_list();
if (extended_namespace)
return (mddserror(ep, MDE_DS_EXTENDEDNM, MD_SET_BAD,
mynode(), NULL, NULL));
return (rval);
}
/*
* Return the minor name associated with a given disk slice
*/
static char *
meta_getminor_name(
char *devname,
md_error_t *ep
)
{
int fd = -1;
char *minor_name = NULL;
char *ret_minor_name = NULL;
if (devname == NULL)
return (NULL);
if ((fd = open(devname, O_RDONLY|O_NDELAY, 0)) < 0) {
(void) mdsyserror(ep, errno, devname);
return (NULL);
}
if (devid_get_minor_name(fd, &minor_name) == 0) {
ret_minor_name = Strdup(minor_name);
devid_str_free(minor_name);
}
(void) close(fd);
return (ret_minor_name);
}
static int
meta_replica_quorum(
md_im_set_desc_t *misp,
md_error_t *ep
)
{
md_im_drive_info_t *midp;
mddrivename_t *dnp;
md_im_replica_info_t *midr;
mdname_t *np;
struct stat st_buf;
uint_t rep_slice;
int replica_count = 0;
for (midp = misp->mis_drives; midp != NULL;
midp = midp->mid_next) {
dnp = midp->mid_dnp;
if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
((np = metaslicename(dnp, rep_slice, ep))
== NULL)) {
mdclrerror(ep);
continue;
}
if (stat(np->bname, &st_buf) != 0)
continue;
/*
* The drive is okay now count its replicas
*/
for (midr = midp->mid_replicas; midr != NULL;
midr = midr->mir_next) {
replica_count++;
}
}
if (replica_count < (misp->mis_active_replicas + 1)/2)
return (-1);
return (0);
}
static set_t
meta_imp_setno(
md_error_t *ep
)
{
set_t max_sets, setno;
int bool;
if ((max_sets = get_max_sets(ep)) == 0) {
return (MD_SET_BAD);
}
/*
* This code needs to be expanded when we run in SunCluster
* environment SunCluster obtains setno internally
*/
for (setno = 1; setno < max_sets; setno++) {
if (clnt_setnumbusy(mynode(), setno,
&bool, ep) == -1) {
setno = MD_SET_BAD;
break;
}
/*
* found one available
*/
if (bool == FALSE)
break;
}
if (setno == max_sets) {
setno = MD_SET_BAD;
}
return (setno);
}
int
meta_imp_set(
md_im_set_desc_t *misp,
char *setname,
int force,
bool_t dry_run,
md_error_t *ep
)
{
md_timeval32_t tp;
md_im_drive_info_t *midp;
uint_t rep_slice;
mddrivename_t *dnp;
struct mddb_config c;
mdname_t *np;
md_im_replica_info_t *mirp;
char setnum_link[MAXPATHLEN];
char setname_link[MAXPATHLEN];
char *minor_name = NULL;
(void) memset(&c, 0, sizeof (c));
(void) strlcpy(c.c_setname, setname, sizeof (c.c_setname));
c.c_sideno = 0;
c.c_flags = MDDB_C_IMPORT;
/*
* Check to see if the setname that the set is being imported into,
* already exists.
*/
if (getsetbyname(c.c_setname, ep) != NULL) {
return (mddserror(ep, MDE_DS_SETNAMEBUSY, MD_SET_BAD,
mynode(), NULL, c.c_setname));
}
/*
* Find the next available set number
*/
if ((c.c_setno = meta_imp_setno(ep)) == MD_SET_BAD) {
return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
mynode(), NULL, c.c_setname));
}
if (meta_gettimeofday(&tp) == -1) {
return (mdsyserror(ep, errno, NULL));
}
c.c_timestamp = tp;
/* Check to see if replica quorum requirement is fulfilled */
if (!force && meta_replica_quorum(misp, ep) == -1)
return (mddserror(ep, MDE_DS_INSUFQUORUM, MD_SET_BAD,
mynode(), NULL, c.c_setname));
for (midp = misp->mis_drives; midp != NULL;
midp = midp->mid_next) {
mdcinfo_t *cinfo;
/*
* We pass down the list of the drives in the
* set down to the kernel irrespective of
* whether the drives have a replica or not.
*
* The kernel detects which of the drives don't
* have a replica and accordingly does the
* right thing.
*/
dnp = midp->mid_dnp;
if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
((np = metaslicename(dnp, rep_slice, ep))
== NULL)) {
mdclrerror(ep);
continue;
}
(void) strcpy(c.c_locator.l_devname, np->bname);
c.c_locator.l_dev = meta_cmpldev(np->dev);
c.c_locator.l_mnum = meta_getminor(np->dev);
c.c_locator.l_devid = (uintptr_t)Malloc(midp->mid_devid_sz);
(void) memcpy((void *)c.c_locator.l_devid, midp->mid_devid,
midp->mid_devid_sz);
c.c_locator.l_devid_sz = midp->mid_devid_sz;
c.c_locator.l_devid_flags =
MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
if (midp->mid_o_devid) {
c.c_locator.l_old_devid =
(uint64_t)Malloc(midp->mid_o_devid_sz);
(void) memcpy((void *)c.c_locator.l_old_devid,
midp->mid_o_devid, midp->mid_o_devid_sz);
c.c_locator.l_old_devid_sz = midp->mid_o_devid_sz;
}
minor_name = meta_getminor_name(np->bname, ep);
(void) strncpy(c.c_locator.l_minor_name, minor_name,
sizeof (c.c_locator.l_minor_name));
if ((cinfo = metagetcinfo(np, ep)) == NULL) {
mdclrerror(ep);
continue;
}
(void) strncpy(c.c_locator.l_driver, cinfo->dname,
sizeof (c.c_locator.l_driver));
mirp = midp->mid_replicas;
do {
if (mirp) {
c.c_locator.l_flags = 0;
c.c_locator.l_blkno = mirp->mir_offset;
mirp = mirp->mir_next;
} else {
/*
* Default offset for dummy is 16
*/
c.c_locator.l_blkno = 16;
}
if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
Free((void *)c.c_locator.l_devid);
if (c.c_locator.l_old_devid)
Free((void *)c.c_locator.l_old_devid);
return (mdstealerror(ep, &c.c_mde));
}
} while (mirp != NULL);
}
/*
* If the dry run option was specified, flag success
* and exit out
*/
if (dry_run == 1) {
md_eprintf("%s\n", dgettext(TEXT_DOMAIN,
"import should be successful"));
Free((void *)c.c_locator.l_devid);
if (c.c_locator.l_old_devid)
Free((void *)c.c_locator.l_old_devid);
return (0);
}
/*
* Now kernel should have all the information
* regarding the import diskset replica.
* Tell kernel to load them up and import the set
*/
if (metaioctl(MD_IOCIMP_LOAD, &c.c_setno, &c.c_mde, NULL) != 0) {
Free((void *)c.c_locator.l_devid);
if (c.c_locator.l_old_devid)
Free((void *)c.c_locator.l_old_devid);
return (mdstealerror(ep, &c.c_mde));
}
(void) meta_smf_enable(META_SMF_DISKSET, NULL);
/* The set has now been imported, create the appropriate symlink */
(void) snprintf(setname_link, MAXPATHLEN, "/dev/md/%s", setname);
(void) snprintf(setnum_link, MAXPATHLEN, "shared/%d", c.c_setno);
/*
* Since we already verified that the setname was OK, make sure to
* cleanup before proceeding.
*/
if (unlink(setname_link) == -1) {
if (errno != ENOENT)
(void) mdsyserror(ep, errno, setname_link);
}
if (symlink(setnum_link, setname_link) == -1)
(void) mdsyserror(ep, errno, setname_link);
/* resnarf the set that has just been imported */
if (clnt_resnarf_set(mynode(), c.c_setno, ep) != 0)
md_eprintf("%s\n", dgettext(TEXT_DOMAIN, "Please stop and "
"restart rpc.metad"));
Free((void *)c.c_locator.l_devid);
if (c.c_locator.l_old_devid)
Free((void *)c.c_locator.l_old_devid);
return (0);
}