mirror_ioctl.c revision d7cd82522afdd890a66c7600b499590ad44e84bd
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
extern int md_status;
extern kcondvar_t md_cv;
extern md_ops_t mirror_md_ops;
extern int md_ioctl_cnt;
extern md_krwlock_t md_unit_array_rw;
extern mdq_anchor_t md_ff_daemonq;
extern void md_probe_one();
extern void mirror_openfail_console_info();
#ifdef DEBUG
extern int mirror_debug_flag;
#endif
static void
{
/*
* Release the block on writes to the mirror and resume any blocked
* resync thread.
* This is only required for MN sets
*/
#ifdef DEBUG
if (mirror_debug_flag)
#endif
un->un_suspend_wr_flag = 0;
}
}
{
mdi_unit_t *ui;
return (NULL);
}
return (NULL);
}
}
return (NULL);
}
return ((mm_unit_t *)1);
}
return (NULL);
}
if (flags & ARRAY_WRITER)
else if (flags & ARRAY_READER)
else /* RD_LOCK */
}
return (NULL);
}
return (un);
}
static int
void *d,
int mode
)
{
int err;
int i;
md_set_params_t *msp = d;
return (0);
/*
* Create the db record for this mdstruct
* We don't store incore elements ondisk
*/
#if defined(_ILP32)
#else
#endif
} else {
/*
* It's important to use the correct size here
*/
}
if (recid < 0)
/* Resize to include incore fields */
/*
* It is okay that we muck with the mdstruct here,
* since no one else will know about the mdstruct
* until we commit it. If we crash, the record will
* be automatically purged, since we haven't
* committed it yet.
*/
/* copy in the user's mdstruct */
return (EFAULT);
}
/* All 64 bit metadevices only support EFI labels. */
}
for (i = 0; i < NMIRROR; i++) {
struct mm_submirror *sm;
continue;
/* ensure that the submirror is a metadevice */
continue;
/* mirror creation should fail here */
}
return (err);
}
/*
* Update unit availability
*/
return (0);
}
static int
void *migp,
int mode,
)
{
return (0);
return (0);
}
return (EFAULT);
}
return (EFAULT);
return (0);
}
static int
void *mgdp,
int mode,
)
{
int cnt;
int i;
return (0);
continue;
return (ENODEV);
}
return (EFAULT);
++udevs;
}
++cnt;
}
return (0);
}
static int
)
{
mdi_unit_t *ui;
return (0);
}
/* single thread */
(void) md_unit_openclose_enter(ui);
if (md_unit_isopen(ui)) {
}
int smi;
continue;
}
}
}
/*
* Update unit availability
*/
/*
* If MN set, reset s_un_next so all nodes can have
* the same view of the next available slot when
* nodes are -w and -j
*/
if (MD_MNSET_SETNO(setno)) {
}
return (0);
}
static int
)
{
return (0);
}
static int
)
{
return (0);
}
static int
)
{
}
static int
)
{
return (0);
}
static int
{
int ci;
int i;
int compcnt;
void (*get_dev)();
int nkeys;
for (i = 0; i < NMIRROR; i++) {
continue;
get_dev =
"get device", 0);
/*
* For each of the underlying stripe components get
* the info.
*/
*smi = i;
return (1);
}
}
/*
* now we rescan looking only for NODEV. If we find
* NODEV then we will check the keys to see if its a match.
*
* If no key was found to match dev, then there is
* no way to compare keys - so continue.
*/
if (nkeys == 0) {
continue;
}
if (nkeys > 1) {
return (mddeverror(
}
*smi = i;
return (1);
}
}
}
}
}
/*
* comp_replace:
* ----------------
* Called to implement the component replace function
*
* Owner is returned in the parameter block passed in by the caller.
*
* Returns:
* 0 success
* error code if the function fails
*
* For a MN set, on entry all writes to the mirror are suspended, on exit
* from this function, writes must be resumed when not a dryrun.
*/
static int
)
{
mdi_unit_t *ui;
int (*repl_dev)();
void (*repl_done)();
void *repl_data;
int err = 0;
void (*get_dev)();
return (0);
}
goto errexit;
}
/*
* replace cannot be done while a resync is active or we are
* still waiting for an optimized resync to be started
*/
goto errexit;
}
goto errexit;
}
goto errexit;
}
goto errexit;
}
goto errexit;
}
"get device", 0);
"replace device", 0);
int numkeys;
/*
* We trust the dev_t because we cannot determine the
* dev_t from the device id since a new disk is in the
* same location. Since this is a call from metareplace -e dx
* AND it is SCSI a new dev_t is not generated. So the
* dev_t from the mddb is used. Before enabling the device
* we check to make sure that multiple entries for the same
* device does not exist in the namespace. If they do we
* fail the ioctl.
* One of the many ways multiple entries in the name space
* can occur is if one removed the failed component in the
* stripe of a mirror and put another disk that was part of
* another metadevice. After reboot metadevadm would correctly
* update the device name for the metadevice whose component
* has moved. However now in the metadb there are two entries
* for the same name (ctds) that belong to different
* metadevices. One is valid, the other is a ghost or "last
* known as" ctds.
*/
/*
* Verify that multiple keys for the same
* dev_t don't exist
*/
goto errexit;
}
/*
* Namespace has multiple entries
* for the same devt
*/
if (numkeys > 1) {
goto errexit;
}
goto errexit;
}
this_dev);
goto errexit;
}
}
goto errexit;
}
/* in case of dryrun, don't actually do anything */
}
}
if (err != 0) {
goto errexit;
}
/* In case of a dryrun we're done. */
return (0);
}
/* set_sm_comp_state() commits the modified records */
lock);
/*
* If the mirror is open then need to make sure that the submirror,
* on which the replace ran, is also open and if not then open it.
* This is only a concern for a single component sub-mirror stripe
* as it may not be open due to the failure of the single component.
*
* This check has to be done after the call to (*repl_done)
* as that function releases the writer lock on the submirror.
*/
if (md_unit_isopen(ui)) {
if (!md_unit_isopen(ms_ui)) {
/*
* Underlying submirror is not open so open it.
*/
goto errexit;
}
}
}
} else {
}
/*
* Reset any saved resync location flags as we've now replaced the
* component. This means we have to resync the _whole_ component.
*/
return (0);
/* We need to resume writes unless this is a dryrun */
return (0);
}
/*
* mirror_attach:
* ----------------
* Called to implement the submirror attach function
*
* Owner is returned in the parameter block passed in by the caller.
*
* Returns:
* 0 success
* error code if the function fails
*
* For a MN set, on entry all writes to the mirror are suspended, on exit
* from this function, writes must be resumed when not a dryrun.
*/
static int
)
{
int smi;
/*
* This routine should not be called during upgrade.
*/
if (MD_UPGRADE) {
return (0);
}
return (0);
}
break;
goto errexit;
}
goto errexit;
}
goto errexit;
}
/* Check the size */
goto errexit;
}
/* Don't attach labeled sm to unlabeled mirrors */
goto errexit;
}
/* Open the sm, only if the mirror is open */
goto errexit;
}
/* in dryrun mode, don't leave the device open */
if (options & MDIOCTL_DRYRUN) {
}
}
/*
* After this point the checks are done and action is taken.
* So, clean up and return in case of dryrun.
*/
if (options & MDIOCTL_DRYRUN) {
return (0);
}
if (!MD_MNSET_SETNO(setno))
return (0);
/* We need to resume writes unless this is a dryrun */
if (!(options & MDIOCTL_DRYRUN))
return (0);
}
void
{
int compcnt;
int i;
for (i = 0; i < compcnt; i++) {
shared->ms_lasterrcnt = 0;
}
}
/*
* mirror_detach:
* ----------------
* Called to implement the submirror detach function
*
* Owner is returned in the parameter block passed in by the caller.
*
* Returns:
* 0 success
* error code if the function fails
*
* For a MN set, on entry all writes to the mirror are suspended, on exit
* from this function, writes must be resumed.
*/
static int
)
{
mdi_unit_t *ui;
int smi;
int nsv = 0;
return (0);
}
}
/*
* detach cannot be done while a resync is active or we are
* still waiting for an optimized resync to be started
*/
}
continue;
break;
}
}
}
}
/*
* Need to pass in the extra record id,
* cause mirror_commit() will not commit
* a sm (from the smmask) if the slot is unused.
* Which it is, since we are detaching.
*/
recids[1] = 0;
/*
* If there are any erred components
* then make the detach fail and do not unparent the
* submirror.
*/
/* reallow soft partitioning of submirror */
/* Close the sm, only if the mirror is open */
nsv = 1;
} else
/*
* Perhaps the mirror changed its size due to this detach.
* (void) mirror_grow_unit(un, &mde);
*/
/*
* NOTE: We are passing the detached sm recid
* and not the smmask field. This is correct.
*/
}
return (0);
}
/*
* mirror_offline:
* ----------------
* Called to implement the submirror offline function
*
* Owner is returned in the parameter block passed in by the caller.
*
* Returns:
* 0 success
* error code if the function fails
*
* For a MN set, on entry all writes to the mirror are suspended, on exit
* from this function, writes must be resumed.
*/
static int
)
{
int smi;
return (0);
}
/*
* offline cannot be done while a resync is active or we are
* still waiting for an optimized resync to be started
*/
}
/*
* Reject mirror_offline if ABR is set
*/
}
continue;
break;
}
}
}
}
return (0);
}
/*
* mirror_online:
* ----------------
* Called to implement the submirror online function
*
* Owner is returned in the parameter block passed in by the caller.
*
* Returns:
* 0 success
* error code if the function fails
*
* For a MN set, on entry all writes to the mirror are suspended, on exit
* from this function, writes must be resumed.
*/
static int
)
{
int smi;
return (0);
}
continue;
break;
}
}
}
/*
* online cannot be done while a resync is active or we are
* still waiting for an optimized resync to be started
*/
}
/* for MN sets, re-read the resync record from disk */
if (!MD_MNSET_SETNO(setno))
else return (0);
}
int
)
{
int smi;
int spc; /* sectors per head */
/*
* grow_unit cannot be done while a resync is active or we are
* still waiting for an optimized resync to be started. Set
* flag to indicate GROW_PENDING and once the resync is complete
* the grow_unit function will be executed.
*/
}
/*
* Find the smallest submirror
*/
total_blocks = 0;
continue;
/*
* Growth is not possible if there is one or more
* submirrors made up of non-Metadevices.
*/
return (0);
if ((total_blocks == 0) ||
}
/*
* If the smallest submirror is not larger
* than the mirror, we are all done.
*/
return (0);
/*
* Growing the mirror now.
* First: Round down the actual_tb to be a multiple
* of nheads * nsects.
*/
/* Is the mirror growing from 32 bit device to 64 bit device? */
#if defined(_ILP32)
#else
/* yup, new device size. So we need to replace the record */
/* Preserve the friendly name properties of growing unit */
/* Resize to include incore fields */
/* All 64 bit metadevices only support EFI labels. */
/*
* If the device had a vtoc record attached to it, we remove
* the vtoc record, because the layout has changed completely.
*/
if (old_vtoc != 0) {
un->c.un_vtoc_id =
}
/*
* If there was a vtoc record, it is no longer needed, because
* a new efi record has been created for this un.
*/
if (old_vtoc != 0) {
}
#endif
}
}
return (0);
}
}
return (0);
}
static int
void *mgp,
)
{
return (0);
return (0);
}
static int
)
{
return (0);
if (pp->change_read_option)
if (pp->change_write_option)
if (pp->change_pass_num)
return (0);
}
static int
)
{
diskaddr_t d;
return (0);
ri->ri_percent_done = 0;
ri->ri_percent_dirty = 0;
return (0);
}
/* Return state of resync thread */
d = un->un_rs_resync_2_do;
if (d) {
if (un->c.un_total_blocks >
percent *= 1000;
percent /= d;
if (percent > 1000)
percent = 1000;
} else {
percent *= 100;
percent /= d;
}
} else {
ri->ri_percent_done = 0;
}
}
ri->ri_percent_dirty = 0;
return (0);
}
cnt = 0;
cnt++;
d = un->un_rrd_num;
if (d) {
percent *= 100;
percent /= d;
} else
percent = 0;
return (0);
}
/*
* mirror_get_owner:
* ----------------
* Called to obtain the current owner of a mirror.
*
* Owner is returned in the parameter block passed in by the caller.
*
* Returns:
* 0 success
* EINVAL metadevice does not exist or is not a member of a multi-owned
* set.
*/
static int
{
return (EINVAL);
if (!MD_MNSET_SETNO(setno)) {
return (EINVAL);
}
return (0);
}
/*
* mirror_choose_owner_thread:
* --------------------------
* Called to send a CHOOSE_OWNER message to the commd running on the master
* node. This needs to run in a separate context so that mutex livelock is
* avoided. This can occur because the original request is issued from a call
* to metaioctl() which acquires the global ioctl lock, calls down into the
* mirror_ioctl code and then attempts to mdmn_ksend_message() to the master
* node. As the handler for the choose_owner message needs to send another
* ioctl through the metaioctl() entry point, any other use (by rpc.metad or
* mdcommd checking on set ownership) will deadlock the system leading to
* cluster reconfiguration timeouts and eventually a node or (at worst) a
* cluster-wide panic
*/
static void
{
int rval;
sizeof (md_mn_msg_chooseid_t), kres);
}
thread_exit();
}
/*
* mirror_owner_thread:
* -------------------
* Called to request an ownership change from a thread context. This issues
* a mdmn_ksend_message() and then completes the appropriate ownership change
* on successful completion of the message transport.
* The originating application must poll for completion on the 'flags' member
* of the MD_MN_MM_OWNER_STATUS ioctl() parameter block.
* Success is marked by a return value of MD_MN_MM_RES_OK, Failure by
* MD_MN_MM_RES_FAIL
*/
static void
{
int rval;
un->un_mirror_owner_status = 0;
/*
* Message transport layer failed. Return the failure code to
* the application.
*/
} else {
/*
* Ownership change succeeded. Update in-core version of
* mirror owner.
*/
/* Sets node owner of un_rr_dirty record */
if (un->un_rr_dirty_recid)
/*
* Release the block on the current resync region if it
* is blocked
*/
}
/* Restart the resync thread if it was previously blocked */
}
}
thread_exit();
}
/*
* mirror_set_owner:
* ----------------
* Called to change the owner of a mirror to the specified node. If we
* are not the owner of the mirror, we do nothing apart from update the in-core
* ownership. It can also be used to choose a new owner for the resync of a
* mirror, this case is specified by the flag MD_MN_MM_CHOOSE_OWNER, see below.
*
* The p->d.flags bitfield controls how subsequent ownership changes will be
* handled:
* MD_MN_MM_SPAWN_THREAD
* a separate thread is created which emulates the behaviour of
* become_owner() [mirror.c]. This is needed when changing the
* ownership from user context as there needs to be a controlling
* kernel thread which updates the owner info on the originating
* node. Successful completion of the mdmn_ksend_message() means
* that the owner field can be changed.
*
* MD_MN_MM_PREVENT_CHANGE
* Disallow any change of ownership once this ownership change has
* been processed. The only way of changing the owner away from
* the p->d.owner node specified in the call is to issue a request
* with MD_MN_MM_ALLOW_CHANGE set in the flags. Any request to
* become owner from a different node while the PREVENT_CHANGE
* is in operation will result in an EAGAIN return value.
* un->un_owner_state has MM_MN_PREVENT_CHANGE set.
*
* MD_MN_MM_ALLOW_CHANGE
* Allow the owner to be changed by a subsequent request.
* un->un_owner_state has MM_MN_PREVENT_CHANGE cleared.
*
* MD_MN_MM_CHOOSE_OWNER
* Choose a new owner for a mirror resync. In this case, the new
* owner argument is not used. The selection of a new owner
* is a round robin allocation using a resync owner count. This
* ioctl passes this value in a message to the master node
* which uses it to select a node from the node list and then
* sends it a message to become the owner.
*
* If we are the current owner, we must stop further i/o from being scheduled
* and wait for any pending i/o to drain. We wait for any in-progress resync
* bitmap updates to complete and we can then set the owner. If an update to
* the resync bitmap is attempted after this we simply don't write this out to
* disk until the ownership is restored.
*
* If we are the node that wants to become the owner we update the in-core
* owner and return. The i/o that initiated the ownership change will complete
* on successful return from this ioctl.
*
* Return Value:
* 0 Success
* EINVAL Invalid unit referenced
* EAGAIN Ownership couldn't be transferred away or change of
* ownership is prevented. Caller should retry later on.
*/
static int
{
mdi_unit_t *ui;
return (EINVAL);
if (!MD_MNSET_SETNO(setno)) {
return (EINVAL);
}
/*
* If we are choosing a new resync owner, send a message to the master
* to make the choice.
*/
if (p->d.flags & MD_MN_MM_CHOOSE_OWNER) {
/* Release ioctl lock before we call ksend_message() */
/* If we're resetting the owner pass the node id in */
if (p->d.owner != MD_MN_MIRROR_UNOWNED) {
return (mirror_choose_owner(un, &p->d));
} else {
}
}
/*
* Check for whether we have to spawn a thread to issue this request.
* If set we issue a mdmn_ksend_message() to cause the appropriate
* ownership change. On completion of this request the calling
* application _must_ poll the structure 'flags' field to determine the
* result of the request. All this is necessary until we have true
* multi-entrant ioctl support.
* If we are just clearing the owner, then MD_MN_MM_SPAWN_THREAD can
* be ignored.
*/
p->d.flags &= ~MD_MN_MM_SPAWN_THREAD;
return (EFAULT);
} else {
return (0);
}
}
/*
* If setting owner to NULL, this is being done because the owner has
* died and therefore we set OPT_NOT_DONE to ensure that the
* mirror is marked as "Needs Maintenance" and that an optimized
* resync will be done when we resync the mirror, Also clear the
* PREVENT_CHANGE flag and remove the last resync region from the
* overlap chain.
*/
if (p->d.owner == 0) {
int i;
/*
* If the ABR capability is not set and the pass_num is non-zero
* there is need to perform an optimized resync
* Therefore set OPT_NOT_DONE, setup the resync_bm and set
* the submirrors as resync targets.
*/
for (i = 0; i < NMIRROR; i++) {
if ((SUBMIRROR_IS_READABLE(un, i)) ||
SMS_BY_INDEX_IS(un, i,
}
}
ps->ps_firstblk = 0;
ps->ps_lastblk = 0;
}
}
/*
* If we are not trying to become owner ourselves check
* to see if we have to change the owner
*/
/*
* No need to change owner,
*/
if (p->d.flags & MD_MN_MM_PREVENT_CHANGE) {
} else if (p->d.flags & MD_MN_MM_ALLOW_CHANGE) {
}
return (0);
}
}
/*
* Disallow ownership change if previously requested to. This can only
* be reset by issuing a request with MD_MN_MM_ALLOW_CHANGE set in the
* flags field.
*/
!(p->d.flags & MD_MN_MM_ALLOW_CHANGE)) {
#ifdef DEBUG
"owner while node %x has exclusive access to %s",
#endif
return (EAGAIN);
}
if (p->d.owner == md_mn_mynode_id) {
/*
* I'm becoming the mirror owner. Flag this so that the
* message sender can change the in-core owner when all
* nodes have processed this message
*/
MM_MN_PREVENT_CHANGE : 0;
~MM_MN_PREVENT_CHANGE : ~0;
/*
* I'm releasing ownership. Block and drain i/o. This also
* blocks until any in-progress resync record update completes.
*/
/* Block the resync thread */
/* Sets node owner of un_rr_dirty record */
if (un->un_rr_dirty_recid)
MM_MN_PREVENT_CHANGE : 0;
~MM_MN_PREVENT_CHANGE : ~0;
/*
* Allow further i/o to occur. Any write() from another node
* will now cause another ownership change to occur.
*/
} else {
/* Update the in-core mirror owner */
/* Sets node owner of un_rr_dirty record */
if (un->un_rr_dirty_recid)
MM_MN_PREVENT_CHANGE : 0;
~MM_MN_PREVENT_CHANGE : ~0;
}
return (0);
}
/*
* mirror_allocate_hotspare:
* ------------------------
* Called to allocate a hotspare for a failed component. This function is
* called by the MD_MN_ALLOCATE_HOTSPARE ioctl.
*/
static int
{
#ifdef DEBUG
if (mirror_debug_flag)
printf("mirror_allocate_hotspare: mnum,sm,comp = %x, %x, %x\n",
#endif
return (EINVAL);
/* This function is only valid for a multi-node set */
if (!MD_MNSET_SETNO(setno)) {
return (EINVAL);
}
return (0);
}
/*
* mirror_get_owner_status:
* -----------------------
* Return the status of a previously issued ioctl to change ownership. This is
* required for soft-partition support as the request to change mirror owner
* needs to be run from a separate daemon thread.
*
* Returns:
* 0 Success (contents of un_mirror_owner_status placed in 'flags')
* EINVAL Invalid unit
*/
static int
{
return (EINVAL);
if (!MD_MNSET_SETNO(setno)) {
return (EINVAL);
}
return (0);
}
/*
* mirror_set_state:
* ---------------
* Called to set the state of the component of a submirror to the specified
* value. This function is called by the MD_MN_SET_STATE ioctl.
*/
static int
{
#ifdef DEBUG
if (mirror_debug_flag)
printf("mirror_set_state: mnum,sm,comp,state, hs_id = %x, "
#endif
return (EINVAL);
/* This function is only valid for a multi-node set */
if (!MD_MNSET_SETNO(setno)) {
return (EINVAL);
}
/* Set state in component and update ms_flags */
shared = (md_m_shared_t *)
/*
* If a CS_ERRED state is being sent, verify that the sender
* has the same view of the component that this node currently has.
*
* There is a case where the sender was sending a CS_ERRED when a
* component was in error, but before the sender returns from
* ksend_message the component has been hotspared and resync'd.
*
* In this case, the hs_id will be different from the shared ms_hs_id,
* so the component has already been hotspared. Just return in this
* case.
*/
#ifdef DEBUG
if (mirror_debug_flag) {
printf("mirror_set_state: short circuit "
"hs_id=0x%x, ms_hs_id=0x%x\n",
}
#endif
/* release the block on writes to the mirror */
return (0);
}
}
/*
* If the device is newly errored then make sure that it is
* closed. Closing the device allows for the RCM framework
* to unconfigure the device if required.
*/
void (*get_dev)();
"get device", 0);
}
} else
un->un_changecnt++;
/* Update state in submirror */
/*
* Commit the state change to the metadb, only the master will write
* to disk
*/
/* release the block on writes to the mirror */
/* generate NOTIFY events for error state changes */
} else if (p->state == CS_LAST_ERRED) {
}
return (0);
}
/*
* mirror_suspend_writes:
* ---------------------
* Called to suspend writes to a mirror region. The flag un_suspend_wr_flag is
* tested in mirror_write_strategy, and if set all writes are blocked.
* This function is called by the MD_MN_SUSPEND_WRITES ioctl.
*/
static int
{
#ifdef DEBUG
if (mirror_debug_flag)
#endif
return (EINVAL); /* No unit */
/* This function is only valid for a multi-node set */
if (!MD_MNSET_SETNO(setno)) {
return (EINVAL);
}
/*
* Mark the resync as blocked. This will stop any currently running
* thread and will prevent a new resync from attempting to perform
* i/o
*/
return (0);
}
/*
* mirror_set_capability:
* ------------------------
* Called to set or clear a capability for a mirror
* called by the MD_MN_SET_CAP ioctl.
*/
static int
{
mdi_unit_t *ui;
#ifdef DEBUG
if (mirror_debug_flag)
#endif
return (EINVAL);
/* This function is only valid for a multi-node set */
if (!MD_MNSET_SETNO(setno)) {
return (EINVAL);
}
if (p->sc_set & DKV_ABR_CAP) {
/* Clear DRL and set owner to 0 if no resync active */
un->un_mirror_owner = 0;
}
} else {
}
if (p->sc_set & DKV_DMR_CAP) {
} else {
}
return (0);
}
/*
* mirror_choose_owner:
* ------------------------
* Called to choose an owner for a mirror resync. Can be called when starting
* resync or by the MD_MN_SET_MM_OWNER ioctl with the MD_MN_MM_CHOOSE_OWNER flag
* set. The ioctl is called with this flag set when we are in the cluster
* reconfig and we wish to set a new owner for a resync whose owner has left
* the cluster. We use a resync owner count to implement a round robin
* allocation of resync owners. We send a message to the master including
* this count and the message handler uses it to select an owner from the
* nodelist and then sends a SET_MM_OWNER message to the chosen node to
* become the owner.
*
* Input:
* un - unit reference
* ownp - owner information (if non-NULL)
*/
int
{
/* This function is only valid for a multi-node set */
if (!MD_MNSET_SETNO(setno)) {
return (EINVAL);
}
#ifdef DEBUG
if (mirror_debug_flag)
printf("send choose owner message, mnum = %x,"
#endif
/*
* setup message with current resync count
* and then increment the count. If we're called with a non-NULL
* owner then we are reestablishing the owner of the mirror. In this
* case we have to flag this to the message handler and set rcnt to
* the new owner node.
*/
mutex_enter(&md_mx);
mutex_exit(&md_mx);
} else {
}
/*
* Spawn a thread to issue the ksend_message() call so that we can
* drop the ioctl lock hierarchy that is blocking further rpc.metad and
* commd set ownership checking.
*/
return (EFAULT);
} else {
return (0);
}
}
/*
* mirror_get_status:
* ----------------------------------
* Called by nodes which are not the master node of the cluster. Obtains the
* master abr state and the submirror status for each valid submirror of the
* unit so that the status returned by metastat is consistent across the
* cluster.
* We update tstate for the mirror and both the sm_flag and the sm_state for
* each submirror.
*
* Input:
* un mirror to obtain status from
*
* Calling Convention:
* writerlock (either ioctl or unit) must be held
*/
void
{
int smi;
int rval;
/*
* Get all of the information for the mirror.
*/
/*
* Must drop the writerlock over ksend_message since another
* thread on this node could be running a higher class message
* and be trying grab the readerlock.
*
* If we are in the context of an ioctl, drop the ioctl lock.
* lockp holds the list of locks held.
*/
if (lockp) {
} else {
}
/* if the node hasn't yet joined, it's Ok. */
}
/* if dropped the lock previously, regain it */
if (lockp) {
} else {
/*
* Reacquire dropped locks and update acquirecnts
* appropriately.
*/
(void) md_unit_writerlock(ui);
}
/*
* Check to see if we've got a believable amount of returned data.
* If not, we simply return as there is no usable information.
*/
return;
}
/*
*/
#ifdef DEBUG
if (mirror_debug_flag)
#endif
#ifdef DEBUG
if (mirror_debug_flag) {
}
#endif
}
/* Set ABR if set on the Master node */
}
/*
* mirror_get_mir_state:
* -------------------
* Obtain the ABR state of a mirror and the state of all submirrors from the
* master node for the unit specified in sm_state->mnum.
* Called by MD_MN_GET_MIRROR_STATE ioctl.
*/
static int
{
mdclrerror(&mde);
return (EINVAL);
}
if (!MD_MNSET_SETNO(setno)) {
return (EINVAL);
}
/*
* We've now got a writerlock on the unit structure (so no-one can
* modify the incore values) and we'll now send the message to the
* master node. Since we're only called as part of a reconfig cycle
* we don't need to release the unit locks across the ksend_message as
* only the master node will process it, and we never send this to
* ourselves if we're the master.
*/
return (0);
}
static int
{
void *d = NULL;
int err = 0;
/* We can only handle 32-bit clients for internal commands */
return (EINVAL);
}
/* dispatch ioctl */
switch (cmd) {
case MD_IOCSET:
{
return (EACCES);
sz = sizeof (md_set_params_t);
break;
}
break;
}
case MD_IOCGET:
{
return (EACCES);
sz = sizeof (md_i_get_t);
break;
}
break;
}
case MD_IOCRESET:
{
return (EACCES);
sz = sizeof (md_i_reset_t);
break;
}
break;
}
case MD_IOCSETSYNC:
case MD_MN_SETSYNC:
{
return (EACCES);
sz = sizeof (md_resync_ioctl_t);
break;
}
break;
}
case MD_IOCGETSYNC:
{
return (EACCES);
sz = sizeof (md_resync_ioctl_t);
break;
}
break;
}
case MD_IOCREPLACE:
{
return (EACCES);
sz = sizeof (replace_params_t);
break;
}
break;
}
case MD_IOCOFFLINE:
{
return (EACCES);
sz = sizeof (md_i_off_on_t);
break;
}
break;
}
case MD_IOCONLINE:
{
return (EACCES);
sz = sizeof (md_i_off_on_t);
break;
}
break;
}
case MD_IOCDETACH:
{
return (EACCES);
sz = sizeof (md_detach_params_t);
break;
}
break;
}
case MD_IOCATTACH:
{
return (EACCES);
sz = sizeof (md_att_struct_t);
break;
}
break;
}
case MD_IOCGET_DEVS:
{
return (EACCES);
sz = sizeof (md_getdevs_params_t);
break;
}
break;
}
case MD_IOCGROW:
{
return (EACCES);
sz = sizeof (md_grow_params_t);
break;
}
break;
}
case MD_IOCCHANGE:
{
return (EACCES);
sz = sizeof (md_mirror_params_t);
break;
}
break;
}
case MD_IOCPROBE_DEV:
{
md_probedev_impl_t *p = NULL;
int i;
return (EACCES);
sz = sizeof (md_probedev_t);
/* now copy in the data */
goto free_mem;
}
/*
* Sanity test the args. Test name should have the keyword
* probe.
*/
p->probe_sema = NULL;
ph = (struct md_probedev *)d;
sizeof (md_driver_t));
goto free_mem;
}
KM_SLEEP);
goto free_mem;
}
goto free_mem;
/*
* put the request on the queue and wait.
*/
(void) IOLOCK_RETURN(0, lockp);
/* wait for the events to occur */
sema_p(PROBE_SEMA(p));
}
while (md_ioctl_lock_enter() == EINTR);
/*
* clean up. The hdr list is freed in the probe routines
* since the list is NULL by the time we get here.
*/
if (p) {
if (p->probe_sema != NULL) {
sema_destroy(PROBE_SEMA(p));
}
mutex_destroy(PROBE_MX(p));
}
kmem_free(p, sizeof (md_probedev_impl_t));
}
break;
}
case MD_MN_SET_MM_OWNER:
{
return (EACCES);
sz = sizeof (md_set_mmown_params_t);
break;
}
break;
}
case MD_MN_GET_MM_OWNER:
{
return (EACCES);
sz = sizeof (md_set_mmown_params_t);
break;
}
break;
}
case MD_MN_MM_OWNER_STATUS:
{
return (EACCES);
sz = sizeof (md_mn_own_status_t);
break;
}
break;
}
case MD_MN_SET_STATE:
{
return (EACCES);
sz = sizeof (md_set_state_params_t);
break;
}
break;
}
case MD_MN_SUSPEND_WRITES:
{
return (EACCES);
sz = sizeof (md_suspend_wr_params_t);
break;
}
break;
}
case MD_MN_RESYNC:
{
sz = sizeof (md_mn_rs_params_t);
break;
}
break;
}
case MD_MN_ALLOCATE_HOTSPARE:
{
return (EACCES);
sz = sizeof (md_alloc_hotsp_params_t);
break;
}
lockp);
break;
}
case MD_MN_POKE_HOTSPARES:
{
(void) poke_hotspares();
break;
}
case MD_MN_SET_CAP:
{
return (EACCES);
sz = sizeof (md_mn_setcap_params_t);
break;
}
lockp);
break;
}
case MD_MN_GET_MIRROR_STATE:
{
sz = sizeof (md_mn_get_mir_state_t);
break;
}
lockp);
break;
}
default:
return (ENOTTY);
}
/*
* copyout and free any args
*/
if (sz != 0) {
if (err == 0) {
}
}
}
return (err);
}
int
int cmd,
void *data,
int mode,
)
{
int err = 0;
/* handle admin ioctls */
if (mnum == MD_ADM_MINOR)
/* check unit */
return (ENXIO);
/* is this a supported ioctl? */
if (err != 0) {
return (err);
}
/* dispatch ioctl */
switch (cmd) {
case DKIOCINFO:
{
struct dk_cinfo *p;
return (EACCES);
p = kmem_alloc(sizeof (*p), KM_SLEEP);
kmem_free(p, sizeof (*p));
return (err);
}
case DKIOCGMEDIAINFO:
{
struct dk_minfo p;
return (EACCES);
return (err);
}
case DKIOCGGEOM:
{
struct dk_geom *p;
return (EACCES);
p = kmem_alloc(sizeof (*p), KM_SLEEP);
mode) != 0)
}
kmem_free(p, sizeof (*p));
return (err);
}
case DKIOCGVTOC:
{
return (EACCES);
return (err);
}
}
#ifdef _SYSCALL32
else {
}
#endif /* _SYSCALL32 */
return (err);
}
case DKIOCSVTOC:
{
return (EACCES);
}
}
#ifdef _SYSCALL32
else {
} else {
}
}
#endif /* _SYSCALL32 */
if (err == 0)
return (err);
}
case DKIOCGAPART:
{
return (err);
}
mode) != 0)
}
#ifdef _SYSCALL32
else {
mode) != 0)
}
#endif /* _SYSCALL32 */
return (err);
}
case DKIOCGETEFI:
{
/*
* This one can be done centralized,
* no need to put in the same code for all types of metadevices
*/
}
case DKIOCSETEFI:
{
/*
* This one can be done centralized,
* no need to put in the same code for all types of metadevices
*/
}
case DKIOCPARTITION:
{
}
case DKIOCGETVOLCAP:
{
mdi_unit_t *ui;
/* Only valid for MN sets */
return (EINVAL);
return (EACCES);
}
return (err);
}
case DKIOCSETVOLCAP:
{
volcapset_t volcap = 0;
mdi_unit_t *ui;
/* Only valid for MN sets */
return (EINVAL);
return (EACCES);
return (EFAULT);
/* Not valid if a submirror is offline */
return (EINVAL);
}
volcap |= DKV_ABR_CAP;
/* Only send capability message if there is a change */
return (err);
}
case DKIOCDMR:
{
#ifdef _MULTI_DATAMODEL
#endif /* _MULTI_DATAMODEL */
/* Only valid for MN sets */
return (EINVAL);
return (ENOMEM);
#ifdef _MULTI_DATAMODEL
return (ENOMEM);
}
case DDI_MODEL_ILP32:
/*
* If we're called from a higher-level driver we don't
* need to manipulate the data. Its already been done by
* the caller.
*/
mode)) {
return (EFAULT);
}
break;
}
/* FALLTHROUGH */
case DDI_MODEL_NONE:
return (EFAULT);
}
break;
default:
return (EFAULT);
}
#else /* ! _MULTI_DATAMODEL */
return (EFAULT);
}
#endif /* _MULTI_DATAMODEL */
if (err == 0) {
#ifdef _MULTI_DATAMODEL
case DDI_MODEL_ILP32:
sizeof (vdr32->vdr_side_name));
}
break;
}
/* FALLTHROUGH */
case DDI_MODEL_NONE:
break;
}
#else /* ! _MULTI_DATAMODEL */
#endif /* _MULTI_DATAMODEL */
}
#ifdef _MULTI_DATAMODEL
#endif /* _MULTI_DATAMODEL */
return (err);
}
default:
return (ENOTTY);
}
}
/*
* rename named service entry points and support functions
*/
/*
*
* most of these are handled by generic role swap functions
*/
/*
* MDRNM_UPDATE_KIDS
*/
void
{
int smi;
/*
* since our role isn't changing (parent->parent)
* one of our children must be changing
* find the child being modified, and update
* our notion of it
*/
continue;
}
break;
}
}
}
/*
* exchange down (self->child)
*/
void
)
{
int smi;
/*
* self id changes in our own unit struct
*/
/*
* parent identifier need not change
*/
/*
* point the set array pointers at the "new" unit and unit in-cores
* Note: the other half of this transfer is done in the "update_to"
* exchange named service.
*/
/*
* transfer kstats
*/
/*
* the unit in-core reference to the get next link's id changes
*/
/*
* find the child whose identity we're assuming
*/
continue;
}
}
}
/*
* Update the sub-mirror's identity
*/
/*
* delete the key for the old sub-mirror from the name space
*/
/*
* and store the record id (from the unit struct) into recids
*/
}
/*
* exchange down (parent->self)
*/
void
)
{
int smi;
/*
* self id changes in our own unit struct
*/
/*
* parent identifier need not change
*/
/*
* point the set array pointers at the "new" unit and unit in-cores
* Note: the other half of this transfer is done in the "update_to"
* exchange named service.
*/
/*
* transfer kstats
*/
/*
* the unit in-core reference to the get next link's id changes
*/
/*
* find the child whose identity we're assuming
*/
continue;
}
}
}
/*
* Update the sub-mirror's identity
*/
/*
* delete the key for the old sub-mirror from the name space
*/
/*
* and store the record id (from the unit struct) into recids
*/
}
/*
* MDRNM_LIST_URKIDS: named svc entry point
* add all delta entries appropriate for our children onto the
* deltalist pointed to by dlpp
*/
int
{
md_rendelta_t *new, *p;
int smi;
int n_children;
n_children = 0;
return (-1);
}
/* NULL */
}
continue;
}
if (!new) {
}
return (-1);
}
++n_children;
}
return (n_children);
}
/*
* support routine for MDRNM_CHECK
*/
static int
mdi_unit_t *ui,
{
int smi;
from_min);
return (EINVAL);
}
return (EINVAL);
}
return (EINVAL);
}
/* we're related if trying to swap with our parent */
case MDRNOP_EXCHANGE:
/*
* check for a swap with our child
*/
continue;
}
}
}
if (!related) {
return (EINVAL);
}
break;
case MDRNOP_RENAME:
/*
* if from is top-level and is open, then the kernel is using
* the md_dev64_t.
*/
from_min);
return (EBUSY);
}
break;
default:
from_min);
return (EINVAL);
}
return (0); /* ok */
}
/*
* Named service entry point: MDRNM_CHECK
*/
{
int ci;
int i;
int compcnt;
int err = 0;
return (EINVAL);
}
for (i = 0; i < NMIRROR; i++) {
continue;
if (!smic->sm_get_component_count) {
return (ENXIO);
}
if (!smic->sm_shared_by_indx) {
return (ENXIO);
}
if (!shared) {
return (ENXIO);
}
return (EIO);
}
case CS_OKAY:
break;
case CS_RESYNC:
return (EBUSY);
default:
return (EINVAL);
}
}
}
/* self does additional checks */
}
return (err);
}