sp.c revision b30678564674f0af0118547884def6cf721f1360
3853N/A/*
3853N/A * CDDL HEADER START
3853N/A *
3853N/A * The contents of this file are subject to the terms of the
3853N/A * Common Development and Distribution License (the "License").
3853N/A * You may not use this file except in compliance with the License.
3853N/A *
3853N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
3853N/A * or http://www.opensolaris.org/os/licensing.
3853N/A * See the License for the specific language governing permissions
3853N/A * and limitations under the License.
3853N/A *
3853N/A * When distributing Covered Code, include this CDDL HEADER in each
3853N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
3853N/A * If applicable, add the following below this CDDL HEADER, with the
3853N/A * fields enclosed by brackets "[]" replaced with your own identifying
3853N/A * information: Portions Copyright [yyyy] [name of copyright owner]
3853N/A *
3853N/A * CDDL HEADER END
3853N/A */
3853N/A
3853N/A/*
3853N/A * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
3853N/A * Use is subject to license terms.
5027N/A */
3853N/A
3853N/A/*
3853N/A * Soft partitioning metadevice driver (md_sp).
3853N/A *
4500N/A * This file contains the primary operations of the soft partitioning
3853N/A * metadevice driver. This includes all routines for normal operation
3853N/A * (open/close/read/write). Please see mdvar.h for a definition of
3853N/A * metadevice operations vector (md_ops_t). This driver is loosely
3853N/A * based on the stripe driver (md_stripe).
3853N/A *
3853N/A * All metadevice administration is done through the use of ioctl's.
3853N/A * As such, all administrative routines appear in sp_ioctl.c.
3853N/A *
3853N/A * Soft partitions are represented both in-core and in the metadb with a
3853N/A * unit structure. The soft partition-specific information in the unit
3853N/A * structure includes the following information:
3853N/A * - Device information (md_dev64_t & md key) about the device on which
3853N/A * the soft partition is built.
3853N/A * - Soft partition status information.
3853N/A * - The size of the soft partition and number of extents used to
3853N/A * make up that size.
 *    - An array of extents which define virtual/physical offset
4294N/A * mappings and lengths for each extent.
4294N/A *
3853N/A * Typical soft partition operation proceeds as follows:
3853N/A * - The unit structure is fetched from the metadb and placed into
3853N/A * an in-core array (as with other metadevices). This operation
3853N/A * is performed via sp_build_incore( ) and takes place during
3853N/A * "snarfing" (when all metadevices are brought in-core at
3853N/A * once) and when a new soft partition is created.
 *    - A soft partition is opened via sp_open( ).  At open time the
 *      soft partition unit structure is verified with the soft
3853N/A * partition on-disk structures. Additionally, the soft partition
3853N/A * status is checked (only soft partitions in the OK state may be
3853N/A * opened).
3853N/A * - Soft partition I/O is performed via sp_strategy( ) which relies on
4500N/A * a support routine, sp_mapbuf( ), to do most of the work.
3853N/A * sp_mapbuf( ) maps a buffer to a particular extent via a binary
3853N/A * search of the extent array in the soft partition unit structure.
3853N/A * Once a translation has been performed, the I/O is passed down
3853N/A * to the next layer, which may be another metadevice or a physical
3853N/A * disk. Since a soft partition may contain multiple, non-contiguous
3853N/A * extents, a single I/O may have to be fragmented.
3853N/A * - Soft partitions are closed using sp_close.
3853N/A *
3853N/A */
3853N/A
4136N/A#include <sys/param.h>
3853N/A#include <sys/systm.h>
3853N/A#include <sys/conf.h>
3853N/A#include <sys/file.h>
3853N/A#include <sys/user.h>
3853N/A#include <sys/uio.h>
3853N/A#include <sys/t_lock.h>
3853N/A#include <sys/buf.h>
3853N/A#include <sys/dkio.h>
3853N/A#include <sys/vtoc.h>
3853N/A#include <sys/kmem.h>
3853N/A#include <vm/page.h>
3853N/A#include <sys/cmn_err.h>
5085N/A#include <sys/sysmacros.h>
3853N/A#include <sys/types.h>
3853N/A#include <sys/mkdev.h>
3853N/A#include <sys/stat.h>
3853N/A#include <sys/open.h>
3853N/A#include <sys/lvm/mdvar.h>
3853N/A#include <sys/lvm/md_sp.h>
4500N/A#include <sys/lvm/md_convert.h>
3884N/A#include <sys/lvm/md_notify.h>
3884N/A#include <sys/lvm/md_crc.h>
4500N/A#include <sys/modctl.h>
4500N/A#include <sys/ddi.h>
3853N/A#include <sys/sunddi.h>
3853N/A#include <sys/debug.h>
3853N/A
4500N/A#include <sys/sysevent/eventdefs.h>
4500N/A#include <sys/sysevent/svm.h>
3853N/A
/* operations vector exported to the core md driver */
md_ops_t		sp_md_ops;
#ifndef	lint
char			_depends_on[] = "drv/md";
md_ops_t		*md_interface_ops = &sp_md_ops;
#endif

/* state shared with the core md driver (drv/md) */
extern unit_t		md_nunits;
extern set_t		md_nsets;
extern md_set_t		md_set[];

extern int		md_status;
extern major_t		md_major;
extern mdq_anchor_t	md_done_daemon;
extern mdq_anchor_t	md_sp_daemon;
extern kmutex_t		md_mx;
extern kcondvar_t	md_cv;
extern md_krwlock_t	md_unit_array_rw;
extern clock_t		md_hz;

/* kmem caches for parent (per-request) and child (per-fragment) state */
static kmem_cache_t	*sp_parent_cache = NULL;
static kmem_cache_t	*sp_child_cache = NULL;
static void		sp_send_stat_ok(mp_unit_t *);
static void		sp_send_stat_err(mp_unit_t *);
3858N/A
3853N/A/*
3853N/A * FUNCTION: sp_parent_constructor()
3853N/A * INPUT: none.
3853N/A * OUTPUT: ps - parent save structure initialized.
3853N/A * RETURNS: void * - ptr to initialized parent save structure.
3853N/A * PURPOSE: initialize parent save structure.
3853N/A */
3853N/A/*ARGSUSED1*/
3853N/Astatic int
3853N/Asp_parent_constructor(void *p, void *d1, int d2)
3853N/A{
3853N/A mutex_init(&((md_spps_t *)p)->ps_mx,
3853N/A NULL, MUTEX_DEFAULT, NULL);
3853N/A return (0);
3853N/A}
3853N/A
3853N/Astatic void
3853N/Asp_parent_init(md_spps_t *ps)
3853N/A{
3853N/A bzero(ps, offsetof(md_spps_t, ps_mx));
3853N/A}
3853N/A
3853N/A/*ARGSUSED1*/
3853N/Astatic void
3853N/Asp_parent_destructor(void *p, void *d)
3853N/A{
3853N/A mutex_destroy(&((md_spps_t *)p)->ps_mx);
3853N/A}
3853N/A
3853N/A/*
3853N/A * FUNCTION: sp_child_constructor()
3853N/A * INPUT: none.
3853N/A * OUTPUT: cs - child save structure initialized.
3853N/A * RETURNS: void * - ptr to initialized child save structure.
3853N/A * PURPOSE: initialize child save structure.
3853N/A */
3853N/A/*ARGSUSED1*/
3853N/Astatic int
3853N/Asp_child_constructor(void *p, void *d1, int d2)
3853N/A{
3853N/A bioinit(&((md_spcs_t *)p)->cs_buf);
3853N/A return (0);
3853N/A}
3853N/A
3853N/Astatic void
4923N/Asp_child_init(md_spcs_t *cs)
3853N/A{
3853N/A cs->cs_mdunit = 0;
3853N/A cs->cs_ps = NULL;
3853N/A md_bioreset(&cs->cs_buf);
3853N/A}
3853N/A
3853N/A/*ARGSUSED1*/
3853N/Astatic void
3853N/Asp_child_destructor(void *p, void *d)
4923N/A{
4923N/A biofini(&((md_spcs_t *)p)->cs_buf);
4923N/A}
4923N/A
4923N/A/*
4923N/A * FUNCTION: sp_run_queue()
4923N/A * INPUT: none.
4923N/A * OUTPUT: none.
4923N/A * RETURNS: void.
3853N/A * PURPOSE: run the md_daemon to clean up memory pool.
3853N/A */
3853N/A/*ARGSUSED*/
3853N/Astatic void
3853N/Asp_run_queue(void *d)
3853N/A{
3853N/A if (!(md_status & MD_GBL_DAEMONS_LIVE))
3853N/A md_daemon(1, &md_done_daemon);
3853N/A}
3853N/A
3853N/A
3853N/A/*
3853N/A * FUNCTION: sp_build_incore()
3853N/A * INPUT: p - ptr to unit structure.
3853N/A * snarfing - flag to tell us we are snarfing.
3853N/A * OUTPUT: non.
3853N/A * RETURNS: int - 0 (always).
3853N/A * PURPOSE: place unit structure into in-core unit array (keyed from
3853N/A * minor number).
3853N/A */
3853N/Aint
3853N/Asp_build_incore(void *p, int snarfing)
3853N/A{
3853N/A mp_unit_t *un = (mp_unit_t *)p;
3853N/A minor_t mnum;
3853N/A set_t setno;
3853N/A md_dev64_t tmpdev;
3853N/A
3853N/A mnum = MD_SID(un);
3853N/A
3853N/A if (MD_UNIT(mnum) != NULL)
3853N/A return (0);
3853N/A
3853N/A MD_STATUS(un) = 0;
3853N/A
3853N/A if (snarfing) {
3853N/A /*
3853N/A * if we are snarfing, we get the device information
3853N/A * from the metadb record (using the metadb key for
3853N/A * that device).
3853N/A */
3853N/A setno = MD_MIN2SET(mnum);
3853N/A
3853N/A tmpdev = md_getdevnum(setno, mddb_getsidenum(setno),
3853N/A un->un_key, MD_NOTRUST_DEVT);
3853N/A un->un_dev = tmpdev;
3853N/A }
3853N/A
3853N/A /* place various information in the in-core data structures */
3853N/A md_nblocks_set(mnum, un->c.un_total_blocks);
3853N/A MD_UNIT(mnum) = un;
3853N/A
3853N/A return (0);
3853N/A}
3853N/A
/*
 * FUNCTION:	reset_sp()
 * INPUT:	un	- unit structure to be reset/removed.
 *		mnum	- minor number to be reset/removed.
 *		removing - flag to tell us if we are removing
 *			   permanently or just resetting in-core
 *			   structures.
 * OUTPUT:	none.
 * RETURNS:	void.
 * PURPOSE:	used to either simply reset in-core structures or to
 *		permanently remove metadevices from the metadb.
 */
void
reset_sp(mp_unit_t *un, minor_t mnum, int removing)
{
	sv_dev_t	*sv;
	mddb_recid_t	vtoc_id;

	/* clean up in-core structures */
	md_destroy_unit_incore(mnum, &sp_md_ops);

	/* mark the unit absent: no size, no in-core unit pointer */
	md_nblocks_set(mnum, -1ULL);
	MD_UNIT(mnum) = NULL;

	/*
	 * Attempt release of minor node
	 */
	md_remove_minor_node(mnum);

	if (!removing)
		return;

	/* we are removing the soft partition from the metadb */

	/*
	 * Save off device information so we can get to
	 * it after we do the mddb_deleterec().
	 */
	sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t), KM_SLEEP);
	sv->setno = MD_MIN2SET(mnum);
	sv->key = un->un_key;
	vtoc_id = un->c.un_vtoc_id;

	/*
	 * Remove self from the namespace
	 */
	if (un->c.un_revision & MD_FN_META_DEV) {
		(void) md_rem_selfname(un->c.un_self_id);
	}

	/* Remove the unit structure */
	mddb_deleterec_wrapper(un->c.un_record_id);

	/* Remove the vtoc record, if one was saved above */
	if (vtoc_id)
		mddb_deleterec_wrapper(vtoc_id);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, TAG_METADEVICE,
	    MD_MIN2SET(mnum), MD_MIN2UNIT(mnum));

	/*
	 * remove the underlying device name from the metadb. if other
	 * soft partitions are built on this device, this will simply
	 * decrease the reference count for this device. otherwise the
	 * name record for this device will be removed from the metadb.
	 */
	md_rem_names(sv, 1);
	kmem_free(sv, sizeof (sv_dev_t));
}
4500N/A
/*
 * FUNCTION:	sp_send_stat_msg
 * INPUT:	un	- unit reference
 *		status	- status to be sent to master node
 *			MD_SP_OK - soft-partition is now OK
 *			MD_SP_ERR	"	" errored
 * OUTPUT:	none.
 * RETURNS:	void.
 * PURPOSE:	send a soft-partition status change to the master node. If the
 *		message succeeds we simply return. If it fails we panic as the
 *		cluster-wide view of the metadevices is now inconsistent.
 * CALLING CONTEXT:
 *	Blockable. No locks can be held.
 */
static void
sp_send_stat_msg(mp_unit_t *un, sp_status_t status)
{
	md_mn_msg_sp_setstat_t	sp_msg;
	md_mn_kresult_t		*kres;
	set_t			setno = MD_UN2SET(un);
	int			rval;
	const char		*str = (status == MD_SP_ERR) ? "MD_SP_ERR" : "MD_SP_OK";
	int			nretries = 0;

	sp_msg.sp_setstat_mnum = MD_SID(un);
	sp_msg.sp_setstat_status = status;

	kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);

spss_msg:
	rval = mdmn_ksend_message(setno, MD_MN_MSG_SP_SETSTAT2, MD_MSGF_NO_LOG,
	    0, (char *)&sp_msg, sizeof (sp_msg), kres);

	if (!MDMN_KSEND_MSG_OK(rval, kres)) {
		mdmn_ksend_show_error(rval, kres, "MD_MN_MSG_SP_SETSTAT2");
		/* If we're shutting down already, pause things here. */
		if (kres->kmmr_comm_state == MDMNE_RPC_FAIL) {
			/* block until the commd comes back */
			while (!md_mn_is_commd_present()) {
				delay(md_hz);
			}
			/*
			 * commd is available again. Retry the message once.
			 * If it fails we panic as the system is in an
			 * unexpected state.
			 */
			if (nretries++ == 0)
				goto spss_msg;
		}
		/*
		 * Panic as we are now in an inconsistent state.
		 */
		cmn_err(CE_PANIC, "md: %s: %s could not be set on all nodes\n",
		    md_shortname(MD_SID(un)), str);
	}

	kmem_free(kres, sizeof (md_mn_kresult_t));
}
3853N/A
/*
 * FUNCTION:	sp_finish_error
 * INPUT:	ps	- parent save structure for error-ed I/O.
 *		lock_held	- set if the unit readerlock is held
 * OUTPUT:	none.
 * RETURNS:	void.
 * PURPOSE:	report a driver error: flag the parent buf as errored,
 *		complete it, and log a warning naming the failing device.
 */
static void
sp_finish_error(md_spps_t *ps, int lock_held)
{
	struct buf	*pb = ps->ps_bp;
	mdi_unit_t	*ui = ps->ps_ui;
	md_dev64_t	un_dev;			/* underlying device */
	md_dev64_t	md_dev = md_expldev(pb->b_edev); /* metadev in error */
	char		*str;

	un_dev = md_expldev(ps->ps_un->un_dev);
	/* set error type */
	if (pb->b_flags & B_READ) {
		str = "read";
	} else {
		str = "write";
	}

	/*
	 * Everything needed below (pb, ui, un_dev, md_dev, str) has been
	 * copied out of ps above, so the parent structure can be freed
	 * before the buf is completed.
	 */
	SPPS_FREE(sp_parent_cache, ps);
	pb->b_flags |= B_ERROR;

	md_kstat_done(ui, pb, 0);

	if (lock_held) {
		md_unit_readerexit(ui);
	}
	md_biodone(pb);

	cmn_err(CE_WARN, "md: %s: %s error on %s",
	    md_shortname(md_getminor(md_dev)), str,
	    md_devname(MD_DEV2SET(md_dev), un_dev, NULL, 0));
}
3853N/A
3853N/A
/*
 * FUNCTION:	sp_xmit_ok
 * INPUT:	dq	- daemon queue referencing the ps structure to send
 * OUTPUT:	none.
 * RETURNS:	void.
 * PURPOSE:	send a message to the master node in a multi-owner diskset to
 *		update all attached nodes view of the soft-part to be MD_SP_OK.
 * CALLING CONTEXT:
 *	Blockable. No unit lock held.
 */
static void
sp_xmit_ok(daemon_queue_t *dq)
{
	md_spps_t	*ps = (md_spps_t *)dq;

	/* Send a MD_MN_MSG_SP_SETSTAT to the master */
	sp_send_stat_msg(ps->ps_un, MD_SP_OK);

	/*
	 * Successfully transmitted the OK state to all nodes, now release
	 * this parent structure.
	 */
	SPPS_FREE(sp_parent_cache, ps);
}
4500N/A
4500N/A/*
4500N/A * FUNCTION: sp_xmit_error
3858N/A * INPUT: dq - daemon queue referencing failing ps structure
3858N/A * OUTPUT: none.
3858N/A * RETURNS: void.
3858N/A * PURPOSE: send a message to the master node in a multi-owner diskset to
3858N/A * update all attached nodes view of the soft-part to be MD_SP_ERR.
3858N/A * CALLING CONTEXT:
3858N/A * Blockable. No unit lock held.
3858N/A */
3858N/Astatic void
3858N/Asp_xmit_error(daemon_queue_t *dq)
3858N/A{
3858N/A md_spps_t *ps = (md_spps_t *)dq;
3858N/A
3858N/A /* Send a MD_MN_MSG_SP_SETSTAT to the master */
3858N/A sp_send_stat_msg(ps->ps_un, MD_SP_ERR);
3858N/A
3858N/A /*
3858N/A * Successfully transmitted error state to all nodes, now release this
4530N/A * parent structure.
3858N/A */
3853N/A SPPS_FREE(sp_parent_cache, ps);
3853N/A}
3853N/Astatic void
3853N/Asp_send_stat_ok(mp_unit_t *un)
3853N/A{
4500N/A minor_t mnum = MD_SID(un);
4500N/A md_spps_t *ps;
3858N/A
4500N/A ps = kmem_cache_alloc(sp_parent_cache, MD_ALLOCFLAGS);
3853N/A sp_parent_init(ps);
4500N/A ps->ps_un = un;
3853N/A ps->ps_ui = MDI_UNIT(mnum);
4500N/A
4500N/A daemon_request(&md_sp_daemon, sp_xmit_ok, (daemon_queue_t *)ps,
4803N/A REQ_OLD);
4803N/A}
5027N/A
4500N/Astatic void
4500N/Asp_send_stat_err(mp_unit_t *un)
4500N/A{
4500N/A minor_t mnum = MD_SID(un);
4500N/A md_spps_t *ps;
4500N/A
4500N/A ps = kmem_cache_alloc(sp_parent_cache, MD_ALLOCFLAGS);
4500N/A sp_parent_init(ps);
3853N/A ps->ps_un = un;
4500N/A ps->ps_ui = MDI_UNIT(mnum);
4500N/A
4500N/A daemon_request(&md_sp_daemon, sp_xmit_error, (daemon_queue_t *)ps,
4500N/A REQ_OLD);
4500N/A}
4500N/A
4500N/A
/*
 * FUNCTION:	sp_error()
 * INPUT:	ps	- parent save structure for error-ed I/O.
 * OUTPUT:	none.
 * RETURNS:	void.
 * PURPOSE:	report a driver error.
 * CALLING CONTEXT:
 *	Interrupt - non-blockable
 */
static void
sp_error(md_spps_t *ps)
{
	set_t	setno = MD_UN2SET(ps->ps_un);

	/*
	 * Drop the mutex associated with this request before (potentially)
	 * enqueuing the free onto a separate thread. We have to release the
	 * mutex before destroying the parent structure.
	 */
	if (!(ps->ps_flags & MD_SPPS_DONTFREE)) {
		if (MUTEX_HELD(&ps->ps_mx)) {
			mutex_exit(&ps->ps_mx);
		}
	} else {
		/*
		 * this should only ever happen if we are panicking,
		 * since DONTFREE is only set on the parent if panicstr
		 * is non-NULL.
		 */
		ASSERT(panicstr);
	}

	/*
	 * For a multi-owner set we need to send a message to the master so that
	 * all nodes get the errored status when we first encounter it. To avoid
	 * deadlocking when multiple soft-partitions encounter an error on one
	 * physical unit we drop the unit readerlock before enqueueing the
	 * request. That way we can service any messages that require a
	 * writerlock to be held. Additionally, to avoid deadlocking when at
	 * the bottom of a metadevice stack and a higher level mirror has
	 * multiple requests outstanding on this soft-part, we clone the ps
	 * that failed and pass the error back up the stack to release the
	 * reference that this i/o may have in the higher-level metadevice.
	 * The other nodes in the cluster just have to modify the soft-part
	 * status and we do not need to block the i/o completion for this.
	 */
	if (MD_MNSET_SETNO(setno)) {
		md_spps_t	*err_ps;
		err_ps = kmem_cache_alloc(sp_parent_cache, MD_ALLOCFLAGS);
		sp_parent_init(err_ps);

		/* clone only the fields the daemon handler needs */
		err_ps->ps_un = ps->ps_un;
		err_ps->ps_ui = ps->ps_ui;

		md_unit_readerexit(ps->ps_ui);

		daemon_request(&md_sp_daemon, sp_xmit_error,
		    (daemon_queue_t *)err_ps, REQ_OLD);

		/* readerlock already dropped above, so lock_held == 0 */
		sp_finish_error(ps, 0);

		return;
	} else {
		ps->ps_un->un_status = MD_SP_ERR;
	}

	/* Flag the error */
	sp_finish_error(ps, 1);

}
3853N/A
/*
 * FUNCTION:	sp_mapbuf()
 * INPUT:	un	- unit structure for soft partition we are doing
 *			  I/O on.
 *		voff	- virtual offset in soft partition to map.
 *		bcount	- # of blocks in the I/O.
 * OUTPUT:	bp	- translated buffer to be passed down to next layer.
 * RETURNS:	1	- request must be fragmented, more work to do,
 *		0	- request satisfied, no more work to do
 *		-1	- error
 * PURPOSE:	Map the virtual offset in the soft partition (passed
 *		in via voff) to the "physical" offset on whatever the soft
 *		partition is built on top of. We do this by doing a binary
 *		search of the extent array in the soft partition unit
 *		structure. Once the current extent is found, we do the
 *		translation, determine if the I/O will cross extent
 *		boundaries (if so, we have to fragment the I/O), then
 *		fill in the buf structure to be passed down to the next layer.
 */
static int
sp_mapbuf(
	mp_unit_t	*un,
	sp_ext_offset_t	voff,
	sp_ext_length_t	bcount,
	buf_t		*bp
)
{
	int		lo, mid, hi, found, more;
	size_t		new_bcount;
	sp_ext_offset_t	new_blkno;
	sp_ext_offset_t	new_offset;
	sp_ext_offset_t	ext_endblk;
	md_dev64_t	new_edev;
	extern unsigned	md_maxphys;

	found = 0;
	lo = 0;
	hi = un->un_numexts - 1;

	/*
	 * do a binary search to find the extent that contains the
	 * starting offset. after this loop, mid contains the index
	 * of the correct extent.
	 */
	while (lo <= hi && !found) {
		mid = (lo + hi) / 2;
		/* is the starting offset contained within the mid-ext? */
		if (voff >= un->un_ext[mid].un_voff &&
		    voff < un->un_ext[mid].un_voff + un->un_ext[mid].un_len)
			found = 1;
		else if (voff < un->un_ext[mid].un_voff)
			hi = mid - 1;
		else /* voff > un->un_ext[mid].un_voff + un->un_ext[mid].len */
			lo = mid + 1;
	}

	if (!found) {
		cmn_err(CE_WARN, "sp_mapbuf: invalid offset %llu.\n", voff);
		return (-1);
	}

	/* translate to underlying physical offset/device */
	new_offset = voff - un->un_ext[mid].un_voff;
	new_blkno = un->un_ext[mid].un_poff + new_offset;
	new_edev = un->un_dev;

	/* determine if we need to break the I/O into fragments */
	ext_endblk = un->un_ext[mid].un_voff + un->un_ext[mid].un_len;
	if (voff + btodb(bcount) > ext_endblk) {
		/* I/O runs past this extent: clip it and flag more work */
		new_bcount = dbtob(ext_endblk - voff);
		more = 1;
	} else {
		new_bcount = bcount;
		more = 0;
	}

	/* only break up the I/O if we're not built on another metadevice */
	if ((md_getmajor(new_edev) != md_major) && (new_bcount > md_maxphys)) {
		new_bcount = md_maxphys;
		more = 1;
	}
	if (bp != (buf_t *)NULL) {
		/* do bp updates */
		bp->b_bcount = new_bcount;
		bp->b_lblkno = new_blkno;
		bp->b_edev = md_dev64_to_dev(new_edev);
	}
	return (more);
}
4853N/A
/*
 * FUNCTION:	sp_validate()
 * INPUT:	un	- unit structure to be validated.
 * OUTPUT:	none.
 * RETURNS:	0	- soft partition ok.
 *		-1	- error.
 * PURPOSE:	called on open to sanity check the soft partition. In
 *		order to open a soft partition:
 *		- it must have at least one extent
 *		- the extent info in core and on disk must match
 *		- it may not be in an intermediate state (which would
 *		  imply that a two-phase commit was interrupted)
 *
 *		If the extent checking fails (B_ERROR returned from the read
 *		strategy call) _and_ we're a multi-owner diskset, we send a
 *		message to the master so that all nodes inherit the same view
 *		of the soft partition.
 *		If we are checking a soft-part that is marked as in error, and
 *		we can actually read and validate the watermarks we send a
 *		message to clear the error to the master node.
 */
static int
sp_validate(mp_unit_t *un)
{
	uint_t		ext;
	struct buf	*buf;
	sp_ext_length_t	len;
	mp_watermark_t	*wm;
	set_t		setno;
	int		reset_error = 0;

	setno = MD_UN2SET(un);

	/* sanity check unit structure components ?? */
	if (un->un_status != MD_SP_OK) {
		if (un->un_status != MD_SP_ERR) {
			/* neither OK nor ERR: intermediate state, reject */
			cmn_err(CE_WARN, "md: %s: open failed, soft partition "
			    "status is %u.",
			    md_shortname(MD_SID(un)),
			    un->un_status);
			return (-1);
		} else {
			/* errored: validate anyway, clear the error on pass */
			cmn_err(CE_WARN, "md: %s: open of soft partition "
			    "in Errored state.",
			    md_shortname(MD_SID(un)));
			reset_error = 1;
		}
	}

	if (un->un_numexts == 0) {
		cmn_err(CE_WARN, "md: %s: open failed, soft partition does "
		    "not have any extents.", md_shortname(MD_SID(un)));
		return (-1);
	}

	len = 0LL;
	for (ext = 0; ext < un->un_numexts; ext++) {

		/* tally extent lengths to check total size */
		len += un->un_ext[ext].un_len;

		/* allocate buffer for watermark */
		buf = getrbuf(KM_SLEEP);

		/* read watermark (the block just before the extent) */
		buf->b_flags = B_READ;
		buf->b_edev = md_dev64_to_dev(un->un_dev);
		buf->b_iodone = NULL;
		buf->b_proc = NULL;
		buf->b_bcount = sizeof (mp_watermark_t);
		buf->b_lblkno = un->un_ext[ext].un_poff - 1;
		buf->b_bufsize = sizeof (mp_watermark_t);
		buf->b_un.b_addr = kmem_alloc(sizeof (mp_watermark_t),
		    KM_SLEEP);

		/*
		 * make the call non-blocking so that it is not affected
		 * by a set take.
		 */
		md_call_strategy(buf, MD_STR_MAPPED|MD_NOBLOCK, NULL);
		(void) biowait(buf);

		if (buf->b_flags & B_ERROR) {
			cmn_err(CE_WARN, "md: %s: open failed, could not "
			    "read watermark at block %llu for extent %u, "
			    "error %d.", md_shortname(MD_SID(un)),
			    buf->b_lblkno, ext, buf->b_error);
			kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t));
			freerbuf(buf);

			/*
			 * If we're a multi-owner diskset we send a message
			 * indicating that this soft-part has an invalid
			 * extent to the master node. This ensures a consistent
			 * view of the soft-part across the cluster.
			 */
			if (MD_MNSET_SETNO(setno)) {
				sp_send_stat_err(un);
			}
			return (-1);
		}

		wm = (mp_watermark_t *)buf->b_un.b_addr;

		/* make sure the checksum is correct first */
		if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum,
		    (uint_t)sizeof (mp_watermark_t), (uchar_t *)NULL)) {
			cmn_err(CE_WARN, "md: %s: open failed, watermark "
			    "at block %llu for extent %u does not have a "
			    "valid checksum 0x%08x.", md_shortname(MD_SID(un)),
			    buf->b_lblkno, ext, wm->wm_checksum);
			kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t));
			freerbuf(buf);
			return (-1);
		}

		if (wm->wm_magic != MD_SP_MAGIC) {
			cmn_err(CE_WARN, "md: %s: open failed, watermark "
			    "at block %llu for extent %u does not have a "
			    "valid watermark magic number, expected 0x%x, "
			    "found 0x%x.", md_shortname(MD_SID(un)),
			    buf->b_lblkno, ext, MD_SP_MAGIC, wm->wm_magic);
			kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t));
			freerbuf(buf);
			return (-1);
		}

		/* make sure sequence number matches the current extent */
		if (wm->wm_seq != ext) {
			cmn_err(CE_WARN, "md: %s: open failed, watermark "
			    "at block %llu for extent %u has invalid "
			    "sequence number %u.", md_shortname(MD_SID(un)),
			    buf->b_lblkno, ext, wm->wm_seq);
			kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t));
			freerbuf(buf);
			return (-1);
		}

		/* make sure watermark length matches unit structure */
		if (wm->wm_length != un->un_ext[ext].un_len) {
			cmn_err(CE_WARN, "md: %s: open failed, watermark "
			    "at block %llu for extent %u has inconsistent "
			    "length, expected %llu, found %llu.",
			    md_shortname(MD_SID(un)), buf->b_lblkno,
			    ext, un->un_ext[ext].un_len,
			    (u_longlong_t)wm->wm_length);
			kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t));
			freerbuf(buf);
			return (-1);
		}

		/*
		 * make sure the type is a valid soft partition and not
		 * a free extent or the end.
		 */
		if (wm->wm_type != EXTTYP_ALLOC) {
			cmn_err(CE_WARN, "md: %s: open failed, watermark "
			    "at block %llu for extent %u is not marked "
			    "as in-use, type = %u.", md_shortname(MD_SID(un)),
			    buf->b_lblkno, ext, wm->wm_type);
			kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t));
			freerbuf(buf);
			return (-1);
		}
		/* free up buffer */
		kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t));
		freerbuf(buf);
	}

	if (len != un->un_length) {
		cmn_err(CE_WARN, "md: %s: open failed, computed length "
		    "%llu != expected length %llu.", md_shortname(MD_SID(un)),
		    len, un->un_length);
		return (-1);
	}

	/*
	 * If we're a multi-owner set _and_ reset_error is set, we should clear
	 * the error condition on all nodes in the set. Use SP_SETSTAT2 with
	 * MD_SP_OK.
	 */
	if (MD_MNSET_SETNO(setno) && reset_error) {
		sp_send_stat_ok(un);
	}
	return (0);
}
3853N/A
/*
 * FUNCTION:	sp_done()
 * INPUT:	child_buf	- buffer attached to child save structure.
 *				  this is the buffer on which I/O has just
 *				  completed.
 * OUTPUT:	none.
 * RETURNS:	0	- success.
 *		1	- error, or more fragments still outstanding.
 * PURPOSE:	called on I/O completion.
 */
static int
sp_done(struct buf *child_buf)
{
	struct buf	*parent_buf;
	mdi_unit_t	*ui;
	md_spps_t	*ps;
	md_spcs_t	*cs;

	/*
	 * find the child save structure to which this buffer belongs;
	 * cs_buf is the last member of md_spcs_t, so back up from the
	 * buf address by the size of the fields that precede it.
	 */
	cs = (md_spcs_t *)((caddr_t)child_buf -
	    (sizeof (md_spcs_t) - sizeof (buf_t)));
	/* now get the parent save structure */
	ps = cs->cs_ps;
	parent_buf = ps->ps_bp;

	mutex_enter(&ps->ps_mx);
	/* pass any errors back up to the parent */
	if (child_buf->b_flags & B_ERROR) {
		ps->ps_flags |= MD_SPPS_ERROR;
		parent_buf->b_error = child_buf->b_error;
	}
	/* mapout, if needed */
	if (child_buf->b_flags & B_REMAPPED)
		bp_mapout(child_buf);

	ps->ps_frags--;
	if (ps->ps_frags != 0) {
		/*
		 * if this parent has more children, we just free the
		 * child and return.
		 */
		kmem_cache_free(sp_child_cache, cs);
		mutex_exit(&ps->ps_mx);
		return (1);
	}
	/* there are no more children */
	kmem_cache_free(sp_child_cache, cs);
	if (ps->ps_flags & MD_SPPS_ERROR) {
		/* sp_error() drops ps_mx and disposes of ps itself */
		sp_error(ps);
		return (1);
	}
	ui = ps->ps_ui;
	if (!(ps->ps_flags & MD_SPPS_DONTFREE)) {
		mutex_exit(&ps->ps_mx);
	} else {
		/*
		 * this should only ever happen if we are panicking,
		 * since DONTFREE is only set on the parent if panicstr
		 * is non-NULL.
		 */
		ASSERT(panicstr);
	}
	SPPS_FREE(sp_parent_cache, ps);
	md_kstat_done(ui, parent_buf, 0);
	md_unit_readerexit(ui);
	md_biodone(parent_buf);
	return (0);
}
3853N/A
/*
 * FUNCTION:	md_sp_strategy()
 * INPUT:	parent_buf	- parent buffer
 *		flag		- flags
 *		private		- private data
 * OUTPUT:	none.
 * RETURNS:	void.
 * PURPOSE:	Soft partitioning I/O strategy.  Performs the main work
 *		needed to do I/O to a soft partition.  The basic
 *		algorithm is as follows:
 *			- Allocate a child save structure to keep track
 *			  of the I/O we are going to pass down.
 *			- Map the I/O to the correct extent in the soft
 *			  partition (see sp_mapbuf()).
 *			- bioclone() the buffer and pass it down the
 *			  stack using md_call_strategy.
 *			- If the I/O needs to split across extents,
 *			  repeat the above steps until all fragments
 *			  are finished.
 */
static void
md_sp_strategy(buf_t *parent_buf, int flag, void *private)
{
	md_spps_t	*ps;
	md_spcs_t	*cs;
	int		more;
	mp_unit_t	*un;
	mdi_unit_t	*ui;
	size_t		current_count;
	off_t		current_offset;
	sp_ext_offset_t	current_blkno;
	buf_t		*child_buf;
	set_t		setno = MD_MIN2SET(getminor(parent_buf->b_edev));
	int		strat_flag = flag;

	/*
	 * When doing IO to a multi owner meta device, check if set is halted.
	 * We do this check without the needed lock held, for performance
	 * reasons.
	 * If an IO just slips through while the set is locked via an
	 * MD_MN_SUSPEND_SET, we don't care about it.
	 * Only check for suspension if we are a top-level i/o request
	 * (MD_STR_NOTTOP is cleared in 'flag');
	 */
	if ((md_set[setno].s_status & (MD_SET_HALTED | MD_SET_MNSET)) ==
	    (MD_SET_HALTED | MD_SET_MNSET)) {
		if ((flag & MD_STR_NOTTOP) == 0) {
			mutex_enter(&md_mx);
			/* Here we loop until the set is no longer halted */
			while (md_set[setno].s_status & MD_SET_HALTED) {
				cv_wait(&md_cv, &md_mx);
			}
			mutex_exit(&md_mx);
		}
	}

	ui = MDI_UNIT(getminor(parent_buf->b_edev));

	md_kstat_waitq_enter(ui);

	un = (mp_unit_t *)md_unit_readerlock(ui);

	/*
	 * Account this i/o against the set's i/o count.  For blocking
	 * requests a non-zero return means the set is being released and
	 * the i/o must be failed with ENXIO.
	 */
	if ((flag & MD_NOBLOCK) == 0) {
		if (md_inc_iocount(setno) != 0) {
			parent_buf->b_flags |= B_ERROR;
			parent_buf->b_error = ENXIO;
			parent_buf->b_resid = parent_buf->b_bcount;
			md_kstat_waitq_exit(ui);
			md_unit_readerexit(ui);
			biodone(parent_buf);
			return;
		}
	} else {
		md_inc_iocount_noblock(setno);
	}

	if (!(flag & MD_STR_NOTTOP)) {
		/*
		 * NOTE(review): on failure md_checkbuf() appears to complete
		 * the buf and drop the reader lock itself, since neither is
		 * done here -- confirm against md_checkbuf()'s contract.
		 */
		if (md_checkbuf(ui, (md_unit_t *)un, parent_buf) != 0) {
			md_kstat_waitq_exit(ui);
			return;
		}
	}

	ps = kmem_cache_alloc(sp_parent_cache, MD_ALLOCFLAGS);
	sp_parent_init(ps);

	/*
	 * Save essential information from the original buffhdr
	 * in the parent.
	 */
	ps->ps_un = un;
	ps->ps_ui = ui;
	ps->ps_bp = parent_buf;
	ps->ps_addr = parent_buf->b_un.b_addr;

	current_count = parent_buf->b_bcount;
	current_blkno = (sp_ext_offset_t)parent_buf->b_blkno;
	current_offset = 0;

	/*
	 * if we are at the top and we are panicking,
	 * we don't free in order to save state.
	 */
	if (!(flag & MD_STR_NOTTOP) && (panicstr != NULL))
		ps->ps_flags |= MD_SPPS_DONTFREE;

	md_kstat_waitq_to_runq(ui);

	/* first fragment is accounted for up front; no lock needed yet */
	ps->ps_frags++;

	/*
	 * Mark this i/o as MD_STR_ABR if we've had ABR enabled on this
	 * metadevice.
	 */
	if (ui->ui_tstate & MD_ABR_CAP)
		strat_flag |= MD_STR_ABR;

	/*
	 * this loop does the main work of an I/O.  we allocate a
	 * a child save for each buf, do the logical to physical
	 * mapping, decide if we need to frag the I/O, clone the
	 * new I/O to pass down the stack.  repeat until we've
	 * taken care of the entire buf that was passed to us.
	 */
	do {
		cs = kmem_cache_alloc(sp_child_cache, MD_ALLOCFLAGS);
		sp_child_init(cs);
		child_buf = &cs->cs_buf;
		cs->cs_ps = ps;

		/* returns -1 on error, non-zero if more fragments remain */
		more = sp_mapbuf(un, current_blkno, current_count, child_buf);
		if (more == -1) {
			parent_buf->b_flags |= B_ERROR;
			parent_buf->b_error = EIO;
			md_kstat_done(ui, parent_buf, 0);
			md_unit_readerexit(ui);
			md_biodone(parent_buf);
			kmem_cache_free(sp_parent_cache, ps);
			return;
		}

		child_buf = md_bioclone(parent_buf, current_offset,
		    child_buf->b_bcount, child_buf->b_edev,
		    child_buf->b_blkno, sp_done, child_buf,
		    KM_NOSLEEP);
		/* calculate new offset, counts, etc... */
		current_offset += child_buf->b_bcount;
		current_count -=  child_buf->b_bcount;
		current_blkno +=  (sp_ext_offset_t)(btodb(child_buf->b_bcount));

		/*
		 * Additional fragments bump ps_frags under ps_mx since
		 * completed children may already be decrementing it.
		 */
		if (more) {
			mutex_enter(&ps->ps_mx);
			ps->ps_frags++;
			mutex_exit(&ps->ps_mx);
		}

		md_call_strategy(child_buf, strat_flag, private);
	} while (more);

	/*
	 * When panicking, spin the done daemon by hand until all
	 * fragments have completed, then free the parent ourselves
	 * (MD_SPPS_DONTFREE prevented sp_done from doing so).
	 */
	if (!(flag & MD_STR_NOTTOP) && (panicstr != NULL)) {
		while (!(ps->ps_flags & MD_SPPS_DONE)) {
			md_daemon(1, &md_done_daemon);
		}
		kmem_cache_free(sp_parent_cache, ps);
	}
}
3853N/A
/*
 * FUNCTION:	sp_directed_read()
 * INPUT:	mnum	- minor number
 *		vdr	- vol_directed_rd_t from user
 *		mode	- access mode for copying data out.
 * OUTPUT:	none.
 * RETURNS:	0	- success
 *		Exxxxx	- failure error-code
 * PURPOSE:	Construct the necessary sub-device i/o requests to perform the
 *		directed read as requested by the user.  This is essentially the
 *		same as md_sp_strategy() with the exception being that the
 *		underlying 'md_call_strategy' is replaced with an ioctl call.
 */
int
sp_directed_read(minor_t mnum, vol_directed_rd_t *vdr, int mode)
{
	md_spps_t	*ps;
	md_spcs_t	*cs;
	int		more;
	mp_unit_t	*un;
	mdi_unit_t	*ui;
	size_t		current_count;
	off_t		current_offset;
	sp_ext_offset_t	current_blkno;
	buf_t		*child_buf, *parent_buf;
	void		*kbuffer;
	vol_directed_rd_t	cvdr;
	caddr_t		userbuf;
	offset_t	useroff;
	int		ret = 0;

	ui = MDI_UNIT(mnum);

	md_kstat_waitq_enter(ui);

	bzero(&cvdr, sizeof (cvdr));

	un = (mp_unit_t *)md_unit_readerlock(ui);

	/*
	 * Construct a parent_buf header which reflects the user-supplied
	 * request.
	 */

	/* staging buffer for the whole transfer; freed before return */
	kbuffer = kmem_alloc(vdr->vdr_nbytes, KM_NOSLEEP);
	if (kbuffer == NULL) {
		vdr->vdr_flags |= DKV_DMR_ERROR;
		md_kstat_waitq_exit(ui);
		md_unit_readerexit(ui);
		return (ENOMEM);
	}

	parent_buf = getrbuf(KM_NOSLEEP);
	if (parent_buf == NULL) {
		vdr->vdr_flags |= DKV_DMR_ERROR;
		md_kstat_waitq_exit(ui);
		md_unit_readerexit(ui);
		kmem_free(kbuffer, vdr->vdr_nbytes);
		return (ENOMEM);
	}
	parent_buf->b_un.b_addr = kbuffer;
	parent_buf->b_flags = B_READ;
	parent_buf->b_bcount = vdr->vdr_nbytes;
	parent_buf->b_lblkno = lbtodb(vdr->vdr_offset);
	parent_buf->b_edev = un->un_dev;


	ps = kmem_cache_alloc(sp_parent_cache, MD_ALLOCFLAGS);
	sp_parent_init(ps);

	/*
	 * Save essential information from the original buffhdr
	 * in the parent.
	 */
	ps->ps_un = un;
	ps->ps_ui = ui;
	ps->ps_bp = parent_buf;
	ps->ps_addr = parent_buf->b_un.b_addr;

	current_count = parent_buf->b_bcount;
	current_blkno = (sp_ext_offset_t)parent_buf->b_lblkno;
	current_offset = 0;

	/*
	 * NOTE(review): after waitq_to_runq no matching md_kstat_done()
	 * appears on the exit paths of this routine -- confirm whether the
	 * runq count is balanced elsewhere.
	 */
	md_kstat_waitq_to_runq(ui);

	ps->ps_frags++;
	vdr->vdr_bytesread = 0;

	/*
	 * this loop does the main work of an I/O.  we allocate a
	 * a child save for each buf, do the logical to physical
	 * mapping, decide if we need to frag the I/O, clone the
	 * new I/O to pass down the stack.  repeat until we've
	 * taken care of the entire buf that was passed to us.
	 */
	do {
		cs = kmem_cache_alloc(sp_child_cache, MD_ALLOCFLAGS);
		sp_child_init(cs);
		child_buf = &cs->cs_buf;
		cs->cs_ps = ps;

		/* map this fragment onto the underlying extent */
		more = sp_mapbuf(un, current_blkno, current_count, child_buf);
		if (more == -1) {
			ret = EIO;
			vdr->vdr_flags |= DKV_DMR_SHORT;
			kmem_cache_free(sp_child_cache, cs);
			goto err_out;
		}

		/*
		 * Build a child directed-read request covering just this
		 * fragment; vdr_data points into the kernel staging buffer
		 * at the fragment's offset.
		 */
		cvdr.vdr_flags = vdr->vdr_flags;
		cvdr.vdr_side = vdr->vdr_side;
		cvdr.vdr_nbytes = child_buf->b_bcount;
		cvdr.vdr_offset = ldbtob(child_buf->b_lblkno);
		/* Work out where we are in the allocated buffer */
		useroff = (offset_t)(uintptr_t)kbuffer;
		useroff = useroff + (offset_t)current_offset;
		cvdr.vdr_data = (void *)(uintptr_t)useroff;
		child_buf = md_bioclone(parent_buf, current_offset,
		    child_buf->b_bcount, child_buf->b_edev,
		    child_buf->b_blkno, NULL,
		    child_buf, KM_NOSLEEP);
		/* calculate new offset, counts, etc... */
		current_offset += child_buf->b_bcount;
		current_count -=  child_buf->b_bcount;
		current_blkno +=  (sp_ext_offset_t)(btodb(child_buf->b_bcount));

		if (more) {
			mutex_enter(&ps->ps_mx);
			ps->ps_frags++;
			mutex_exit(&ps->ps_mx);
		}

		/* issue the fragment as a DKIOCDMR ioctl, not a strategy */
		ret = md_call_ioctl(child_buf->b_edev, DKIOCDMR, &cvdr,
		    (mode | FKIOCTL), NULL);

		/*
		 * Free the child structure as we've finished with it.
		 * Normally this would be done by sp_done() but we're just
		 * using md_bioclone() to segment the transfer and we never
		 * issue a strategy request so the iodone will not be called.
		 */
		kmem_cache_free(sp_child_cache, cs);
		if (ret == 0) {
			/* copyout the returned data to vdr_data + offset */
			userbuf = (caddr_t)kbuffer;
			userbuf += (caddr_t)(cvdr.vdr_data) - (caddr_t)kbuffer;
			if (ddi_copyout(userbuf, vdr->vdr_data,
			    cvdr.vdr_bytesread, mode)) {
				ret = EFAULT;
				goto err_out;
			}
			vdr->vdr_bytesread += cvdr.vdr_bytesread;
		} else {
			goto err_out;
		}
	} while (more);

	/*
	 * Update the user-supplied vol_directed_rd_t structure with the
	 * contents of the last issued child request.
	 */
	vdr->vdr_flags = cvdr.vdr_flags;
	vdr->vdr_side = cvdr.vdr_side;
	bcopy(cvdr.vdr_side_name, vdr->vdr_side_name, VOL_SIDENAME);

err_out:
	if (ret != 0) {
		vdr->vdr_flags |= DKV_DMR_ERROR;
	}
	if (vdr->vdr_bytesread != vdr->vdr_nbytes) {
		vdr->vdr_flags |= DKV_DMR_SHORT;
	}
	kmem_cache_free(sp_parent_cache, ps);
	kmem_free(kbuffer, vdr->vdr_nbytes);
	freerbuf(parent_buf);
	md_unit_readerexit(ui);
	return (ret);
}
4136N/A
/*
 * FUNCTION:	sp_snarf()
 * INPUT:	cmd	- snarf cmd.
 *		setno	- set number.
 * OUTPUT:	none.
 * RETURNS:	1	- soft partitions were snarfed.
 *		0	- no soft partitions were snarfed.
 * PURPOSE:	Snarf soft partition metadb records into their in-core
 *		structures.  This routine is called at "snarf time" when
 *		md loads and gets all metadevices records into memory.
 *		The basic algorithm is simply to walk the soft partition
 *		records in the metadb and call the soft partitioning
 *		build_incore routine to set up the in-core structures.
 */
static int
sp_snarf(md_snarfcmd_t cmd, set_t setno)
{
	mp_unit_t	*un;
	mddb_recid_t	recid;
	int		gotsomething;
	int		all_sp_gotten;
	mddb_type_t	rec_type;
	mddb_de_ic_t	*dep;
	mddb_rb32_t	*rbp;
	mp_unit_t	*big_un;
	mp_unit32_od_t	*small_un;
	size_t		newreqsize;


	if (cmd == MD_SNARF_CLEANUP)
		return (0);

	all_sp_gotten = 1;
	gotsomething = 0;

	/* get the record type */
	rec_type = (mddb_type_t)md_getshared_key(setno,
	    sp_md_ops.md_driver.md_drivername);
	recid = mddb_makerecid(setno, 0);

	/*
	 * walk soft partition records in the metadb and call
	 * sp_build_incore to build in-core structures.
	 */
	while ((recid = mddb_getnextrec(recid, rec_type, 0)) > 0) {
		/* if we've already gotten this record, go to the next one */
		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
			continue;


		dep = mddb_getrecdep(recid);
		dep->de_flags = MDDB_F_SOFTPART;
		rbp = dep->de_rb;

		switch (rbp->rb_revision) {
		case MDDB_REV_RB:
		case MDDB_REV_RBFN:
			if ((rbp->rb_private & MD_PRV_CONVD) == 0) {
				/*
				 * This means, we have an old and small record.
				 * And this record hasn't already been converted
				 * :-o before we create an incore metadevice
				 * from this we have to convert it to a big
				 * record.
				 */
				small_un =
				    (mp_unit32_od_t *)mddb_getrecaddr(recid);
				/* big record: one mp_ext is already inline */
				newreqsize = sizeof (mp_unit_t) +
				    ((small_un->un_numexts - 1) *
				    sizeof (struct mp_ext));
				/* KM_SLEEP is safe at snarf (load) time */
				big_un = (mp_unit_t *)kmem_zalloc(newreqsize,
				    KM_SLEEP);
				softpart_convert((caddr_t)small_un,
				    (caddr_t)big_un, SMALL_2_BIG);
				kmem_free(small_un, dep->de_reqsize);
				dep->de_rb_userdata = big_un;
				dep->de_reqsize = newreqsize;
				rbp->rb_private |= MD_PRV_CONVD;
				un = big_un;
			} else {
				/* Record has already been converted */
				un = (mp_unit_t *)mddb_getrecaddr(recid);
			}
			un->c.un_revision &= ~MD_64BIT_META_DEV;
			break;
		case MDDB_REV_RB64:
		case MDDB_REV_RB64FN:
			/* Large device */
			un = (mp_unit_t *)mddb_getrecaddr(recid);
			un->c.un_revision |= MD_64BIT_META_DEV;
			un->c.un_flag |= MD_EFILABEL;
			break;
		}
		/*
		 * NOTE(review): an unrecognized rb_revision would leave 'un'
		 * unset here; presumably only the four revisions above can
		 * occur for this record type -- confirm.
		 */
		MDDB_NOTE_FN(rbp->rb_revision, un->c.un_revision);

		/*
		 * Create minor node for snarfed entry.
		 */
		(void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un));

		if (MD_UNIT(MD_SID(un)) != NULL) {
			/* unit is already in-core */
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
			continue;
		}
		all_sp_gotten = 0;
		if (sp_build_incore((void *)un, 1) == 0) {
			mddb_setrecprivate(recid, MD_PRV_GOTIT);
			md_create_unit_incore(MD_SID(un), &sp_md_ops, 0);
			gotsomething = 1;
		}
	}

	if (!all_sp_gotten)
		return (gotsomething);
	/* double-check records: mark any still-unclaimed ones for deletion */
	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, rec_type, 0)) > 0)
		if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT))
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);

	return (0);
}
4853N/A
4853N/A/*
4853N/A * FUNCTION: sp_halt()
4853N/A * INPUT: cmd - halt cmd.
4853N/A * setno - set number.
4853N/A * RETURNS: 0 - success.
4853N/A * 1 - err.
4853N/A * PURPOSE: Perform driver halt operations. As with stripe, we
4853N/A * support MD_HALT_CHECK and MD_HALT_DOIT. The first
4853N/A * does a check to see if halting can be done safely
4853N/A * (no open soft partitions), the second cleans up and
4853N/A * shuts down the driver.
4853N/A */
4853N/Astatic int
4853N/Asp_halt(md_haltcmd_t cmd, set_t setno)
4853N/A{
4853N/A int i;
4853N/A mdi_unit_t *ui;
4853N/A minor_t mnum;
4853N/A
4853N/A if (cmd == MD_HALT_CLOSE)
4853N/A return (0);
4853N/A
3853N/A if (cmd == MD_HALT_OPEN)
return (0);
if (cmd == MD_HALT_UNLOAD)
return (0);
if (cmd == MD_HALT_CHECK) {
for (i = 0; i < md_nunits; i++) {
mnum = MD_MKMIN(setno, i);
if ((ui = MDI_UNIT(mnum)) == NULL)
continue;
if (ui->ui_opsindex != sp_md_ops.md_selfindex)
continue;
if (md_unit_isopen(ui))
return (1);
}
return (0);
}
if (cmd != MD_HALT_DOIT)
return (1);
for (i = 0; i < md_nunits; i++) {
mnum = MD_MKMIN(setno, i);
if ((ui = MDI_UNIT(mnum)) == NULL)
continue;
if (ui->ui_opsindex != sp_md_ops.md_selfindex)
continue;
reset_sp((mp_unit_t *)MD_UNIT(mnum), mnum, 0);
}
return (0);
}
/*
 * FUNCTION:	sp_open_dev()
 * INPUT:	un	- unit structure.
 *		oflags	- open flags.
 * OUTPUT:	none.
 * RETURNS:	0	- success.
 *		non-zero	- err.
 * PURPOSE:	open underlying device via md_layered_open.
 */
static int
sp_open_dev(mp_unit_t *un, int oflags)
{
	minor_t		mnum = MD_SID(un);
	set_t		setno = MD_MIN2SET(mnum);
	side_t		side = mddb_getsidenum(setno);
	md_dev64_t	dev = un->un_dev;
	int		rval;

	/*
	 * If the underlying device is a regular (non-metadevice) device
	 * and it has a registered device id, re-resolve the dev_t by
	 * device id first.
	 */
	if (md_getmajor(dev) != md_major &&
	    md_devid_found(setno, side, un->un_key) == 1)
		dev = md_resolve_bydevid(mnum, dev, un->un_key);

	/* md_layered_open() may update the dev_t; always store the result */
	rval = md_layered_open(mnum, &dev, oflags);
	un->un_dev = dev;

	return (rval ? ENXIO : 0);
}
/*
 * FUNCTION:	sp_open()
 * INPUT:	dev	- device to open.
 *		flag	- pass-through flag.
 *		otyp	- pass-through open type.
 *		cred_p	- credentials.
 *		md_oflags	- open flags.
 * OUTPUT:	none.
 * RETURNS:	0	- success.
 *		non-zero	- err.
 * PURPOSE:	open a soft partition.
 */
/* ARGSUSED */
static int
sp_open(
	dev_t		*dev,
	int		flag,
	int		otyp,
	cred_t		*cred_p,
	int		md_oflags
)
{
	minor_t		mnum = getminor(*dev);
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	mp_unit_t	*un;
	int		err = 0;
	set_t		setno;

	/*
	 * When doing an open of a multi owner metadevice, check to see if this
	 * node is a starting node and if a reconfig cycle is underway.
	 * If so, the system isn't sufficiently set up enough to handle the
	 * open (which involves I/O during sp_validate), so fail with ENXIO.
	 */
	setno = MD_MIN2SET(mnum);
	if ((md_set[setno].s_status & (MD_SET_MNSET | MD_SET_MN_START_RC)) ==
	    (MD_SET_MNSET | MD_SET_MN_START_RC)) {
		return (ENXIO);
	}

	/* grab necessary locks */
	un = (mp_unit_t *)md_unit_openclose_enter(ui);
	setno = MD_UN2SET(un);

	/* open underlying device, if necessary (first open, or probe) */
	if (! md_unit_isopen(ui) || (md_oflags & MD_OFLG_PROBEDEV)) {
		if ((err = sp_open_dev(un, md_oflags)) != 0)
			goto out;

		if (MD_MNSET_SETNO(setno)) {
			/* For probe, don't incur the overhead of validate */
			if (!(md_oflags & MD_OFLG_PROBEDEV)) {
				/*
				 * Don't call sp_validate while
				 * unit_openclose lock is held.  So, actually
				 * open the device, drop openclose lock,
				 * call sp_validate, reacquire openclose lock,
				 * and close the device.  If sp_validate
				 * succeeds, then device will be re-opened.
				 */
				if ((err = md_unit_incopen(mnum, flag,
				    otyp)) != 0)
					goto out;

				/*
				 * OPENINPROGRESS keeps racing opens/closes
				 * out while the openclose lock is dropped.
				 */
				mutex_enter(&ui->ui_mx);
				ui->ui_lock |= MD_UL_OPENINPROGRESS;
				mutex_exit(&ui->ui_mx);
				md_unit_openclose_exit(ui);
				if (otyp != OTYP_LYR)
					rw_exit(&md_unit_array_rw.lock);

				err = sp_validate(un);

				if (otyp != OTYP_LYR)
					rw_enter(&md_unit_array_rw.lock,
					    RW_READER);
				(void) md_unit_openclose_enter(ui);
				(void) md_unit_decopen(mnum, otyp);
				mutex_enter(&ui->ui_mx);
				ui->ui_lock &= ~MD_UL_OPENINPROGRESS;
				cv_broadcast(&ui->ui_cv);
				mutex_exit(&ui->ui_mx);
				/*
				 * Should be in the same state as before
				 * the sp_validate.
				 */
				if (err != 0) {
					/* close the device opened above */
					md_layered_close(un->un_dev, md_oflags);
					err = EIO;
					goto out;
				}
			}
			/*
			 * As we're a multi-owner metadevice we need to ensure
			 * that all nodes have the same idea of the status.
			 * sp_validate() will mark the device as errored (if
			 * it cannot read the watermark) or ok (if it was
			 * previously errored but the watermark is now valid).
			 * This code-path is only entered on the non-probe open
			 * so we will maintain the errored state during a probe
			 * call.  This means the sys-admin must metarecover -m
			 * to reset the soft-partition error.
			 */
		} else {
			/* For probe, don't incur the overhead of validate */
			if (!(md_oflags & MD_OFLG_PROBEDEV) &&
			    (err = sp_validate(un)) != 0) {
				/* close the device opened above */
				md_layered_close(un->un_dev, md_oflags);
				err = EIO;
				goto out;
			} else {
				/*
				 * we succeeded in validating the on disk
				 * format versus the in core, so reset the
				 * status if it's in error
				 */
				if (un->un_status == MD_SP_ERR) {
					un->un_status = MD_SP_OK;
				}
			}
		}
	}

	/* count open */
	if ((err = md_unit_incopen(mnum, flag, otyp)) != 0)
		goto out;

out:
	md_unit_openclose_exit(ui);
	return (err);
}
/*
 * FUNCTION:	sp_close()
 * INPUT:	dev	- device to close.
 *		flag	- pass-through flag.
 *		otyp	- pass-through type.
 *		cred_p	- credentials.
 *		md_cflags	- close flags.
 * OUTPUT:	none.
 * RETURNS:	0	- success.
 *		non-zero	- err.
 * PURPOSE:	close a soft partition.
 */
/* ARGSUSED */
static int
sp_close(
	dev_t		dev,
	int		flag,
	int		otyp,
	cred_t		*cred_p,
	int		md_cflags
)
{
	minor_t		mnum = getminor(dev);
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	mp_unit_t	*un;
	int		err = 0;

	/* grab necessary locks */
	un = (mp_unit_t *)md_unit_openclose_enter(ui);

	/* count closed */
	if ((err = md_unit_decopen(mnum, otyp)) != 0)
		goto out;

	/* close devices, if necessary (last close, or probe) */
	if (! md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) {
		md_layered_close(un->un_dev, md_cflags);
	}

	/*
	 * If a MN set and transient capabilities (eg ABR/DMR) are set,
	 * clear these capabilities if this is the last close in
	 * the cluster.
	 * The openclose lock must be dropped before the cluster-wide
	 * capability-clearing call, hence the early return here.
	 */
	if (MD_MNSET_SETNO(MD_UN2SET(un)) &&
	    (ui->ui_tstate & MD_ABR_CAP)) {
		md_unit_openclose_exit(ui);
		mdmn_clear_all_capabilities(mnum);
		return (0);
	}
	/* unlock, return success */
out:
	md_unit_openclose_exit(ui);
	return (err);
}
/*
 * Static buffer used by sp_dump().  A single global is sufficient because
 * sp_dump() runs at crash-dump time, when no other metadevice activity
 * can be in progress and sleeping allocation is not allowed.
 */
static struct buf dumpbuf;
/*
 * FUNCTION:	sp_dump()
 * INPUT:	dev	- device to dump to.
 *		addr	- address to dump.
 *		blkno	- blkno on device.
 *		nblk	- number of blocks to dump.
 * OUTPUT:	none.
 * RETURNS:	result from bdev_dump.
 * PURPOSE:	This routine dumps memory to the disk.  It assumes that
 *		the memory has already been mapped into mainbus space.
 *		It is called at disk interrupt priority when the system
 *		is in trouble.
 * NOTE:	this function is defined using 32-bit arguments,
 *		but soft partitioning is internally 64-bit.  Arguments
 *		are casted where appropriate.
 */
static int
sp_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	mp_unit_t	*un;
	buf_t		*bp;
	sp_ext_length_t	nb;
	daddr_t		mapblk;
	int		result;
	int		more;
	int		saveresult = 0;

	/*
	 * Don't need to grab the unit lock.
	 * Cause nothing else is supposed to be happenning.
	 * Also dump is not supposed to sleep.
	 */
	un = (mp_unit_t *)MD_UNIT(getminor(dev));

	/* reject dumps that start or extend beyond the end of the unit */
	if ((diskaddr_t)blkno >= un->c.un_total_blocks)
		return (EINVAL);

	if (((diskaddr_t)blkno + nblk) > un->c.un_total_blocks)
		return (EINVAL);

	bp = &dumpbuf;
	nb = (sp_ext_length_t)dbtob(nblk);

	/*
	 * Walk the request one extent fragment at a time: sp_mapbuf()
	 * fills in the physical edev/blkno/bcount for the current
	 * fragment and returns non-zero while more fragments remain.
	 *
	 * NOTE(review): a -1 (error) return from sp_mapbuf() is not
	 * distinguished from "more fragments" here -- confirm that mapping
	 * cannot fail for a range already bounds-checked above.
	 */
	do {
		bzero((caddr_t)bp, sizeof (*bp));
		more = sp_mapbuf(un, (sp_ext_offset_t)blkno, nb, bp);
		nblk = (int)(btodb(bp->b_bcount));
		mapblk = bp->b_blkno;
		result = bdev_dump(bp->b_edev, addr, mapblk, nblk);
		/* remember that a fragment failed, but keep dumping */
		if (result)
			saveresult = result;

		nb -= bp->b_bcount;
		addr += bp->b_bcount;
		blkno += nblk;
	} while (more);

	return (saveresult);
}
/*
 * FUNCTION:	sp_imp_set()
 * INPUT:	setno	- set number assigned to the imported set on this
 *			  host.
 * OUTPUT:	none.
 * RETURNS:	1	- at least one soft partition record was updated.
 *		0	- no soft partition records were found/updated.
 * PURPOSE:	Walk the soft partition records of an imported diskset and
 *		rewrite each unit's self minor number, parent minor number
 *		and record id so they refer to the newly assigned set
 *		number.  Top-level friendly-name devices additionally have
 *		their minor number updated in the namespace.
 */
static int
sp_imp_set(
	set_t	setno
)
{
	mddb_recid_t	recid;
	int		gotsomething;
	mddb_type_t	rec_type;
	mddb_de_ic_t	*dep;
	mddb_rb32_t	*rbp;
	mp_unit_t	*un64;
	mp_unit32_od_t	*un32;
	md_dev64_t	self_devt;
	minor_t		*self_id;	/* minor needs to be updated */
	md_parent_t	*parent_id;	/* parent needs to be updated */
	mddb_recid_t	*record_id;	/* record id needs to be updated */

	gotsomething = 0;

	rec_type = (mddb_type_t)md_getshared_key(setno,
	    sp_md_ops.md_driver.md_drivername);
	recid = mddb_makerecid(setno, 0);

	while ((recid = mddb_getnextrec(recid, rec_type, 0)) > 0) {
		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
			continue;

		dep = mddb_getrecdep(recid);
		rbp = dep->de_rb;

		switch (rbp->rb_revision) {
		case MDDB_REV_RB:
		case MDDB_REV_RBFN:
			/*
			 * Small (32-bit on-disk) device record.
			 */
			un32 = (mp_unit32_od_t *)mddb_getrecaddr(recid);
			self_id = &(un32->c.un_self_id);
			parent_id = &(un32->c.un_parent);
			record_id = &(un32->c.un_record_id);

			if (!md_update_minor(setno, mddb_getsidenum
			    (setno), un32->un_key))
				goto out;
			break;
		case MDDB_REV_RB64:
		case MDDB_REV_RB64FN:
			/*
			 * Large (64-bit) device record.
			 */
			un64 = (mp_unit_t *)mddb_getrecaddr(recid);
			self_id = &(un64->c.un_self_id);
			parent_id = &(un64->c.un_parent);
			record_id = &(un64->c.un_record_id);

			if (!md_update_minor(setno, mddb_getsidenum
			    (setno), un64->un_key))
				goto out;
			break;
		default:
			/*
			 * Unknown record-block revision: skip the record
			 * rather than dereferencing the uninitialized
			 * self_id/parent_id/record_id pointers below.
			 */
			continue;
		}

		/*
		 * If this is a top level and a friendly name metadevice,
		 * update its minor in the namespace.
		 */
		if ((*parent_id == MD_NO_PARENT) &&
		    ((rbp->rb_revision == MDDB_REV_RBFN) ||
		    (rbp->rb_revision == MDDB_REV_RB64FN))) {

			self_devt = md_makedevice(md_major, *self_id);
			if (!md_update_top_device_minor(setno,
			    mddb_getsidenum(setno), self_devt))
				goto out;
		}

		/*
		 * Update unit with the imported setno: remint the self and
		 * parent minors and the record id in the new set.
		 */
		mddb_setrecprivate(recid, MD_PRV_GOTIT);

		*self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id));
		if (*parent_id != MD_NO_PARENT)
			*parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id));
		*record_id = MAKERECID(setno, DBID(*record_id));

		gotsomething = 1;
	}
out:
	return (gotsomething);
}
/*
 * Named-service table: soft partitions export no named services, so only
 * the terminating entry is present.
 */
static md_named_services_t sp_named_services[] = {
	{NULL,					0}
};
/*
 * Operations vector for the soft partition driver, registered with the
 * md framework via MD_PLUGIN_MISC_MODULE below.  NULL entries are
 * services this driver does not provide.
 */
md_ops_t sp_md_ops = {
	sp_open,		/* open */
	sp_close,		/* close */
	md_sp_strategy,		/* strategy */
	NULL,			/* print */
	sp_dump,		/* dump */
	NULL,			/* read */
	NULL,			/* write */
	md_sp_ioctl,		/* ioctl */
	sp_snarf,		/* snarf */
	sp_halt,		/* halt */
	NULL,			/* aread */
	NULL,			/* awrite */
	sp_imp_set,		/* import set */
	sp_named_services	/* named services */
};
/*
 * FUNCTION:	init_init()
 * PURPOSE:	Module-load initialization: create the kmem caches for the
 *		parent (per-request) and child (per-fragment) save
 *		structures used by the strategy code.  The child cache's
 *		object size replaces the declared trailing buf_t of
 *		md_spcs_t with biosize() bytes, so the embedded cs_buf is
 *		as large as a real buf on this platform.
 */
static void
init_init()
{
	sp_parent_cache = kmem_cache_create("md_softpart_parent",
	    sizeof (md_spps_t), 0, sp_parent_constructor,
	    sp_parent_destructor, sp_run_queue, NULL, NULL, 0);
	sp_child_cache = kmem_cache_create("md_softpart_child",
	    sizeof (md_spcs_t) - sizeof (buf_t) + biosize(), 0,
	    sp_child_constructor, sp_child_destructor, sp_run_queue,
	    NULL, NULL, 0);
}
/*
 * FUNCTION:	fini_uninit()
 * PURPOSE:	Module-unload teardown: destroy the kmem caches created by
 *		init_init() and clear the cache pointers.
 */
static void
fini_uninit()
{
	kmem_cache_destroy(sp_child_cache);
	kmem_cache_destroy(sp_parent_cache);
	sp_child_cache = NULL;
	sp_parent_cache = NULL;
}
/*
 * Define the module linkage: registers this driver as an md plugin and
 * arranges for init_init()/fini_uninit() to run at load/unload.
 */
MD_PLUGIN_MISC_MODULE("soft partition module", init_init(), fini_uninit())