dsw_dev.c revision 3270659f55e0928d6edec3d26217cc29398a8149
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/types.h>
#include <sys/time.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/ddi.h>
#include <sys/nsc_thread.h>
#include <sys/unistat/spcs_s.h>
#include <sys/unistat/spcs_errors.h>
#include <sys/unistat/spcs_s_k.h>
#include <sys/nsctl/nsctl.h>
#include "dsw.h"
#include "dsw_dev.h"
#include "../rdc/rdc_update.h"
#include <sys/nskernd.h>
#include <sys/sdt.h> /* dtrace is S10 or later */
#ifdef DS_DDICT
#include "../contract.h"
#endif
/*
* Instant Image
*
* This file contains the core implementation of II.
*
* II is implemented as a simple filter module that inserts itself between
* the user (SV, STE, etc.) and SDBC or NET.
*
*/
#define REMOTE_VOL(s, ip) (((s) && ((ip->bi_flags)&DSW_SHDEXPORT)) || \
(!(s)&&((ip->bi_flags)&DSW_SHDIMPORT)))
#define total_ref(ip) ((ip->bi_shdref + ip->bi_shdrref + ip->bi_bmpref) + \
(NSHADOWS(ip) ? 0 : ip->bi_mstref + ip->bi_mstrref))
#define II_TAIL_COPY(d, s, m, t) bcopy(&(s.m), &(d.m), \
sizeof (d) - (uint_t)&((t *)0)->m)
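/*
 * II_TAIL_COPY(d, s, m, t) copies from member 'm' through to the end of the
 * destination structure 'd', sizing the copy as sizeof (d) minus the offset
 * of 'm' within type 't'.  Illustrative use only (taken from the ioctl
 * handlers below):
 *
 *	II_TAIL_COPY(uconf, (*uconf32), master_vol, dsw_config_t);
 *
 * copies everything from master_vol onwards, i.e. the tail that the ILP32
 * and LP64 ioctl structures share; the differently sized status handle is
 * converted separately by each caller.
 */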
extern dev_info_t *ii_dip;
#define II_LINK_CLUSTER(ip, cluster) \
_ii_ll_add(ip, &_ii_cluster_mutex, &_ii_cluster_top, cluster, \
&ip->bi_cluster)
#define II_UNLINK_CLUSTER(ip) \
_ii_ll_remove(ip, &_ii_cluster_mutex, &_ii_cluster_top, &ip->bi_cluster)
#define II_LINK_GROUP(ip, group) \
_ii_ll_add(ip, &_ii_group_mutex, &_ii_group_top, group, &ip->bi_group)
#define II_UNLINK_GROUP(ip) \
_ii_ll_remove(ip, &_ii_group_mutex, &_ii_group_top, &ip->bi_group)
_ii_info_t *_ii_info_top;
_ii_info_t *_ii_mst_top = 0;
_ii_overflow_t *_ii_overflow_top;
_ii_lsthead_t *_ii_cluster_top;
_ii_lsthead_t *_ii_group_top;
int ii_debug; /* level of cmn_err noise */
int ii_bitmap; /* bitmap operations switch */
uint_t ii_header = 16; /* Undocumented tunable (with adb!), start */
/* of area cleared in volume when a dependent */
/* shadow is disabled. */
/* max # of chunks in copy loop before delay */
int ii_throttle_unit = MIN_THROTTLE_UNIT;
/* length of delay during update loop */
int ii_throttle_delay = MIN_THROTTLE_DELAY;
int ii_copy_direct = 1;
int ii_nconcopy = 10; /* default value when starting with no cache */
kmutex_t _ii_cluster_mutex;
kmutex_t _ii_group_mutex;
static int _ii_shutting_down = 0;
static nsc_io_t *_ii_io, *_ii_ior;
static nsc_mem_t *_ii_local_mem;
static nsc_def_t _ii_fd_def[], _ii_io_def[], _ii_ior_def[];
static kmutex_t _ii_info_mutex;
static kmutex_t _ii_overflow_mutex;
static kmutex_t _ii_config_mutex;
static _ii_bmp_ops_t alloc_buf_bmp, kmem_buf_bmp;
static nsc_svc_t *ii_volume_update; /* IIVolumeUpdate token */
static nsc_svc_t *ii_report_luns; /* IIReportLuns token */
static nsc_svc_t *ii_get_initiators; /* IIGetInitiators token */
static ksema_t _ii_concopy_sema;
static int _ii_concopy_init = 0;
static int _ii_instance = 0;
void _ii_deinit_dev();
static void _ii_info_free(_ii_info_t *ip);
static void _ii_info_freeshd(_ii_info_t *ip);
static void ii_sibling_free(_ii_info_t *ip);
ii_header_t *_ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp);
int _ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip,
nsc_buf_t *tmp);
static void _ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip,
nsc_buf_t *tmp);
static int _ii_copyvol(_ii_info_t *, int, int, spcs_s_info_t, int);
static void _ii_stopvol(_ii_info_t *ip);
static int _ii_stopcopy(_ii_info_t *ip);
static _ii_info_t *_ii_find_set(char *volume);
static _ii_info_t *_ii_find_vol(char *, int);
static _ii_overflow_t *_ii_find_overflow(char *volume);
static void _ii_ioctl_done(_ii_info_t *ip);
static void _ii_lock_chunk(_ii_info_t *ip, chunkid_t);
static void _ii_unlock_chunks(_ii_info_t *ip, chunkid_t, int);
void _ii_error(_ii_info_t *ip, int error_type);
static nsc_buf_t *_ii_alloc_handle(void (*d_cb)(), void (*r_cb)(),
void (*w_cb)(), ii_fd_t *bfd);
static int _ii_free_handle(ii_buf_t *h, ii_fd_t *bfd);
extern nsc_size_t ii_btsize(nsc_size_t);
extern int ii_tinit(_ii_info_t *);
extern chunkid_t ii_tsearch(_ii_info_t *, chunkid_t);
extern void ii_tdelete(_ii_info_t *, chunkid_t);
extern void ii_reclaim_overflow(_ii_info_t *);
static void ii_overflow_free(_ii_info_t *ip, int disable);
static int ii_overflow_attach(_ii_info_t *, char *, int);
int _ii_nsc_io(_ii_info_t *, int, nsc_fd_t *, int, nsc_off_t, unsigned char *,
nsc_size_t);
static nsc_path_t *_ii_register_path(char *path, int type, nsc_io_t *io);
static int _ii_unregister_path(nsc_path_t *sp, int flag, char *type);
static int _ii_reserve_begin(_ii_info_t *ip);
static int _ii_wait_for_it(_ii_info_t *ip);
static void _ii_reserve_end(_ii_info_t *ip);
static kstat_t *_ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op);
static int _ii_ll_add(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char *,
char **);
static int _ii_ll_remove(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char **);
#define _ii_unlock_chunk(ip, chunk) _ii_unlock_chunks(ip, chunk, 1)
extern const int dsw_major_rev;
extern const int dsw_minor_rev;
extern const int dsw_micro_rev;
extern const int dsw_baseline_rev;
/*
* These constants are used by ii_overflow_free() to indicate how the
* reclamation should take place.
* NO_RECLAIM: just detach the overflow from the set; do not
* attempt to reclaim chunks, do not decrement the
* used-by count
* RECLAIM: reclaim all chunks before decrementing the used-by count
* INIT_OVR: decrement the used-by count only; do not reclaim chunks
*/
#define NO_RECLAIM 0
#define RECLAIM 1
#define INIT_OVR 2
struct copy_args { /* arguments passed to copy process */
_ii_info_t *ip;
int flag;
int rtype;
int wait;
spcs_s_info_t kstatus;
int rc;
};
/* set-specific kstats info */
ii_kstat_set_t ii_kstat_set = {
{ DSW_SKSTAT_SIZE, KSTAT_DATA_ULONG },
{ DSW_SKSTAT_MTIME, KSTAT_DATA_ULONG },
{ DSW_SKSTAT_FLAGS, KSTAT_DATA_ULONG },
{ DSW_SKSTAT_THROTTLE_UNIT, KSTAT_DATA_ULONG },
{ DSW_SKSTAT_THROTTLE_DELAY, KSTAT_DATA_ULONG },
{ DSW_SKSTAT_SHDCHKS, KSTAT_DATA_ULONG },
{ DSW_SKSTAT_SHDCHKUSED, KSTAT_DATA_ULONG },
{ DSW_SKSTAT_SHDBITS, KSTAT_DATA_ULONG },
{ DSW_SKSTAT_COPYBITS, KSTAT_DATA_ULONG },
{ DSW_SKSTAT_MSTA, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_MSTB, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_MSTC, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_MSTD, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_SETA, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_SETB, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_SETC, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_SETD, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_BMPA, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_BMPB, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_BMPC, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_BMPD, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_OVRA, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_OVRB, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_OVRC, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_OVRD, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_MSTIO, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_SHDIO, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_BMPIO, KSTAT_DATA_CHAR },
{ DSW_SKSTAT_OVRIO, KSTAT_DATA_CHAR },
};
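/*
 * Note that ii_kstat_set is a single static template shared by every set:
 * each set's named kstat is created with KSTAT_FLAG_VIRTUAL and points its
 * ks_data at this structure, relying on the ks_update callback
 * (ii_set_stats_update) to fill in the values for whichever set is being
 * read.
 */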
/*
* _ii_init_dev
* Initialise the shadow driver
*
*/
int
_ii_init_dev()
{
_ii_io = nsc_register_io("ii", NSC_II_ID|NSC_REFCNT|NSC_FILTER,
_ii_io_def);
if (_ii_io == NULL)
cmn_err(CE_WARN, "!ii: nsc_register_io failed.");
_ii_ior = nsc_register_io("ii-raw", NSC_IIR_ID|NSC_REFCNT|NSC_FILTER,
_ii_ior_def);
if (_ii_ior == NULL)
cmn_err(CE_WARN, "!ii: nsc_register_io r failed.");
_ii_local_mem = nsc_register_mem("ii:kmem", NSC_MEM_LOCAL, 0);
if (_ii_local_mem == NULL)
cmn_err(CE_WARN, "!ii: nsc_register_mem failed.");
if (!_ii_io || !_ii_ior || !_ii_local_mem) {
_ii_deinit_dev();
return (ENOMEM);
}
mutex_init(&_ii_info_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&_ii_overflow_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&_ii_config_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&_ii_cluster_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&_ii_group_mutex, NULL, MUTEX_DRIVER, NULL);
ii_volume_update = nsc_register_svc("RDCVolumeUpdated", 0);
ii_report_luns = nsc_register_svc("IIReportLuns", 0);
ii_get_initiators = nsc_register_svc("IIGetInitiators", 0);
if (!ii_volume_update || !ii_report_luns || !ii_get_initiators) {
_ii_deinit_dev();
return (ENOMEM);
}
return (0);
}
/*
* _ii_deinit_dev
* De-initialise the shadow driver
*
*/
void
_ii_deinit_dev()
{
if (_ii_io)
(void) nsc_unregister_io(_ii_io, 0);
if (_ii_ior)
(void) nsc_unregister_io(_ii_ior, 0);
if (_ii_local_mem)
(void) nsc_unregister_mem(_ii_local_mem);
if (ii_volume_update)
(void) nsc_unregister_svc(ii_volume_update);
if (ii_report_luns)
(void) nsc_unregister_svc(ii_report_luns);
if (ii_get_initiators)
(void) nsc_unregister_svc(ii_get_initiators);
mutex_destroy(&_ii_info_mutex);
mutex_destroy(&_ii_overflow_mutex);
mutex_destroy(&_ii_config_mutex);
mutex_destroy(&_ii_cluster_mutex);
mutex_destroy(&_ii_group_mutex);
if (_ii_concopy_init)
sema_destroy(&_ii_concopy_sema);
_ii_concopy_init = 0;
}
static char *
ii_pathname(nsc_fd_t *fd)
{
char *rc;
if (fd == NULL || (rc = nsc_pathname(fd)) == NULL)
return ("");
else
return (rc);
}
/*
* _ii_rlse_d
* Internal mechanics of _ii_rlse_devs(). Takes care of
* resetting the ownership information as required.
*/
static void
_ii_rlse_d(ip, mst, raw)
_ii_info_t *ip;
int mst, raw;
{
_ii_info_dev_t *cip;
_ii_info_dev_t *rip;
rip = mst ? (ip->bi_mstrdev) : &(ip->bi_shdrdev);
cip = mst ? (ip->bi_mstdev) : &(ip->bi_shddev);
DTRACE_PROBE2(_ii_rlse_d_type,
_ii_info_dev_t *, rip,
_ii_info_dev_t *, cip);
if (RSRV(cip)) {
if (raw) {
ASSERT(cip->bi_orsrv > 0);
cip->bi_orsrv--;
} else {
ASSERT(cip->bi_rsrv > 0);
cip->bi_rsrv--;
}
if (cip->bi_rsrv > 0) {
nsc_set_owner(cip->bi_fd, cip->bi_iodev);
} else if (cip->bi_orsrv > 0) {
nsc_set_owner(cip->bi_fd, rip->bi_iodev);
} else {
nsc_set_owner(cip->bi_fd, NULL);
}
if (!RSRV(cip)) {
nsc_release(cip->bi_fd);
}
} else {
if (raw) {
ASSERT(rip->bi_rsrv > 0);
rip->bi_rsrv--;
} else {
ASSERT(rip->bi_orsrv > 0);
rip->bi_orsrv--;
}
if (rip->bi_rsrv > 0) {
nsc_set_owner(rip->bi_fd, rip->bi_iodev);
} else if (rip->bi_orsrv > 0) {
nsc_set_owner(rip->bi_fd, cip->bi_iodev);
} else {
nsc_set_owner(rip->bi_fd, NULL);
}
if (!RSRV(rip)) {
rip->bi_flag = 0;
nsc_release(rip->bi_fd);
cv_broadcast(&ip->bi_releasecv);
}
}
}
/*
* _ii_rlse_devs
* Release named underlying devices.
*
* NOTE: the 'devs' argument must be the same as that passed to
* the preceding _ii_rsrv_devs call.
*/
void
_ii_rlse_devs(ip, devs)
_ii_info_t *ip;
int devs;
{
ASSERT(!(devs & (MST|SHD)));
ASSERT(ip->bi_head != (_ii_info_t *)0xdeadbeef);
if (!ip) {
cmn_err(CE_WARN, "!ii: _ii_rlse_devs null ip");
return;
}
mutex_enter(&ip->bi_rsrvmutex);
DTRACE_PROBE(_ii_rlse_devs_mutex);
if ((devs&(MST|MSTR)) != 0 && (ip->bi_flags&DSW_SHDIMPORT) == 0) {
if (NSHADOWS(ip) && ip != ip->bi_master)
_ii_rlse_devs(ip->bi_master, devs&(MST|MSTR));
else
_ii_rlse_d(ip, 1, (devs&MSTR));
}
if ((devs&(SHD|SHDR)) != 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0) {
_ii_rlse_d(ip, 0, (devs&SHDR));
}
if ((devs&BMP) != 0 && ip->bi_bmpfd) {
if (--(ip->bi_bmprsrv) == 0)
nsc_release(ip->bi_bmpfd);
}
ASSERT(ip->bi_bmprsrv >= 0);
ASSERT(ip->bi_shdrsrv >= 0);
ASSERT(ip->bi_shdrrsrv >= 0);
mutex_exit(&ip->bi_rsrvmutex);
}
/*
* _ii_rsrv_d
* Reserve the flagged device, unless its companion is already reserved,
* in which case increase the reserve count on the companion.
*/
static int
_ii_rsrv_d(int raw, _ii_info_dev_t *rid, _ii_info_dev_t *cid, int flag,
_ii_info_t *ip)
{
_ii_info_dev_t *p = NULL;
int other = 0;
int rc;
/*
* If the user wants to do a cache reserve and the device is already
* raw reserved, we need to do a real nsc_reserve, so wait
* until the release has been done.
*/
if (RSRV(rid) && (flag == II_EXTERNAL) &&
(raw == 0) && (rid->bi_flag != II_EXTERNAL)) {
ip->bi_release++;
while (RSRV(rid)) {
DTRACE_PROBE1(_ii_rsrv_d_wait, _ii_info_dev_t *, rid);
cv_wait(&ip->bi_releasecv, &ip->bi_rsrvmutex);
DTRACE_PROBE1(_ii_rsrv_d_resume, _ii_info_dev_t *, rid);
}
ip->bi_release--;
}
if (RSRV(rid)) {
p = rid;
if (!raw) {
other = 1;
}
} else if (RSRV(cid)) {
p = cid;
if (raw) {
other = 1;
}
}
if (p) {
if (other) {
p->bi_orsrv++;
} else {
p->bi_rsrv++;
}
if (p->bi_iodev) {
nsc_set_owner(p->bi_fd, p->bi_iodev);
}
return (0);
}
p = raw ? rid : cid;
if ((rc = nsc_reserve(p->bi_fd, 0)) == 0) {
if (p->bi_iodev) {
nsc_set_owner(p->bi_fd, p->bi_iodev);
}
p->bi_rsrv++;
if (raw)
p->bi_flag = flag;
}
return (rc);
}
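/*
 * Roughly, bi_rsrv counts reservations satisfied natively by a descriptor
 * (raw requests on the raw descriptor, cached requests on the cached one),
 * while bi_orsrv counts requests of the other flavour that were piggy-backed
 * onto an existing reserve of this descriptor.  _ii_rlse_d() above decrements
 * the matching counter and only calls nsc_release() once both drop to zero.
 */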
/*
* _ii_rsrv_devs
* Reserve named underlying devices.
*
*/
int
_ii_rsrv_devs(_ii_info_t *ip, int devs, int flag)
{
int rc = 0;
int got = 0;
ASSERT(!(devs & (MST|SHD)));
if (!ip) {
cmn_err(CE_WARN, "!ii: _ii_rsrv_devs null ip");
return (EINVAL);
}
mutex_enter(&ip->bi_rsrvmutex);
DTRACE_PROBE(_ii_rsrv_devs_mutex);
if (rc == 0 && (devs&(MST|MSTR)) != 0 &&
(ip->bi_flags&DSW_SHDIMPORT) == 0) {
DTRACE_PROBE(_ii_rsrv_devs_master);
if (NSHADOWS(ip) && ip != ip->bi_master) {
if ((rc = _ii_rsrv_devs(ip->bi_master, devs&(MST|MSTR),
flag)) != 0) {
cmn_err(CE_WARN,
"!ii: nsc_reserve multi-master failed");
} else {
got |= devs&(MST|MSTR);
}
} else {
if ((rc = _ii_rsrv_d((devs&MSTR) != 0, ip->bi_mstrdev,
ip->bi_mstdev, flag, ip)) != 0) {
cmn_err(CE_WARN,
"!ii: nsc_reserve master failed %d", rc);
} else {
got |= (devs&(MST|MSTR));
}
}
}
if (rc == 0 && (devs&(SHD|SHDR)) != 0 &&
(ip->bi_flags&DSW_SHDEXPORT) == 0) {
DTRACE_PROBE(_ii_rsrv_devs_shadow);
if ((rc = _ii_rsrv_d((devs&SHDR) != 0, &ip->bi_shdrdev,
&ip->bi_shddev, flag, ip)) != 0) {
cmn_err(CE_WARN,
"!ii: nsc_reserve shadow failed %d", rc);
} else {
got |= (devs&(SHD|SHDR));
}
}
if (rc == 0 && (devs&BMP) != 0 && ip->bi_bmpfd) {
DTRACE_PROBE(_ii_rsrv_devs_bitmap);
if ((ip->bi_bmprsrv == 0) &&
(rc = nsc_reserve(ip->bi_bmpfd, 0)) != 0) {
cmn_err(CE_WARN,
"!ii: nsc_reserve bitmap failed %d", rc);
} else {
(ip->bi_bmprsrv)++;
got |= BMP;
}
}
mutex_exit(&ip->bi_rsrvmutex);
if (rc != 0 && got != 0)
_ii_rlse_devs(ip, got);
return (rc);
}
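/*
 * Illustrative usage only: callers reserve the devices they need, perform
 * their I/O, then release with exactly the same device mask, e.g.
 *
 *	if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) {
 *		... bitmap I/O ...
 *		_ii_rlse_devs(ip, BMP);
 *	}
 */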
static int
_ii_reserve_begin(_ii_info_t *ip)
{
int rc;
mutex_enter(&ip->bi_rlsemutex);
if ((rc = _ii_wait_for_it(ip)) == 0) {
++ip->bi_rsrvcnt;
}
mutex_exit(&ip->bi_rlsemutex);
return (rc);
}
static int
_ii_wait_for_it(_ii_info_t *ip)
{
int nosig;
nosig = 1;
while (ip->bi_rsrvcnt > 0) {
nosig = cv_wait_sig(&ip->bi_reservecv, &ip->bi_rlsemutex);
if (!nosig) {
break;
}
}
return (nosig? 0 : EINTR);
}
static void
_ii_reserve_end(_ii_info_t *ip)
{
mutex_enter(&ip->bi_rlsemutex);
if (ip->bi_rsrvcnt <= 0) {
mutex_exit(&ip->bi_rlsemutex);
return;
}
--ip->bi_rsrvcnt;
mutex_exit(&ip->bi_rlsemutex);
cv_broadcast(&ip->bi_reservecv);
}
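/*
 * Illustrative pairing only: _ii_reserve_begin() waits until no other
 * reservation sequence is in progress (returning EINTR if interrupted) and
 * must be matched by a later _ii_reserve_end(), as _ii_config() below does
 * around the enable/resume sequence.
 */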
static int
ii_fill_copy_bmp(_ii_info_t *ip)
{
int rc;
chunkid_t max_chunk, chunk_num;
if ((rc = II_FILL_COPY_BMP(ip)) != 0)
return (rc);
/*
* make certain that the last bits of the last byte of the bitmap
* aren't filled as they may be copied out to the user.
*/
chunk_num = ip->bi_size / DSW_SIZE;
if ((ip->bi_size % DSW_SIZE) != 0)
++chunk_num;
max_chunk = chunk_num;
if ((max_chunk & 0x7) != 0)
max_chunk = (max_chunk + 7) & ~7;
DTRACE_PROBE2(_ii_fill_copy_bmp_chunks, chunkid_t, chunk_num,
chunkid_t, max_chunk);
for (; chunk_num < max_chunk; chunk_num++) {
(void) II_CLR_COPY_BIT(ip, chunk_num);
}
return (0);
}
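/*
 * Worked example (illustrative values): if the volume needs 13 chunks,
 * chunk_num is 13 and max_chunk rounds up to the byte boundary 16, so the
 * loop above clears copy bits 13..15, the padding bits in the last byte
 * that must not leak out to the user.
 */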
static int
ii_update_denied(_ii_info_t *ip, spcs_s_info_t kstatus,
int direction, int all)
{
rdc_update_t update;
int size;
unsigned char *bmp;
update.volume = direction == CV_SHD2MST ? ii_pathname(MSTFD(ip)) :
ip->bi_keyname;
update.denied = 0;
update.protocol = RDC_SVC_ONRETURN;
update.size = size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
update.status = kstatus;
update.bitmap = bmp = kmem_alloc(update.size, KM_SLEEP);
if (bmp == NULL) {
spcs_s_add(kstatus, ENOMEM);
return (1);
}
DTRACE_PROBE2(_ii_update_denied, int, all, int, size);
if (all) {
while (size-- > 0)
*bmp++ = (unsigned char)0xff;
} else {
if (II_CHANGE_BMP(ip, update.bitmap) != 0) {
/* failed to read bitmap */
spcs_s_add(kstatus, EIO);
update.denied = 1;
}
}
/* check that no user of the volume objects to the update */
if (update.denied == 0) {
(void) nsc_call_svc(ii_volume_update, (intptr_t)&update);
}
kmem_free(update.bitmap, FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)));
return (update.denied);
}
static int
ii_need_same_size(_ii_info_t *ip)
{
rdc_update_t update;
update.volume = ip->bi_keyname;
update.denied = 0;
update.protocol = RDC_SVC_VOL_ENABLED;
(void) nsc_call_svc(ii_volume_update, (intptr_t)&update);
return (update.denied);
}
/*
* ii_volume: check if vol is already known to Instant Image and return
* volume type if it is.
*/
static int
ii_volume(char *vol, int locked)
{
_ii_info_t *ip;
_ii_overflow_t *op;
int rc = NONE;
/* scan overflow volume list */
mutex_enter(&_ii_overflow_mutex);
DTRACE_PROBE(_ii_volume_mutex);
for (op = _ii_overflow_top; op; op = op->ii_next) {
if (strcmp(vol, op->ii_volname) == 0)
break;
}
mutex_exit(&_ii_overflow_mutex);
if (op) {
return (OVR);
}
if (!locked) {
mutex_enter(&_ii_info_mutex);
}
DTRACE_PROBE(_ii_volume_mutex2);
for (ip = _ii_info_top; ip; ip = ip->bi_next) {
if (strcmp(vol, ii_pathname(ip->bi_mstfd)) == 0) {
rc = MST;
break;
}
if (strcmp(vol, ip->bi_keyname) == 0) {
rc = SHD;
break;
}
if (strcmp(vol, ii_pathname(ip->bi_bmpfd)) == 0) {
rc = BMP;
break;
}
}
DTRACE_PROBE1(_ii_volume_data, int, rc);
if (!locked) {
mutex_exit(&_ii_info_mutex);
}
return (rc);
}
/*
* ii_open_shadow: open the shadow volume for both cached and raw access.
* If the normal device open fails, attempt a file open to allow
* shadowing into a file.
*/
static int
ii_open_shadow(_ii_info_t *ip, char *shadow_vol)
{
int rc = 0;
int file_rc = 0;
ip->bi_shdfd = nsc_open(shadow_vol,
NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
(blind_t)&(ip->bi_shddev), &rc);
if (!ip->bi_shdfd) {
ip->bi_shdfd = nsc_open(shadow_vol,
NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def,
(blind_t)&(ip->bi_shddev), &file_rc);
file_rc = 1;
if (!ip->bi_shdfd) {
return (rc);
}
DTRACE_PROBE(_ii_open_shadow);
} else {
DTRACE_PROBE(_ii_open_shadow);
}
if (file_rc == 0) {
ip->bi_shdrfd = nsc_open(shadow_vol,
NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
(blind_t)&(ip->bi_shdrdev), &rc);
DTRACE_PROBE(_ii_open_shadow);
} else {
ip->bi_shdrfd = nsc_open(shadow_vol,
NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def,
(blind_t)&(ip->bi_shdrdev), &rc);
DTRACE_PROBE(_ii_open_shadow);
}
if (!ip->bi_shdrfd) {
(void) nsc_close(ip->bi_shdfd);
DTRACE_PROBE(_ii_open_shadow);
return (rc);
}
return (0);
}
static void
ii_register_shd(_ii_info_t *ip)
{
ip->bi_shd_tok = _ii_register_path(ip->bi_keyname,
NSC_CACHE, _ii_io);
ip->bi_shdr_tok = _ii_register_path(ip->bi_keyname,
NSC_DEVICE, _ii_ior);
}
static void
ii_register_mst(_ii_info_t *ip)
{
ip->bi_mst_tok = _ii_register_path(ii_pathname(ip->bi_mstfd),
NSC_CACHE, _ii_io);
ip->bi_mstr_tok = _ii_register_path(ii_pathname(ip->bi_mstrfd),
NSC_DEVICE, _ii_ior);
}
static int
ii_register_ok(_ii_info_t *ip)
{
int rc;
int sibling;
int exported;
rc = 1;
sibling = NSHADOWS(ip) && ip != ip->bi_head;
exported = ip->bi_flags & DSW_SHDEXPORT;
if ((ip->bi_bmpfd && !ip->bi_bmp_tok) || (!exported && (
!ip->bi_shd_tok || !ip->bi_shdr_tok)))
rc = 0;
else if (!sibling && (!ip->bi_mst_tok || !ip->bi_mstr_tok))
rc = 0;
return (rc);
}
#ifndef DISABLE_KSTATS
/*
* _ii_kstat_create
* Create and install kstat_io data
*
* Calling/Exit State:
* Returns NULL if the kstats couldn't be created, otherwise returns
* a pointer to the created kstat_t.
*/
static kstat_t *
_ii_kstat_create(_ii_info_t *ip, char *type)
{
kstat_t *result;
char name[ IOSTAT_NAME_LEN ];
int setnum;
char *nptr;
static int mstnum = 0;
static int shdbmpnum = -1;
switch (*type) {
case 'm':
setnum = mstnum++;
nptr = ip->bi_kstat_io.mstio;
break;
case 's':
/* assumption: shadow kstats created before bitmap */
setnum = ++shdbmpnum;
nptr = ip->bi_kstat_io.shdio;
break;
case 'b':
setnum = shdbmpnum;
nptr = ip->bi_kstat_io.bmpio;
break;
default:
cmn_err(CE_WARN, "!Unable to determine kstat type (%c)", *type);
setnum = -1;
break;
}
/*
* The name of the kstat, defined below, is designed to work
* with the 'iostat -x' command. This command leaves only
* 9 characters for the name, and the kstats built in to Solaris
* all seem to be of the form <service><number>. For that
* reason, we have chosen ii<type><number>, where <type> is
* m, s, b, or o (for master, shadow, bitmap, and overflow
* respectively), and the number is monotonically increasing from
* 0 for each time one of those <type>s are created. Note that
* the shadow and bitmap are always created in pairs and so, for
* any given set, they will have the same <number>.
*/
(void) sprintf(name, "ii%c%d", *type, setnum);
(void) strncpy(nptr, name, IOSTAT_NAME_LEN);
result = kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0);
if (result) {
result->ks_private = ip;
result->ks_lock = &ip->bi_kstat_io.statmutex;
kstat_install(result);
} else {
cmn_err(CE_WARN, "!Unable to create %s kstats for set %s", type,
ip->bi_keyname);
}
return (result);
}
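/*
 * For example, the first set configured on a machine typically ends up with
 * the kstat_io names "iim0", "iis0" and "iib0" for its master, shadow and
 * bitmap respectively, which is what appears in 'iostat -x' output.
 */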
/*
* _ii_overflow_kstat_create
* Create and install kstat_io data for an overflow volume
*
* Calling/Exit State:
* Returns NULL if the kstats couldn't be created, otherwise returns
* a pointer to the created kstat_t.
*
* See comments in _ii_kstat_create for additional information.
*
*/
static kstat_t *
_ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op)
{
kstat_t *result;
char *nptr;
char name [IOSTAT_NAME_LEN];
static int ovrnum = 0;
int setnum = ovrnum++;
nptr = ip->bi_kstat_io.ovrio;
(void) sprintf(name, "iio%d", setnum);
(void) strncpy(nptr, name, IOSTAT_NAME_LEN);
mutex_init(&op->ii_kstat_mutex, NULL, MUTEX_DRIVER, NULL);
if ((result =
kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0))) {
result->ks_private = ip;
result->ks_lock = &op->ii_kstat_mutex;
kstat_install(result);
} else {
mutex_destroy(&op->ii_kstat_mutex);
cmn_err(CE_WARN, "!Unable to create overflow kstat for set "
"%s", ip->bi_keyname);
}
return (result);
}
#endif
static void
ii_str_kstat_copy(char *str, char *p1, char *p2, char *p3, char *p4)
{
static int whinged = 0;
char *part[ 4 ];
char fulldata[ DSW_NAMELEN ];
int i, offset, remain;
int num_parts;
int leftover;
int kscharsize = KSTAT_DATA_CHAR_LEN - 1;
/*
* NOTE: the following lines must be changed if DSW_NAMELEN
* ever changes. You'll need a part[] for every kscharsize
* characters (or fraction thereof). The ii_kstat_set_t
* definition in dsw_dev.h will also need new ovr_? entries.
*/
part[ 0 ] = p1;
part[ 1 ] = p2;
part[ 2 ] = p3;
part[ 3 ] = p4;
bzero(fulldata, DSW_NAMELEN);
if (str) {
(void) strncpy(fulldata, str, DSW_NAMELEN);
}
num_parts = DSW_NAMELEN / kscharsize;
leftover = DSW_NAMELEN % kscharsize;
if (leftover) {
++num_parts;
}
if (num_parts > sizeof (part) / sizeof (part[0])) {
/*
* DSW_NAMELEN is 64 and kscharsize is 15, so num_parts (5) always
* exceeds the four part[] slots; without this guard the warning
* below would fire on every call, so only "whinge" once.
*/
if (!whinged) {
#ifdef DEBUG
cmn_err(CE_WARN, "!May not have enough room "
"to store volume name in kstats");
#endif
whinged = 1;
}
num_parts = sizeof (part) / sizeof (part[0]);
}
offset = 0;
remain = DSW_NAMELEN;
for (i = 0; i < num_parts; i++) {
int to_copy = remain > kscharsize? kscharsize : remain;
bcopy(&fulldata[ offset ], part[ i ], to_copy);
offset += to_copy;
remain -= to_copy;
}
}
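/*
 * Worked example: with DSW_NAMELEN at 64 and kscharsize at 15
 * (KSTAT_DATA_CHAR_LEN - 1), num_parts computes to 5 and is clamped to the
 * four part[] slots, so only the first 60 bytes of a maximum-length volume
 * name are copied into the kstat character fields.
 */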
static int
ii_set_stats_update(kstat_t *ksp, int rw)
{
_ii_info_t *ip = (_ii_info_t *)ksp->ks_private;
ii_kstat_set_t *kp = (ii_kstat_set_t *)ksp->ks_data;
if (KSTAT_WRITE == rw) {
return (EACCES);
}
/* copy values over */
kp->size.value.ul = ip->bi_size;
kp->flags.value.ul = ip->bi_flags;
kp->unit.value.ul = ip->bi_throttle_unit;
kp->delay.value.ul = ip->bi_throttle_delay;
kp->mtime.value.ul = ip->bi_mtime;
/* update bitmap counters if necessary */
if (ip->bi_state & DSW_CNTCPYBITS) {
ip->bi_copybits = 0;
if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) {
ip->bi_state &= ~DSW_CNTCPYBITS;
II_CNT_BITS(ip, ip->bi_copyfba,
&ip->bi_copybits,
DSW_BM_SIZE_BYTES(ip));
_ii_rlse_devs(ip, BMP);
}
}
if (ip->bi_state & DSW_CNTSHDBITS) {
ip->bi_shdbits = 0;
if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) {
ip->bi_state &= ~DSW_CNTSHDBITS;
II_CNT_BITS(ip, ip->bi_shdfba,
&ip->bi_shdbits,
DSW_BM_SIZE_BYTES(ip));
_ii_rlse_devs(ip, BMP);
}
}
kp->copybits.value.ul = ip->bi_copybits;
kp->shdbits.value.ul = ip->bi_shdbits;
/* copy volume names */
ii_str_kstat_copy(ii_pathname(MSTFD(ip)),
kp->mst_a.value.c, kp->mst_b.value.c,
kp->mst_c.value.c, kp->mst_d.value.c);
ii_str_kstat_copy(ip->bi_keyname, kp->set_a.value.c, kp->set_b.value.c,
kp->set_c.value.c, kp->set_d.value.c);
ii_str_kstat_copy(ii_pathname(ip->bi_bmpfd),
kp->bmp_a.value.c, kp->bmp_b.value.c,
kp->bmp_c.value.c, kp->bmp_d.value.c);
if (ip->bi_overflow) {
ii_str_kstat_copy(ip->bi_overflow->ii_volname,
kp->ovr_a.value.c, kp->ovr_b.value.c, kp->ovr_c.value.c,
kp->ovr_d.value.c);
(void) strlcpy(kp->ovr_io.value.c, ip->bi_kstat_io.ovrio,
KSTAT_DATA_CHAR_LEN);
} else {
ii_str_kstat_copy("", kp->ovr_a.value.c, kp->ovr_b.value.c,
kp->ovr_c.value.c, kp->ovr_d.value.c);
bzero(kp->ovr_io.value.c, KSTAT_DATA_CHAR_LEN);
}
if ((ip->bi_flags) & DSW_TREEMAP) {
kp->shdchks.value.ul = ip->bi_shdchks;
kp->shdchkused.value.ul = ip->bi_shdchkused;
} else {
kp->shdchks.value.ul = 0;
kp->shdchkused.value.ul = 0;
}
/* make sure value.c are always null terminated */
(void) strlcpy(kp->mst_io.value.c, ip->bi_kstat_io.mstio,
KSTAT_DATA_CHAR_LEN);
(void) strlcpy(kp->shd_io.value.c, ip->bi_kstat_io.shdio,
KSTAT_DATA_CHAR_LEN);
(void) strlcpy(kp->bmp_io.value.c, ip->bi_kstat_io.bmpio,
KSTAT_DATA_CHAR_LEN);
return (0);
}
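/*
 * Illustrative only: these named kstats (module "ii", class "iiset") can be
 * read from userland with kstat(1M), e.g. 'kstat -m ii -c iiset'; each read
 * invokes this ks_update routine to refresh the shared ii_kstat_set template
 * for the set being queried.
 */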
/*
* _ii_config
* Configure an II device pair
*
* Calling/Exit State:
* Returns 0 if the pairing was configured, otherwise an
* error code. The ioctl data structure is copied out to the user
* and contains any additional error information, and the master
* and shadow volume names if not supplied by the user.
*
* Description:
* Reads the user configuration structure and attempts
* to establish an II pairing. The snapshot of the master
* device is established at this point in time.
*/
int
_ii_config(intptr_t arg, int ilp32, int *rvp, int iflags)
{
dsw_config_t uconf;
dsw_config32_t *uconf32;
_ii_info_t *ip, *hip, **ipp;
int rc;
int type;
int nshadows;
int add_to_mst_top;
int import;
int existing;
int resized;
nsc_size_t mst_size, shd_size, bmp_size;
nsc_off_t shdfba;
nsc_off_t copyfba;
int keylen, keyoffset;
ii_header_t *bm_header;
nsc_buf_t *tmp;
spcs_s_info_t kstatus;
spcs_s_info32_t ustatus32;
int rtype;
uint_t hints;
/* Import is a once only operation like an enable */
ASSERT((iflags&(II_EXISTING|II_IMPORT)) != (II_EXISTING|II_IMPORT));
existing = (iflags&II_EXISTING) != 0;
import = (iflags&II_IMPORT) != 0;
*rvp = 0;
if (ilp32) {
uconf32 = kmem_zalloc(sizeof (dsw_config32_t), KM_SLEEP);
if (uconf32 == NULL) {
return (ENOMEM);
}
if (copyin((void *)arg, uconf32, sizeof (*uconf32)) < 0) {
/* don't leak the 32-bit config copy on a copyin failure */
kmem_free(uconf32, sizeof (dsw_config32_t));
return (EFAULT);
}
II_TAIL_COPY(uconf, (*uconf32), master_vol, dsw_config_t);
uconf.status = (spcs_s_info_t)uconf32->status;
ustatus32 = uconf32->status;
kmem_free(uconf32, sizeof (dsw_config32_t));
} else if (copyin((void *)arg, &uconf, sizeof (uconf)) < 0)
return (EFAULT);
DTRACE_PROBE3(_ii_config_info, char *, uconf.master_vol,
char *, uconf.shadow_vol, char *, uconf.bitmap_vol);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (_ii_shutting_down)
return (spcs_s_ocopyoutf(&kstatus, uconf.status,
DSW_ESHUTDOWN));
if (uconf.bitmap_vol[0] == 0)
return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY));
mutex_enter(&_ii_config_mutex);
ip = nsc_kmem_zalloc(sizeof (*ip), KM_SLEEP, _ii_local_mem);
if (!ip) {
mutex_exit(&_ii_config_mutex);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM));
}
ip->bi_mstdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP,
_ii_local_mem);
ip->bi_mstrdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP,
_ii_local_mem);
if (ip->bi_mstdev == NULL || ip->bi_mstrdev == NULL) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM));
}
ip->bi_disabled = 1; /* mark as disabled until we are ready to go */
mutex_init(&ip->bi_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&ip->bi_bmpmutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&ip->bi_rsrvmutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&ip->bi_rlsemutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&ip->bi_chksmutex, NULL, MUTEX_DRIVER, NULL);
cv_init(&ip->bi_copydonecv, NULL, CV_DRIVER, NULL);
cv_init(&ip->bi_reservecv, NULL, CV_DRIVER, NULL);
cv_init(&ip->bi_releasecv, NULL, CV_DRIVER, NULL);
cv_init(&ip->bi_ioctlcv, NULL, CV_DRIVER, NULL);
cv_init(&ip->bi_closingcv, NULL, CV_DRIVER, NULL);
cv_init(&ip->bi_busycv, NULL, CV_DRIVER, NULL);
rw_init(&ip->bi_busyrw, NULL, RW_DRIVER, NULL);
rw_init(&ip->bi_linkrw, NULL, RW_DRIVER, NULL);
(void) strncpy(ip->bi_keyname, uconf.shadow_vol, DSW_NAMELEN);
ip->bi_keyname[DSW_NAMELEN-1] = '\0';
ip->bi_throttle_unit = ii_throttle_unit;
ip->bi_throttle_delay = ii_throttle_delay;
/* First check the list to see if uconf.bitmap_vol's already there */
if (ii_volume(uconf.bitmap_vol, 0) != NONE) {
DTRACE_PROBE(_ii_config_bmp_found);
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
}
ip->bi_bmpfd = nsc_open(uconf.bitmap_vol,
NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(ip->bi_bmpdev), &rc);
if (!ip->bi_bmpfd)
ip->bi_bmpfd = nsc_open(uconf.bitmap_vol,
NSC_IIR_ID|NSC_CACHE|NSC_DEVICE|NSC_RDWR, NULL,
(blind_t)&(ip->bi_bmpdev), &rc);
if (!ip->bi_bmpfd && !existing) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
spcs_s_add(kstatus, rc);
DTRACE_PROBE(_ii_config_no_bmp);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
}
if (import) {
uconf.flag = DSW_GOLDEN;
II_FLAG_SETX(DSW_SHDIMPORT|DSW_GOLDEN, ip);
}
if (existing) {
DTRACE_PROBE(_ii_config_existing);
/*
* _ii_config is used by enable, import and resume (existing).
* If we are not importing or resuming, then this must be an
* enable; indicate this fact for SNMP use.
*/
if (!ip->bi_bmpfd) {
/*
* Couldn't read bitmap, mark master and shadow as
* unusable.
*/
II_FLAG_ASSIGN(DSW_BMPOFFLINE|DSW_MSTOFFLINE|
DSW_SHDOFFLINE, ip);
/*
* Set cluster tag for this element so it can
* be suspended later
*/
(void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
/* need to check on master, might be shared */
goto header_checked;
}
/* check the header */
(void) _ii_rsrv_devs(ip, BMP, II_INTERNAL);
/* get first block of bit map */
mutex_enter(&ip->bi_mutex);
bm_header = _ii_bm_header_get(ip, &tmp);
mutex_exit(&ip->bi_mutex);
if (bm_header == NULL) {
if (ii_debug > 0)
cmn_err(CE_WARN,
"!ii: _ii_bm_header_get returned NULL");
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status,
DSW_EHDRBMP));
}
if (bm_header->ii_magic != DSW_DIRTY &&
bm_header->ii_magic != DSW_CLEAN) {
mutex_exit(&_ii_config_mutex);
_ii_bm_header_free(bm_header, ip, tmp);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status,
DSW_EINVALBMP));
}
II_FLAG_ASSIGN(bm_header->ii_state, ip);
/* Restore copy throttle parameters, if header version is 3 or later */
if (bm_header->ii_version >= 3) { /* II_HEADER_VERSION */
ip->bi_throttle_delay = bm_header->ii_throttle_delay;
ip->bi_throttle_unit = bm_header->ii_throttle_unit;
}
/* Restore cluster & group names, if header version is 4 or later */
if (bm_header->ii_version >= 4) {
/* cluster */
if (*bm_header->clstr_name) {
(void) strncpy(uconf.cluster_tag,
bm_header->clstr_name, DSW_NAMELEN);
(void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
}
/* group */
if (*bm_header->group_name) {
(void) strncpy(uconf.group_name,
bm_header->group_name, DSW_NAMELEN);
(void) II_LINK_GROUP(ip, uconf.group_name);
}
}
/* restore latest modification time, if header version >= 5 */
if (bm_header->ii_version >= 5) {
ip->bi_mtime = bm_header->ii_mtime;
}
/* Fetch master and shadow names from bitmap header */
if (uconf.master_vol[0] == 0)
(void) strncpy(uconf.master_vol, bm_header->master_vol,
DSW_NAMELEN);
if (uconf.shadow_vol[0] == 0)
(void) strncpy(uconf.shadow_vol, bm_header->shadow_vol,
DSW_NAMELEN);
/* return the fetched names to the user */
if (ilp32) {
uconf32 = kmem_zalloc(sizeof (dsw_config32_t),
KM_SLEEP);
if (uconf32 == NULL) {
mutex_exit(&_ii_config_mutex);
_ii_bm_header_free(bm_header, ip, tmp);
_ii_rlse_devs(ip, BMP);
_ii_info_free(ip);
return (ENOMEM);
}
uconf32->status = ustatus32;
II_TAIL_COPY((*uconf32), uconf, master_vol,
dsw_config32_t);
rc = copyout(uconf32, (void *)arg, sizeof (*uconf32));
kmem_free(uconf32, sizeof (dsw_config32_t));
} else {
rc = copyout(&uconf, (void *)arg, sizeof (uconf));
}
if (rc) {
mutex_exit(&_ii_config_mutex);
_ii_bm_header_free(bm_header, ip, tmp);
_ii_rlse_devs(ip, BMP);
_ii_info_free(ip);
return (EFAULT);
}
if (strncmp(bm_header->bitmap_vol, uconf.bitmap_vol,
DSW_NAMELEN) || ((!(ip->bi_flags&DSW_SHDIMPORT)) &&
strncmp(bm_header->master_vol, uconf.master_vol,
DSW_NAMELEN)) || strncmp(bm_header->shadow_vol,
uconf.shadow_vol, DSW_NAMELEN)) {
mutex_exit(&_ii_config_mutex);
_ii_bm_header_free(bm_header, ip, tmp);
_ii_rlse_devs(ip, BMP);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status,
DSW_EMISMATCH));
}
shdfba = bm_header->ii_shdfba;
copyfba = bm_header->ii_copyfba;
if ((ip->bi_flags)&DSW_TREEMAP) {
if (ii_debug > 0)
cmn_err(CE_NOTE,
"!II: Resuming short shadow volume");
ip->bi_mstchks = bm_header->ii_mstchks;
ip->bi_shdchks = bm_header->ii_shdchks;
ip->bi_shdchkused = bm_header->ii_shdchkused;
ip->bi_shdfchk = bm_header->ii_shdfchk;
if (bm_header->overflow_vol[0] != 0)
if ((rc = ii_overflow_attach(ip,
bm_header->overflow_vol, 0)) != 0) {
mutex_exit(&_ii_config_mutex);
_ii_bm_header_free(bm_header, ip, tmp);
_ii_rlse_devs(ip, BMP);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus,
uconf.status, rc));
}
}
_ii_bm_header_free(bm_header, ip, tmp);
_ii_rlse_devs(ip, BMP);
}
header_checked:
if (ip->bi_flags&DSW_SHDIMPORT)
(void) strcpy(uconf.master_vol, "<imported shadow>");
if (!uconf.master_vol[0] || !uconf.shadow_vol[0]) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY));
}
/* check that no volume has been given twice */
if (strncmp(uconf.master_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
}
if (strncmp(uconf.master_vol, uconf.bitmap_vol, DSW_NAMELEN) == 0) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
}
if (strncmp(uconf.bitmap_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
}
/* check that master is not already a bitmap, shadow or overflow */
type = ii_volume(uconf.master_vol, 1);
if (type != NONE && type != MST) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
}
/* check that shadow is not used as anything else */
type = ii_volume(uconf.shadow_vol, 1);
if (type != NONE && type != SHD) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
}
/* Setup the table bitmap operations table */
switch (ii_bitmap) {
case II_KMEM:
if (ii_debug > 0)
cmn_err(CE_NOTE, "!ii: using volatile bitmaps");
ip->bi_bitmap_ops = &kmem_buf_bmp;
break;
case II_FWC:
hints = 0;
(void) nsc_node_hints(&hints);
if ((hints & NSC_FORCED_WRTHRU) == 0)
ip->bi_bitmap_ops = &kmem_buf_bmp;
else
ip->bi_bitmap_ops = &alloc_buf_bmp;
if (ii_debug > 0) {
cmn_err(CE_NOTE, "!ii: chosen to use %s bitmaps",
ip->bi_bitmap_ops == &kmem_buf_bmp ?
"volatile" : "persistent");
}
break;
case II_WTHRU:
default:
if (ii_debug > 0)
cmn_err(CE_NOTE, "!ii: using persistent bitmaps");
ip->bi_bitmap_ops = &alloc_buf_bmp;
break;
}
/*
* If we found another shadow volume with the same name,
* and this is a resume operation,
* and this shadow is in the exported state,
* then try an on-the-fly join instead.
*/
for (hip = _ii_info_top; hip; hip = hip->bi_next)
if (strcmp(uconf.shadow_vol, hip->bi_keyname) == 0)
break;
if ((hip) && (type == SHD) && existing &&
(ip->bi_flags & DSW_SHDEXPORT)) {
/*
* Stop any copy in progress
*/
while (_ii_stopcopy(hip) == EINTR)
;
/*
* Start the imported shadow teardown
*/
mutex_enter(&hip->bi_mutex);
/* disable access to imported shadow */
hip->bi_disabled = 1;
/* Wait for any I/O's to complete */
while (hip->bi_ioctl) {
hip->bi_state |= DSW_IOCTL;
cv_wait(&hip->bi_ioctlcv, &hip->bi_mutex);
}
mutex_exit(&hip->bi_mutex);
/* this rw_enter forces us to drain all active IO */
rw_enter(&hip->bi_linkrw, RW_WRITER);
rw_exit(&hip->bi_linkrw);
/* remove ip from _ii_info_top linked list */
mutex_enter(&_ii_info_mutex);
for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) {
if (hip == *ipp) {
*ipp = hip->bi_next;
break;
}
}
if (hip->bi_kstat) {
kstat_delete(hip->bi_kstat);
hip->bi_kstat = NULL;
}
mutex_exit(&_ii_info_mutex);
/* Gain access to both bitmap volumes */
rtype = BMP;
if (((rc = _ii_rsrv_devs(hip, rtype, II_INTERNAL)) != 0) ||
((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0)) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
}
/* Merge imported bitmap */
rc = II_JOIN_BMP(ip, hip);
/* Release access to bitmap volume */
_ii_rlse_devs(hip, rtype);
ii_sibling_free(hip);
/* Clear the fact that we are exported */
mutex_enter(&ip->bi_mutex);
II_FLAG_CLR(DSW_SHDEXPORT, ip);
/* Release resources */
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, BMP);
} else if (type != NONE) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
}
/*
* Handle non-exported shadow
*/
if ((ip->bi_flags & DSW_SHDEXPORT) == 0) {
if ((rc = ii_open_shadow(ip, uconf.shadow_vol)) != 0) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uconf.status,
DSW_EOPEN));
}
}
/*
* allocate _ii_concopy_sema and set it to a value that won't allow
* all of the cache to be consumed by copy loops.
*/
if (_ii_concopy_init == 0 && ip->bi_bmpfd != NULL) {
int asize = 0, wsize;
nsc_size_t cfbas, maxfbas;
(void) nsc_cache_sizes(&asize, &wsize);
if (asize > 0) {
cfbas = FBA_NUM(asize);
(void) _ii_rsrv_devs(ip, BMP, II_INTERNAL);
rc = nsc_maxfbas(ip->bi_bmpfd, 0, &maxfbas);
_ii_rlse_devs(ip, BMP);
if (!II_SUCCESS(rc))
maxfbas = 1024; /* i.e. _SD_MAX_FBAS */
ii_nconcopy = cfbas / (maxfbas * 2) / 3;
}
if (ii_nconcopy < 2)
ii_nconcopy = 2;
ASSERT(ii_nconcopy > 0);
sema_init(&_ii_concopy_sema, ii_nconcopy, NULL,
SEMA_DRIVER, NULL);
_ii_concopy_init = 1;
}
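/*
 * Worked example (illustrative numbers only): with a 1GB cache the
 * calculation above gives cfbas = FBA_NUM(1GB) = 2097152, and with
 * maxfbas of 1024 this yields ii_nconcopy = 2097152 / 2048 / 3 = 341
 * concurrent copy loops.
 */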
/* check for shared master volume */
for (hip = _ii_mst_top; hip; hip = hip->bi_nextmst)
if (strcmp(uconf.master_vol, ii_pathname(hip->bi_mstfd)) == 0)
break;
add_to_mst_top = (hip == NULL);
if (!hip)
for (hip = _ii_info_top; hip; hip = hip->bi_next)
if (strcmp(uconf.master_vol,
ii_pathname(hip->bi_mstfd)) == 0)
break;
nshadows = (hip != NULL);
/* Check if master is offline */
if (hip) {
if (hip->bi_flags & DSW_MSTOFFLINE) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status,
DSW_EOFFLINE));
}
}
if (!nshadows && (ip->bi_flags&DSW_SHDIMPORT) == 0) {
ip->bi_mstfd = nsc_open(uconf.master_vol,
NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
(blind_t)(ip->bi_mstdev), &rc);
if (!ip->bi_mstfd) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uconf.status,
DSW_EOPEN));
}
ip->bi_mstrfd = nsc_open(uconf.master_vol,
NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
(blind_t)(ip->bi_mstrdev), &rc);
if (!ip->bi_mstrfd) {
mutex_exit(&_ii_config_mutex);
_ii_info_free(ip);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uconf.status,
DSW_EOPEN));
}
}
ip->bi_head = ip;
ip->bi_master = ip;
mutex_enter(&_ii_info_mutex);
ip->bi_next = _ii_info_top;
_ii_info_top = ip;
if (nshadows) {
/* link new shadow group together with others sharing master */
if (ii_debug > 0)
cmn_err(CE_NOTE,
"!II: shadow %s shares master %s with other shadow"
" groups", uconf.shadow_vol, uconf.master_vol);
hip = hip->bi_head;
nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev));
nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev));
ip->bi_mstrdev = hip->bi_mstrdev;
ip->bi_mstdev = hip->bi_mstdev;
ip->bi_head = hip;
ip->bi_sibling = hip->bi_sibling;
if (add_to_mst_top) {
hip->bi_nextmst = _ii_mst_top;
_ii_mst_top = hip;
}
hip->bi_sibling = ip;
ip->bi_master = ip->bi_head->bi_master;
}
mutex_exit(&_ii_info_mutex);
mutex_exit(&_ii_config_mutex);
keylen = strlen(ip->bi_keyname);
if (keylen > KSTAT_STRLEN - 1) {
keyoffset = keylen + 1 - KSTAT_STRLEN;
} else {
keyoffset = 0;
}
ip->bi_kstat = kstat_create("ii", _ii_instance++,
&ip->bi_keyname[ keyoffset ], "iiset", KSTAT_TYPE_NAMED,
sizeof (ii_kstat_set) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL);
if (ip->bi_kstat) {
ip->bi_kstat->ks_data = &ii_kstat_set;
ip->bi_kstat->ks_update = ii_set_stats_update;
ip->bi_kstat->ks_private = ip;
kstat_install(ip->bi_kstat);
} else {
cmn_err(CE_WARN, "!Unable to create set-specific kstats");
}
#ifndef DISABLE_KSTATS
/* create kstats information */
mutex_init(&ip->bi_kstat_io.statmutex, NULL, MUTEX_DRIVER, NULL);
if (ip == ip->bi_master) {
ip->bi_kstat_io.master = _ii_kstat_create(ip, "master");
} else {
ip->bi_kstat_io.master = ip->bi_master->bi_kstat_io.master;
(void) strlcpy(ip->bi_kstat_io.mstio,
ip->bi_master->bi_kstat_io.mstio, KSTAT_DATA_CHAR_LEN);
}
ip->bi_kstat_io.shadow = _ii_kstat_create(ip, "shadow");
ip->bi_kstat_io.bitmap = _ii_kstat_create(ip, "bitmap");
#endif
(void) _ii_reserve_begin(ip);
rtype = MSTR|SHDR|BMP;
if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
spcs_s_add(kstatus, rc);
rc = DSW_ERSRVFAIL;
goto fail;
}
if (ip->bi_flags&DSW_SHDIMPORT) {
rc = 0; /* no master for imported volumes */
mst_size = 0;
} else
rc = nsc_partsize(MSTFD(ip), &mst_size);
if (rc == 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0)
rc = nsc_partsize(SHDFD(ip), &shd_size);
if (!ip->bi_bmpfd)
rc = EINVAL;
if (rc == 0)
rc = nsc_partsize(ip->bi_bmpfd, &bmp_size);
if (ip->bi_flags&DSW_SHDIMPORT)
ip->bi_size = shd_size;
else
ip->bi_size = mst_size;
if ((((ip->bi_flags&DSW_SHDIMPORT) != DSW_SHDIMPORT) &&
(mst_size < 1)) ||
(((ip->bi_flags&DSW_SHDEXPORT) != DSW_SHDEXPORT) &&
(shd_size < 1)) ||
((rc == 0) && (bmp_size < 1))) {
/* could be really zero, or could be > 1 TB; fail the enable */
rc = EINVAL;
}
if (rc != 0) { /* rc set means an nsc_partsize() failed */
/*
* If existing group, mark bitmap as offline and set
* bmp_size to "right size".
*/
if (existing) {
bmp_size = 2 * DSW_BM_FBA_LEN(mst_size) +
DSW_SHD_BM_OFFSET;
goto no_more_bmp_tests;
}
spcs_s_add(kstatus, rc);
rc = DSW_EPARTSIZE;
_ii_rlse_devs(ip, rtype);
_ii_reserve_end(ip);
goto fail;
}
if (ip->bi_flags&DSW_SHDIMPORT)
mst_size = shd_size;
if (ip->bi_flags&DSW_SHDEXPORT)
shd_size = mst_size;
/*
* Check with RDC if the master & shadow sizes are different.
* Once II is enabled, the shadow size will be made to appear
* the same as the master, and this will panic RDC if we're
* changing sizes on it.
*/
resized = (shd_size != mst_size);
if (resized && ii_need_same_size(ip)) {
cmn_err(CE_WARN, "!Cannot enable II set: would change volume "
"size on RDC");
rc = DSW_EOPACKAGE;
_ii_rlse_devs(ip, rtype);
_ii_reserve_end(ip);
goto fail;
}
if (bmp_size < 2 * DSW_BM_FBA_LEN(mst_size) + DSW_SHD_BM_OFFSET) {
/* bitmap volume too small */
if (ii_debug > 0)
cmn_err(CE_NOTE,
"!ii: invalid sizes: bmp %" NSC_SZFMT " mst %"
NSC_SZFMT " %" NSC_SZFMT "",
bmp_size, mst_size, DSW_BM_FBA_LEN(mst_size));
rc = DSW_EBMPSIZE;
_ii_rlse_devs(ip, rtype);
_ii_reserve_end(ip);
goto fail;
}
if ((shd_size < mst_size) && (uconf.flag&DSW_GOLDEN) != 0) {
/* shadow volume too small */
if (ii_debug > 0)
cmn_err(CE_NOTE, "!shd size too small (%" NSC_SZFMT
") for independent set's master (%" NSC_SZFMT ")",
shd_size, mst_size);
rc = DSW_ESHDSIZE;
_ii_rlse_devs(ip, rtype);
_ii_reserve_end(ip);
goto fail;
}
ip->bi_busy = kmem_zalloc(1 + (ip->bi_size / (DSW_SIZE * DSW_BITS)),
KM_SLEEP);
if (!ip->bi_busy) {
rc = ENOMEM;
_ii_rlse_devs(ip, rtype);
_ii_reserve_end(ip);
goto fail;
}
if (existing == 0) {
DTRACE_PROBE(_ii_config);
/* first time this shadow has been set up */
mutex_enter(&ip->bi_mutex);
bm_header = _ii_bm_header_get(ip, &tmp);
mutex_exit(&ip->bi_mutex);
if (bm_header == NULL) {
if (ii_debug > 0)
cmn_err(CE_WARN,
"!ii: _ii_bm_header_get returned NULL");
rc = DSW_EHDRBMP;
_ii_rlse_devs(ip, rtype);
_ii_reserve_end(ip);
goto fail;
}
bzero(bm_header, sizeof (*bm_header));
/* copy pathnames into it */
(void) strncpy(bm_header->master_vol, uconf.master_vol,
DSW_NAMELEN);
(void) strncpy(bm_header->shadow_vol, uconf.shadow_vol,
DSW_NAMELEN);
(void) strncpy(bm_header->bitmap_vol, uconf.bitmap_vol,
DSW_NAMELEN);
(void) strncpy(bm_header->clstr_name, uconf.cluster_tag,
DSW_NAMELEN);
(void) strncpy(bm_header->group_name, uconf.group_name,
DSW_NAMELEN);
if (uconf.cluster_tag[0] != 0)
(void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
if (uconf.group_name[0] != 0)
(void) II_LINK_GROUP(ip, uconf.group_name);
bm_header->ii_state = (uconf.flag & DSW_GOLDEN);
II_FLAG_ASSIGN(bm_header->ii_state, ip);
if (import) {
II_FLAG_SETX(DSW_SHDIMPORT, ip);
bm_header->ii_state |= DSW_SHDIMPORT;
}
if (resized) {
II_FLAG_SETX(DSW_RESIZED, ip);
bm_header->ii_state |= DSW_RESIZED;
}
bm_header->ii_type = (uconf.flag & DSW_GOLDEN) ?
DSW_GOLDEN_TYPE : DSW_QUICK_TYPE;
bm_header->ii_magic = DSW_DIRTY;
bm_header->ii_version = II_HEADER_VERSION;
bm_header->ii_shdfba = DSW_SHD_BM_OFFSET;
bm_header->ii_copyfba = DSW_COPY_BM_OFFSET;
bm_header->ii_throttle_delay = ip->bi_throttle_delay;
bm_header->ii_throttle_unit = ip->bi_throttle_unit;
ip->bi_shdfba = bm_header->ii_shdfba;
ip->bi_copyfba = bm_header->ii_copyfba;
ip->bi_mtime = ddi_get_time();
/* write it to disk */
mutex_enter(&ip->bi_mutex);
rc = _ii_bm_header_put(bm_header, ip, tmp);
mutex_exit(&ip->bi_mutex);
if (!II_SUCCESS(rc)) {
spcs_s_add(kstatus, rc);
rc = DSW_EHDRBMP;
_ii_rlse_devs(ip, rtype);
_ii_reserve_end(ip);
goto fail;
}
if ((shd_size < mst_size) && (uconf.flag & DSW_GOLDEN) == 0) {
/*
* shadow volume smaller than master, so we must use a dependent
* copy with the chunk-location mapping stored in the bitmap file.
*/
/* number of chunks in shadow volume */
nsc_size_t shd_chunks;
nsc_size_t bmp_chunks;
nsc_size_t tmp_chunks;
if (ii_debug > 1)
cmn_err(CE_NOTE, "!ii: using tree index on %s",
uconf.master_vol);
shd_chunks = shd_size / DSW_SIZE;
/* do not add in partial chunk at end */
ip->bi_mstchks = mst_size / DSW_SIZE;
if (mst_size % DSW_SIZE != 0)
ip->bi_mstchks++;
bmp_chunks = ii_btsize(bmp_size - ip->bi_copyfba -
DSW_BM_FBA_LEN(ip->bi_size));
tmp_chunks = ip->bi_copyfba +
DSW_BM_FBA_LEN(ip->bi_size);
if (bmp_chunks < (nsc_size_t)ip->bi_mstchks) {
if (ii_debug > -1) {
cmn_err(CE_NOTE, "!ii: bitmap vol too "
"small: %" NSC_SZFMT " vs. %"
NSC_SZFMT, bmp_size,
tmp_chunks);
}
spcs_s_add(kstatus, rc);
rc = DSW_EHDRBMP;
_ii_rlse_devs(ip, rtype);
_ii_reserve_end(ip);
goto fail;
}
mutex_enter(&ip->bi_mutex);
II_FLAG_SET(DSW_TREEMAP, ip);
mutex_exit(&ip->bi_mutex);
/* following values are written to header by ii_tinit */
#if (defined(NSC_MULTI_TERABYTE) && !defined(II_MULTIMULTI_TERABYTE))
ASSERT(shd_chunks <= INT32_MAX);
ASSERT(mst_size / DSW_SIZE <= INT32_MAX);
#endif
ip->bi_mstchks = mst_size / DSW_SIZE;
if (mst_size % DSW_SIZE != 0)
ip->bi_mstchks++;
#ifdef II_MULTIMULTI_TERABYTE
ip->bi_shdchks = shd_chunks;
#else
/* still have 31 bit chunkid's */
ip->bi_shdchks = (chunkid_t)shd_chunks;
#endif
ip->bi_shdchkused = 0;
rc = ii_tinit(ip);
} else {
ip->bi_shdchks = shd_size / DSW_SIZE;
ip->bi_shdchkused = 0;
}
if (rc == 0)
rc = II_LOAD_BMP(ip, 1);
if (rc == 0)
rc = II_ZEROBM(ip);
if (rc == 0)
rc = II_COPYBM(ip); /* also clear copy bitmap */
if (rc == 0 && (uconf.flag & DSW_GOLDEN) && !import)
rc = ii_fill_copy_bmp(ip);
if (rc) {
spcs_s_add(kstatus, rc);
rc = DSW_EHDRBMP;
_ii_rlse_devs(ip, rtype);
goto fail;
}
/* check that changing shadow won't upset RDC */
if (ii_update_denied(ip, kstatus, 0, 1)) {
rc = DSW_EOPACKAGE;
_ii_rlse_devs(ip, rtype);
_ii_reserve_end(ip);
goto fail;
}
ip->bi_disabled = 0; /* all okay and ready, we can go now */
_ii_rlse_devs(ip, rtype);
/* no _ii_reserve_end() here - we must register first */
ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd),
NSC_CACHE|NSC_DEVICE, _ii_io);
if (!nshadows)
ii_register_mst(ip);
ii_register_shd(ip);
if (!ii_register_ok(ip)) {
ip->bi_disabled = 1; /* argh */
rc = DSW_EREGISTER;
goto fail;
}
/* no _ii_reserve_begin() here -- we're still in process */
(void) _ii_rsrv_devs(ip, rtype, II_INTERNAL);
if (ii_debug > 0)
cmn_err(CE_NOTE, "!ii: config: master %s shadow %s",
uconf.master_vol, uconf.shadow_vol);
rc = 0;
if ((uconf.flag & DSW_GOLDEN) && !import) {
mutex_enter(&ip->bi_mutex);
II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip);
ip->bi_ioctl++; /* we are effectively in an ioctl */
mutex_exit(&ip->bi_mutex);
rc = _ii_copyvol(ip, 0, rtype, kstatus, 1);
}
_ii_rlse_devs(ip, rtype);
_ii_reserve_end(ip);
++iigkstat.num_sets.value.ul;
return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
}
ip->bi_shdchks = shd_size / DSW_SIZE;
ip->bi_shdfba = shdfba;
ip->bi_copyfba = copyfba;
rc = II_LOAD_BMP(ip, 0); /* reload saved bitmap */
mutex_enter(&ip->bi_mutex);
if (rc == 0)
bm_header = _ii_bm_header_get(ip, &tmp);
mutex_exit(&ip->bi_mutex);
if (rc || bm_header == NULL) {
if (existing) {
goto no_more_bmp_tests;
}
rc = DSW_EHDRBMP;
goto fail;
}
/*
* If the header is dirty and it wasn't kept on persistent storage
* then the bitmaps must be assumed to be bad.
*/
if (bm_header->ii_magic == DSW_DIRTY &&
ip->bi_bitmap_ops != &alloc_buf_bmp) {
type = bm_header->ii_type;
_ii_bm_header_free(bm_header, ip, tmp);
if (type == DSW_GOLDEN_TYPE) {
if ((ip->bi_flags & DSW_COPYINGM) != 0)
_ii_error(ip, DSW_SHDOFFLINE);
else if ((ip->bi_flags & DSW_COPYINGS) != 0)
_ii_error(ip, DSW_MSTOFFLINE);
else {
/* No copying, so they're just different */
rc = ii_fill_copy_bmp(ip);
if (rc) {
spcs_s_add(kstatus, rc);
rc = DSW_EHDRBMP;
goto fail;
}
}
} else
_ii_error(ip, DSW_SHDOFFLINE);
mutex_enter(&ip->bi_mutex);
bm_header = _ii_bm_header_get(ip, &tmp);
mutex_exit(&ip->bi_mutex);
if (bm_header == NULL) {
rc = DSW_EHDRBMP;
goto fail;
}
}
bm_header->ii_magic = DSW_DIRTY;
mutex_enter(&ip->bi_mutex);
rc = _ii_bm_header_put(bm_header, ip, tmp);
mutex_exit(&ip->bi_mutex);
if (!II_SUCCESS(rc)) {
spcs_s_add(kstatus, rc);
rc = DSW_EHDRBMP;
goto fail;
}
ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd),
NSC_CACHE|NSC_DEVICE, _ii_io);
no_more_bmp_tests:
_ii_rlse_devs(ip, rtype);
ip->bi_disabled = 0; /* all okay and ready, we can go now */
if (!nshadows)
ii_register_mst(ip);
if ((ip->bi_flags & DSW_SHDEXPORT) == 0)
ii_register_shd(ip);
if (!ii_register_ok(ip)) {
rc = DSW_EREGISTER;
goto fail;
}
_ii_reserve_end(ip);
if (ii_debug > 0)
cmn_err(CE_NOTE, "!ii: config: master %s shadow %s",
uconf.master_vol, uconf.shadow_vol);
rc = 0;
if (ip->bi_flags & DSW_COPYINGP) {
/* Copy was in progress, so continue it */
(void) _ii_rsrv_devs(ip, rtype, II_INTERNAL);
mutex_enter(&ip->bi_mutex);
ip->bi_ioctl++; /* we are effectively in an ioctl */
mutex_exit(&ip->bi_mutex);
rc = _ii_copyvol(ip, ((ip->bi_flags & DSW_COPYINGS) != 0) ?
CV_SHD2MST : 0, rtype, kstatus, 0);
}
++iigkstat.num_sets.value.ul;
return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
fail:
/* remove ip from _ii_info_top linked list */
mutex_enter(&_ii_info_mutex);
for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) {
if (ip == *ipp) {
*ipp = ip->bi_next;
break;
}
}
mutex_exit(&_ii_info_mutex);
ii_sibling_free(ip);
return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
}
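/*
 * _ii_perform_disable
 * Disable a single set, named by its shadow (key) name.
 *
 * Calling/Exit State:
 * Returns 0 on success, otherwise a DSW_* error code.  The 'reclaim'
 * argument selects the overflow handling passed to ii_overflow_free():
 * NO_RECLAIM or RECLAIM as described near those definitions, or -1 to
 * let this routine decide based on whether any other set is still
 * attached to the overflow volume.
 */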
static int
_ii_perform_disable(char *setname, spcs_s_info_t *kstatusp, int reclaim)
{
_ii_info_t **xip, *ip;
_ii_overflow_t *op;
nsc_buf_t *tmp = NULL;
int rc;
ii_header_t *bm_header;
int rtype;
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(setname);
if (ip == NULL) {
mutex_exit(&_ii_info_mutex);
return (DSW_ENOTFOUND);
}
if ((ip->bi_flags & DSW_GOLDEN) &&
((ip->bi_flags & DSW_COPYINGP) != 0)) {
/*
* Cannot disable an independent copy while still copying
* as it means that a data dependency exists.
*/
mutex_exit(&_ii_info_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
DTRACE_PROBE(_ii_perform_disable_end_DSW_EDEPENDENCY);
return (DSW_EDEPENDENCY);
}
if ((ip->bi_flags & DSW_GOLDEN) == 0 &&
ii_update_denied(ip, *kstatusp, 0, 1)) {
/* Cannot disable a dependent shadow while RDC is unsure */
mutex_exit(&_ii_info_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
DTRACE_PROBE(DSW_EOPACKAGE);
return (DSW_EOPACKAGE);
}
if (((ip->bi_flags & DSW_RESIZED) == DSW_RESIZED) &&
ii_need_same_size(ip)) {
/* We can't disable the set whilst RDC is using it */
mutex_exit(&_ii_info_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
cmn_err(CE_WARN, "!Cannot disable II set: would change "
"volume size on RDC");
DTRACE_PROBE(DSW_EOPACKAGE_resize);
return (DSW_EOPACKAGE);
}
ip->bi_disabled = 1;
if (NSHADOWS(ip) && (ip->bi_master == ip)) {
ip->bi_flags &= (~DSW_COPYING);
ip->bi_state |= DSW_MULTIMST;
}
mutex_exit(&_ii_info_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_stopvol(ip);
rtype = SHDR|BMP;
if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
spcs_s_add(*kstatusp, rc);
DTRACE_PROBE(DSW_ERSRVFAIL);
return (DSW_ERSRVFAIL);
}
if ((ii_header < 128) &&
(((ip->bi_flags & DSW_GOLDEN) == 0) ||
(ip->bi_flags & DSW_COPYING))) {
/*
* Not a full copy so attempt to prevent use of partial copy
* by clearing where the first ufs super-block would be
* located. Solaris often incorporates the disk header into
* the start of the first slice, so avoid clearing the very
* first 16 blocks of the volume.
*/
if (ii_debug > 1)
cmn_err(CE_NOTE, "!ii: Shadow copy invalidated");
II_READ_START(ip, shadow);
rc = nsc_alloc_buf(SHDFD(ip), ii_header, 128 - ii_header,
NSC_RDWRBUF, &tmp);
II_READ_END(ip, shadow, rc, 128 - ii_header);
if (II_SUCCESS(rc)) {
rc = nsc_zero(tmp, ii_header, 128 - ii_header, 0);
if (II_SUCCESS(rc)) {
II_NSC_WRITE(ip, shadow, rc, tmp, ii_header,
(128 - ii_header), 0);
}
}
if (tmp)
(void) nsc_free_buf(tmp);
if (!II_SUCCESS(rc))
_ii_error(ip, DSW_SHDOFFLINE);
}
/* this rw_enter forces us to drain all active IO */
rw_enter(&ip->bi_linkrw, RW_WRITER);
rw_exit(&ip->bi_linkrw);
/* remove ip from _ii_info_top linked list */
mutex_enter(&_ii_info_mutex);
for (xip = &_ii_info_top; *xip; xip = &((*xip)->bi_next)) {
if (ip == *xip) {
*xip = ip->bi_next;
break;
}
}
if (ip->bi_kstat) {
kstat_delete(ip->bi_kstat);
ip->bi_kstat = NULL;
}
mutex_exit(&_ii_info_mutex);
rc = II_SAVE_BMP(ip, 1);
mutex_enter(&ip->bi_mutex);
if (rc == 0)
bm_header = _ii_bm_header_get(ip, &tmp);
if (rc == 0 && bm_header) {
if (ii_debug > 1)
cmn_err(CE_NOTE, "!ii: Invalid header written");
bm_header->ii_magic = DSW_INVALID;
/* write it to disk */
(void) _ii_bm_header_put(bm_header, ip, tmp);
}
mutex_exit(&ip->bi_mutex);
op = ip->bi_overflow;
if (op && (reclaim == -1)) {
reclaim = (op->ii_drefcnt == 1? NO_RECLAIM : RECLAIM);
}
if ((op != NULL) && (op->ii_hversion >= 1) &&
(op->ii_hmagic == II_OMAGIC)) {
mutex_enter(&_ii_overflow_mutex);
if (ip->bi_flags & DSW_OVRHDRDRTY) {
mutex_enter(&ip->bi_mutex);
ip->bi_flags &= ~DSW_OVRHDRDRTY;
mutex_exit(&ip->bi_mutex);
ASSERT(op->ii_urefcnt > 0);
op->ii_urefcnt--;
}
if (op->ii_urefcnt == 0) {
op->ii_flags &= ~IIO_CNTR_INVLD;
op->ii_unused = op->ii_nchunks - 1;
}
mutex_exit(&_ii_overflow_mutex);
}
ii_overflow_free(ip, reclaim);
_ii_rlse_devs(ip, rtype);
ii_sibling_free(ip);
--iigkstat.num_sets.value.ul;
return (0);
}
/*
* _ii_disable
* Deconfigures an II pair
*
* Calling/Exit State:
* Returns 0 if the pair was disabled. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*
* Description:
* Reads the user configuration structure and attempts to
* deconfigure that pairing based on the master device pathname.
*/
int
_ii_disable(intptr_t arg, int ilp32, int *rvp)
{
dsw_ioctl_t uparms;
dsw_ioctl32_t uparms32;
_ii_overflow_t *op;
int rc, rerr;
spcs_s_info_t kstatus;
uint64_t hash;
int reclaim = -1; /* -1: let _ii_perform_disable decide */
_ii_lsthead_t *oldhead, **head;
_ii_lstinfo_t *np, **xnp, *oldp;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
return (EFAULT);
II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
uparms.status = (spcs_s_info_t)uparms32.status;
} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!uparms.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
DTRACE_PROBE2(_ii_disable_info, char *, uparms.shadow_vol,
int, uparms.flags);
/* group or single set? */
if (uparms.flags & CV_IS_GROUP) {
hash = nsc_strhash(uparms.shadow_vol);
mutex_enter(&_ii_group_mutex);
for (head = &_ii_group_top; *head;
head = &((*head)->lst_next)) {
if ((hash == (*head)->lst_hash) &&
strncmp((*head)->lst_name, uparms.shadow_vol,
DSW_NAMELEN) == 0)
break;
}
if (!*head) {
mutex_exit(&_ii_group_mutex);
return (spcs_s_ocopyoutf(&kstatus, uparms.status,
DSW_EGNOTFOUND));
}
/* clear any overflow vol usage counts */
for (np = (*head)->lst_start; np; np = np->lst_next) {
if (np->lst_ip->bi_overflow) {
np->lst_ip->bi_overflow->ii_detachcnt = 0;
}
}
/* now increment */
for (np = (*head)->lst_start; np; np = np->lst_next) {
if (np->lst_ip->bi_overflow) {
++np->lst_ip->bi_overflow->ii_detachcnt;
}
}
/* finally, disable all group members */
rerr = 0;
xnp = &(*head)->lst_start;
while (*xnp) {
op = (*xnp)->lst_ip->bi_overflow;
if (op) {
reclaim = (op->ii_drefcnt == op->ii_detachcnt?
NO_RECLAIM : RECLAIM);
--op->ii_detachcnt;
}
/* clear out the group pointer */
(*xnp)->lst_ip->bi_group = NULL;
rc = _ii_perform_disable((*xnp)->lst_ip->bi_keyname,
&kstatus, reclaim);
if (rc) {
/* restore group name */
(*xnp)->lst_ip->bi_group = (*head)->lst_name;
/* restore detachcnt */
if (op) {
++op->ii_detachcnt;
}
/* don't delete branch */
++rerr;
spcs_s_add(kstatus, rc);
/* move forward in linked list */
xnp = &(*xnp)->lst_next;
} else {
oldp = (*xnp);
*xnp = (*xnp)->lst_next;
kmem_free(oldp, sizeof (_ii_lstinfo_t));
}
}
if (rerr) {
mutex_exit(&_ii_group_mutex);
return (spcs_s_ocopyoutf(&kstatus, uparms.status,
DSW_EDISABLE));
}
/* no errors, all sets disabled, OK to free list head */
oldhead = *head;
*head = (*head)->lst_next;
kmem_free(oldhead, sizeof (_ii_lsthead_t));
mutex_exit(&_ii_group_mutex);
} else {
/* only a single set is being disabled */
rc = _ii_perform_disable(uparms.shadow_vol, &kstatus, -1);
if (rc)
return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
}
spcs_s_kfree(kstatus);
return (0);
}
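/*
 * A minimal user-level sketch of reaching _ii_disable(), assuming the II
 * control node is "/dev/ii" and the command name is DSWIOC_DISABLE (both
 * are assumptions for illustration only; the real values come from dsw.h
 * and the product packaging).  With CV_IS_GROUP set, shadow_vol carries a
 * group name and every member of that group is disabled in one call.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *
 *	dsw_ioctl_t args;
 *	int fd = open("/dev/ii", O_RDWR);		// assumed path
 *
 *	(void) memset(&args, 0, sizeof (args));
 *	(void) strncpy(args.shadow_vol, "backup_grp", DSW_NAMELEN);
 *	args.flags = CV_IS_GROUP;			// group disable
 *	if (ioctl(fd, DSWIOC_DISABLE, &args) < 0)	// assumed cmd name
 *		perror("ii disable");
 */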
/*
* _ii_stat
* Get state of the shadow.
*
* Calling/Exit State:
* Returns 0 on success, otherwise an error code is returned
* and any additional error information is copied out to the user.
* The size variable in the dsw_stat_t is set to the FBA size
* of the volume, the stat variable is set to the state, and
* the structure is copied out.
*/
/*ARGSUSED*/
int
_ii_stat(intptr_t arg, int ilp32, int *rvp)
{
dsw_stat_t ustat;
dsw_stat32_t ustat32;
_ii_info_t *ip;
spcs_s_info_t kstatus;
char *group, *cluster;
if (ilp32) {
if (copyin((void *)arg, &ustat32, sizeof (ustat32)) < 0)
return (EFAULT);
II_TAIL_COPY(ustat, ustat32, shadow_vol, dsw_stat_t);
ustat.status = (spcs_s_info_t)ustat32.status;
} else if (copyin((void *)arg, &ustat, sizeof (ustat)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!ustat.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(ustat.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, ustat.status,
DSW_ENOTFOUND));
ustat.stat = ip->bi_flags;
ustat.size = ip->bi_size;
ustat.mtime = ip->bi_mtime;
if (ilp32)
bzero(ustat32.overflow_vol, DSW_NAMELEN);
else
bzero(ustat.overflow_vol, DSW_NAMELEN);
if (ip->bi_overflow) {
(void) strncpy(ilp32 ? ustat32.overflow_vol :
ustat.overflow_vol, ip->bi_overflow->ii_volname,
DSW_NAMELEN);
}
ustat.shdsize = ip->bi_shdchks;
if ((ip->bi_flags) & DSW_TREEMAP) {
ustat.shdused = ip->bi_shdchkused;
} else {
ustat.shdused = 0;
}
/* copy over group and cluster associations */
group = ilp32? ustat32.group_name : ustat.group_name;
cluster = ilp32? ustat32.cluster_tag : ustat.cluster_tag;
bzero(group, DSW_NAMELEN);
bzero(cluster, DSW_NAMELEN);
if (ip->bi_group)
(void) strncpy(group, ip->bi_group, DSW_NAMELEN);
if (ip->bi_cluster)
(void) strncpy(cluster, ip->bi_cluster, DSW_NAMELEN);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_kfree(kstatus);
if (ilp32) {
ustat32.stat = ustat.stat;
ustat32.size = ustat.size;
ustat32.shdsize = ustat.shdsize;
ustat32.shdused = ustat.shdused;
ustat32.mtime = ustat.mtime;
if (copyout(&ustat32, (void *)arg, sizeof (ustat32)))
return (EFAULT);
} else if (copyout(&ustat, (void *)arg, sizeof (ustat)))
return (EFAULT);
return (0);
}
/*
* _ii_list
* List what shadow sets are currently configured.
*
* Calling/Exit State:
* Returns 0 on success, otherwise an error code is returned
* and any additional error information is copied out to the user.
*/
/*ARGSUSED*/
int
_ii_list(intptr_t arg, int ilp32, int *rvp)
{
dsw_list_t ulist;
dsw_list32_t ulist32;
_ii_info_t *ip;
dsw_config_t cf, *cfp;
dsw_config32_t cf32, *cf32p;
int rc;
int used;
spcs_s_info_t kstatus;
if (ilp32) {
if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0)
return (EFAULT);
II_TAIL_COPY(ulist, ulist32, list_size, dsw_list_t);
ulist.status = (spcs_s_info_t)ulist32.status;
} else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
cf32p = (dsw_config32_t *)(unsigned long)ulist32.list;
cfp = ulist.list;
ulist.list_used = 0;
mutex_enter(&_ii_info_mutex);
ip = _ii_info_top;
DTRACE_PROBE1(_ii_list_count, int, ulist.list_size);
for (rc = used = 0; used < ulist.list_size && ip; ip = ip->bi_next) {
if (ip->bi_disabled)
continue;
mutex_enter(&ip->bi_mutex);
ip->bi_ioctl++;
if (ilp32) {
bzero(&cf32, sizeof (cf32));
cf32.flag = ip->bi_flags;
(void) strncpy(cf32.master_vol,
ii_pathname(ip->bi_mstfd), DSW_NAMELEN);
(void) strncpy(cf32.shadow_vol,
ip->bi_keyname, DSW_NAMELEN);
(void) strncpy(cf32.bitmap_vol, (ip->bi_bmpfd)
? ii_pathname(ip->bi_bmpfd)
: "<offline_bitmap>", DSW_NAMELEN);
if (copyout(&cf32, (void *)cf32p, sizeof (cf32)))
rc = EFAULT;
cf32p++;
} else {
bzero(&cf, sizeof (cf));
cf.flag = ip->bi_flags;
(void) strncpy(cf.master_vol,
ii_pathname(ip->bi_mstfd), DSW_NAMELEN);
(void) strncpy(cf.shadow_vol,
ip->bi_keyname, DSW_NAMELEN);
(void) strncpy(cf.bitmap_vol, (ip->bi_bmpfd)
? ii_pathname(ip->bi_bmpfd)
: "<offline_bitmap>", DSW_NAMELEN);
if (copyout(&cf, (void *)cfp, sizeof (cf)))
rc = EFAULT;
cfp++;
}
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
used++;
}
mutex_exit(&_ii_info_mutex);
spcs_s_kfree(kstatus);
if (rc)
return (rc);
ulist.list_used = used;
if (ilp32) {
ulist32.list_used = ulist.list_used;
if (copyout(&ulist32, (void *)arg, sizeof (ulist32)))
return (EFAULT);
} else if (copyout(&ulist, (void *)arg, sizeof (ulist)))
return (EFAULT);
return (0);
}
/*
* _ii_listlen
* Counts the number of items the DSWIOC_LIST and DSWIOC_OLIST
* ioctl calls would return.
*
* Calling/Exit State:
* Returns 0 on success, otherwise an error code is returned.
* Result is returned as successful ioctl value.
*/
/*ARGSUSED*/
int
_ii_listlen(int cmd, int ilp32, int *rvp)
{
_ii_info_t *ip;
_ii_overflow_t *op;
int count = 0;
switch (cmd) {
case DSWIOC_LISTLEN:
mutex_enter(&_ii_info_mutex);
for (ip = _ii_info_top; ip; ip = ip->bi_next) {
if (ip->bi_disabled == 0) {
count++;
}
}
mutex_exit(&_ii_info_mutex);
break;
case DSWIOC_OLISTLEN:
mutex_enter(&_ii_overflow_mutex);
for (op = _ii_overflow_top; op; op = op->ii_next)
count++;
mutex_exit(&_ii_overflow_mutex);
break;
default:
return (EINVAL);
}
*rvp = count;
return (0);
}
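/*
 * A user-level sketch of the LISTLEN/LIST pairing, assuming the control
 * node "/dev/ii" and that DSWIOC_LISTLEN ignores its argument (the count
 * is returned as the ioctl result):
 *
 *	dsw_list_t list;
 *	dsw_config_t *sets;
 *	int n = ioctl(fd, DSWIOC_LISTLEN, 0);
 *
 *	if (n > 0 && (sets = calloc(n, sizeof (dsw_config_t))) != NULL) {
 *		(void) memset(&list, 0, sizeof (list));
 *		list.list_size = n;
 *		list.list = sets;
 *		if (ioctl(fd, DSWIOC_LIST, &list) == 0) {
 *			// the first list.list_used entries are valid
 *		}
 *	}
 *
 * Sets may be enabled or disabled between the two calls, so list_used on
 * return can be smaller than the count obtained from DSWIOC_LISTLEN.
 */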
/*
* _ii_report_bmp
*
* Report to the user daemon that the bitmap has gone bad
*/
static int
_ii_report_bmp(_ii_info_t *ip)
{
int rc;
struct nskernd *nsk;
nsk = kmem_zalloc(sizeof (*nsk), KM_SLEEP);
if (!nsk) {
return (ENOMEM);
}
nsk->command = NSKERND_IIBITMAP;
nsk->data1 = (int64_t)(ip->bi_flags | DSW_BMPOFFLINE);
(void) strncpy(nsk->char1, ip->bi_keyname,
min(DSW_NAMELEN, NSC_MAXPATH));
rc = nskernd_get(nsk);
if (rc == 0) {
rc = (int)nsk->data1;
}
if (rc == 0) {
DTRACE_PROBE(_ii_report_bmp_end);
} else {
DTRACE_PROBE1(_ii_report_bmp_end_2, int, rc);
}
kmem_free(nsk, sizeof (*nsk));
return (rc);
}
/*
* _ii_offline
* Set volume offline flag(s) for a shadow.
*
* Calling/Exit State:
* Returns 0 on success, otherwise an error code is returned
* and any additional error information is copied out to the user.
*/
/*ARGSUSED*/
int
_ii_offline(intptr_t arg, int ilp32, int *rvp)
{
dsw_ioctl_t uparms;
dsw_ioctl32_t uparms32;
_ii_info_t *ip;
int rc;
spcs_s_info_t kstatus;
if (ilp32) {
if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
return (EFAULT);
II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
uparms.status = (spcs_s_info_t)uparms32.status;
} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!uparms.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(uparms.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, uparms.status,
DSW_ENOTFOUND));
if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uparms.status,
DSW_ERSRVFAIL));
}
mutex_exit(&ip->bi_mutex);
_ii_error(ip, uparms.flags & DSW_OFFLINE);
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, BMP);
spcs_s_kfree(kstatus);
return (0);
}
/*
* _ii_wait
* Wait for a copy to complete.
*
* Calling/Exit State:
* Returns 0 if the copy completed, otherwise error code.
*
*/
/*ARGSUSED*/
int
_ii_wait(intptr_t arg, int ilp32, int *rvp)
{
dsw_ioctl_t uparms;
dsw_ioctl32_t uparms32;
_ii_info_t *ip;
int rc = 0;
spcs_s_info_t kstatus;
if (ilp32) {
if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
return (EFAULT);
II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
uparms.status = (spcs_s_info_t)uparms32.status;
uparms.pid = uparms32.pid;
} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!uparms.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(uparms.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, uparms.status,
DSW_ENOTFOUND));
while (ip->bi_flags & DSW_COPYINGP) {
if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0) {
/* Awoken by a signal */
rc = EINTR;
break;
}
}
/* Is this an attempt to unlock the copy/update PID? */
if (uparms.flags & CV_LOCK_PID) {
if (ip->bi_locked_pid == 0) {
rc = DSW_ENOTLOCKED;
} else if (uparms.pid == -1) {
cmn_err(CE_WARN, "!ii: Copy/Update PID %d, cleared",
ip->bi_locked_pid);
ip->bi_locked_pid = 0;
} else if (uparms.pid != ip->bi_locked_pid) {
rc = DSW_EINUSE;
} else {
ip->bi_locked_pid = 0;
}
}
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
}
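/*
 * A hedged sketch of the copy-lock protocol handled above.  A caller that
 * started a copy/update with CV_LOCK_PID owns the lock recorded in
 * bi_locked_pid and releases it through this wait ioctl once the copy has
 * completed.  The command name DSWIOC_WAIT is an assumption used for
 * illustration only.
 *
 *	dsw_ioctl_t w;
 *
 *	(void) memset(&w, 0, sizeof (w));
 *	(void) strncpy(w.shadow_vol, shadow_name, DSW_NAMELEN);
 *	w.flags = CV_LOCK_PID;
 *	w.pid = getpid();	// or -1 to force-clear a stale lock
 *	(void) ioctl(fd, DSWIOC_WAIT, &w);	// assumed cmd name
 *
 * A pid of -1 clears the lock unconditionally (and logs a warning); any
 * other pid that does not match the recorded owner fails with DSW_EINUSE.
 */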
static int
_ii_reset_mstvol(_ii_info_t *ip)
{
_ii_info_t *xip;
if (!NSHADOWS(ip))
return (DSW_COPYINGS | DSW_COPYINGP);
/* check for siblings updating master */
for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
if (xip == ip)
continue;
/* check if master is okay */
if ((xip->bi_flags & DSW_MSTOFFLINE) == 0) {
return (0);
}
}
return (DSW_COPYINGS | DSW_COPYINGP);
}
/*
* _ii_reset
* Reset offlined underlying volumes
*
* Calling/Exit State:
* Returns 0 on success, otherwise an error code is returned
* and any additional error information is copied out to the user.
*/
/*ARGSUSED*/
int
_ii_reset(intptr_t arg, int ilp32, int *rvp)
{
dsw_ioctl_t uparms;
dsw_ioctl32_t uparms32;
_ii_info_t *ip;
nsc_buf_t *tmp = NULL;
int rc;
int flags;
ii_header_t *bm_header;
spcs_s_info_t kstatus;
int rtype;
if (ilp32) {
if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
return (EFAULT);
II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
uparms.status = (spcs_s_info_t)uparms32.status;
} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!uparms.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(uparms.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, uparms.status,
DSW_ENOTFOUND));
mutex_exit(&ip->bi_mutex);
/* Figure out what to do according to which volumes were flagged as offline */
if ((ip->bi_flags & DSW_OFFLINE) == 0) {
/* Nothing offline, so no op */
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_kfree(kstatus);
return (0);
}
if (!ip->bi_bmpfd) {
/* No bitmap fd, can't do anything */
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_kfree(kstatus);
return (DSW_EHDRBMP);
}
rtype = MSTR|SHDR|BMP;
if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uparms.status,
DSW_ERSRVFAIL));
}
/*
* Cannot use _ii_bm_header_get as it will fail if DSW_BMPOFFLINE
*/
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, 0, FBA_LEN(sizeof (ii_header_t)),
NSC_RDWRBUF, &tmp);
II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t)));
if (!II_SUCCESS(rc)) {
_ii_rlse_devs(ip, rtype);
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
if (tmp)
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
}
bm_header = (ii_header_t *)(tmp)->sb_vec[0].sv_addr;
if (bm_header == NULL) {
_ii_rlse_devs(ip, rtype);
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
if (tmp)
(void) nsc_free_buf(tmp);
return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
}
flags = ip->bi_flags & ~DSW_COPY_FLAGS;
if ((flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) == 0) {
if (((flags & DSW_SHDOFFLINE) == 0) &&
((flags & DSW_MSTOFFLINE) == DSW_MSTOFFLINE)) {
/* Shadow was OK but master was offline */
flags |= _ii_reset_mstvol(ip);
} else if ((flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) {
/* Shadow was offline, don't care what the master was */
flags |= (DSW_COPYINGM | DSW_COPYINGP);
}
}
if (ip->bi_flags & DSW_VOVERFLOW) {
ip->bi_flags &= ~DSW_VOVERFLOW;
ip->bi_flags |= DSW_FRECLAIM;
}
flags &= ~(DSW_OFFLINE | DSW_CFGOFFLINE | DSW_VOVERFLOW | DSW_OVERFLOW);
if ((ip->bi_flags & DSW_BMPOFFLINE) == DSW_BMPOFFLINE) {
/* free any overflow allocation */
ii_overflow_free(ip, INIT_OVR);
/* Bitmap now OK, so set up new bitmap header */
(void) strncpy(bm_header->master_vol, ii_pathname(ip->bi_mstfd),
DSW_NAMELEN);
(void) strncpy(bm_header->shadow_vol, ii_pathname(ip->bi_shdfd),
DSW_NAMELEN);
(void) strncpy(bm_header->bitmap_vol, ii_pathname(ip->bi_bmpfd),
DSW_NAMELEN);
if (ip->bi_cluster) {
(void) strncpy(bm_header->clstr_name, ip->bi_cluster,
DSW_NAMELEN);
}
if (ip->bi_group) {
(void) strncpy(bm_header->group_name, ip->bi_group,
DSW_NAMELEN);
}
bm_header->ii_type = (flags & DSW_GOLDEN) ?
DSW_GOLDEN_TYPE : DSW_QUICK_TYPE;
bm_header->ii_magic = DSW_DIRTY;
bm_header->ii_version = II_HEADER_VERSION;
bm_header->ii_shdfba = DSW_SHD_BM_OFFSET;
bm_header->ii_copyfba = DSW_COPY_BM_OFFSET;
bm_header->ii_throttle_delay = ip->bi_throttle_delay;
bm_header->ii_throttle_unit = ip->bi_throttle_unit;
ip->bi_shdfba = bm_header->ii_shdfba;
ip->bi_copyfba = bm_header->ii_copyfba;
} else if ((ip->bi_flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) {
/* bitmap didn't go offline, but shadow did */
if (ip->bi_overflow) {
ii_overflow_free(ip, RECLAIM);
}
}
_ii_lock_chunk(ip, II_NULLCHUNK);
mutex_enter(&ip->bi_mutex);
II_FLAG_ASSIGN(flags, ip);
mutex_exit(&ip->bi_mutex);
rc = ii_fill_copy_bmp(ip);
if (rc == 0)
rc = II_ZEROBM(ip);
if (rc == 0) {
if ((ip->bi_flags&(DSW_GOLDEN)) == 0) {
/* just clear bitmaps for dependent copy */
if (ip->bi_flags & DSW_TREEMAP) {
bm_header->ii_state = ip->bi_flags;
mutex_enter(&ip->bi_mutex);
rc = _ii_bm_header_put(bm_header, ip, tmp);
mutex_exit(&ip->bi_mutex);
tmp = NULL;
if (rc == 0) {
rc = ii_tinit(ip);
if (rc == 0) {
mutex_enter(&ip->bi_mutex);
bm_header =
_ii_bm_header_get(ip, &tmp);
mutex_exit(&ip->bi_mutex);
}
}
}
if (rc == 0)
II_FLAG_CLRX(DSW_COPY_FLAGS, ip);
/*
* if copy flags were set, another process may be
* waiting
*/
if (rc == 0 && (flags & DSW_COPYINGP))
cv_broadcast(&ip->bi_copydonecv);
if (rc == 0)
rc = II_COPYBM(ip);
}
}
_ii_unlock_chunk(ip, II_NULLCHUNK);
if (rc) {
if (tmp)
_ii_bm_header_free(bm_header, ip, tmp);
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, rtype);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
}
bm_header->ii_state = ip->bi_flags;
mutex_enter(&ip->bi_mutex);
rc = _ii_bm_header_put(bm_header, ip, tmp);
if (!II_SUCCESS(rc)) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, rtype);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
}
/* check with RDC */
if (ii_update_denied(ip, kstatus, (ip->bi_flags & DSW_COPYINGS) ?
CV_SHD2MST : 0, 1)) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, rtype);
return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
}
/* don't perform copy for dependent shadows */
if ((ip->bi_flags&(DSW_GOLDEN)) == 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, rtype);
return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
}
mutex_exit(&ip->bi_mutex);
/* _ii_copyvol calls _ii_ioctl_done() */
if (ip->bi_flags & DSW_COPYINGS)
rc = _ii_copyvol(ip, CV_SHD2MST, rtype, kstatus, 1);
else if (ip->bi_flags & DSW_COPYINGM)
rc = _ii_copyvol(ip, 0, rtype, kstatus, 1);
else {
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
}
_ii_rlse_devs(ip, rtype);
return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
}
/*
* _ii_version
* Get version of the InstantImage module.
*
* Calling/Exit State:
* Returns 0 on success, otherwise EFAULT is returned.
* The major and minor revisions are copied out to the user if
* successful.
*/
/*ARGSUSED*/
int
_ii_version(intptr_t arg, int ilp32, int *rvp)
{
dsw_version_t uversion;
dsw_version32_t uversion32;
if (ilp32) {
if (copyin((void *)arg, &uversion32, sizeof (uversion32)) < 0)
return (EFAULT);
uversion32.major = dsw_major_rev;
uversion32.minor = dsw_minor_rev;
uversion32.micro = dsw_micro_rev;
uversion32.baseline = dsw_baseline_rev;
if (copyout(&uversion32, (void *)arg, sizeof (uversion32)))
return (EFAULT);
} else {
if (copyin((void *)arg, &uversion, sizeof (uversion)) < 0)
return (EFAULT);
uversion.major = dsw_major_rev;
uversion.minor = dsw_minor_rev;
uversion.micro = dsw_micro_rev;
uversion.baseline = dsw_baseline_rev;
if (copyout(&uversion, (void *)arg, sizeof (uversion)))
return (EFAULT);
}
return (0);
}
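/*
 * A minimal sketch of a version query from a 64-bit application; the
 * command name DSWIOC_VERSION is an assumption for illustration:
 *
 *	dsw_version_t v;
 *
 *	(void) memset(&v, 0, sizeof (v));
 *	if (ioctl(fd, DSWIOC_VERSION, &v) == 0)
 *		(void) printf("ii %d.%d.%d (baseline %d)\n",
 *		    v.major, v.minor, v.micro, v.baseline);
 */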
/*
* _ii_copyparm
* Get and set copy parameters.
*
* Calling/Exit State:
* Returns 0 on success, otherwise EFAULT is returned.
* The previous values are returned to the user.
*/
/*ARGSUSED*/
int
_ii_copyparm(intptr_t arg, int ilp32, int *rvp)
{
dsw_copyp_t copyp;
dsw_copyp32_t copyp32;
spcs_s_info_t kstatus;
_ii_info_t *ip;
int rc = 0;
int tmp;
if (ilp32) {
if (copyin((void *)arg, &copyp32, sizeof (copyp32)) < 0)
return (EFAULT);
II_TAIL_COPY(copyp, copyp32, shadow_vol, dsw_copyp_t);
copyp.status = (spcs_s_info_t)copyp32.status;
} else if (copyin((void *)arg, &copyp, sizeof (copyp)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!copyp.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, copyp.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(copyp.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, copyp.status,
DSW_ENOTFOUND));
tmp = ip->bi_throttle_delay;
if (copyp.copy_delay != -1) {
if (copyp.copy_delay >= MIN_THROTTLE_DELAY &&
copyp.copy_delay <= MAX_THROTTLE_DELAY)
ip->bi_throttle_delay = copyp.copy_delay;
else {
cmn_err(CE_WARN, "!ii: delay out of range %d",
copyp.copy_delay);
rc = EINVAL;
}
}
copyp.copy_delay = tmp;
tmp = ip->bi_throttle_unit;
if (copyp.copy_unit != -1) {
if (copyp.copy_unit >= MIN_THROTTLE_UNIT &&
copyp.copy_unit <= MAX_THROTTLE_UNIT) {
if (rc != EINVAL)
ip->bi_throttle_unit = copyp.copy_unit;
} else {
cmn_err(CE_WARN, "!ii: unit out of range %d",
copyp.copy_unit);
if (rc != EINVAL) {
rc = EINVAL;
ip->bi_throttle_delay = copyp.copy_delay;
}
}
}
copyp.copy_unit = tmp;
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
if (ilp32) {
copyp32.copy_delay = copyp.copy_delay;
copyp32.copy_unit = copyp.copy_unit;
if (copyout(&copyp32, (void *)arg, sizeof (copyp32)) < 0)
return (EFAULT);
} else if (copyout(&copyp, (void *)arg, sizeof (copyp)))
return (EFAULT);
return (spcs_s_ocopyoutf(&kstatus, copyp.status, rc));
}
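/*
 * A hedged sketch of the get/set convention above: -1 in copy_delay or
 * copy_unit leaves that parameter unchanged, and the previous settings
 * are always copied back out, so passing -1 for both simply reads the
 * current throttle parameters.  The command name DSWIOC_COPYP is an
 * assumption for illustration.
 *
 *	dsw_copyp_t p;
 *
 *	(void) memset(&p, 0, sizeof (p));
 *	(void) strncpy(p.shadow_vol, shadow_name, DSW_NAMELEN);
 *	p.copy_delay = -1;	// query only
 *	p.copy_unit = -1;	// query only
 *	if (ioctl(fd, DSWIOC_COPYP, &p) == 0)	// assumed cmd name
 *		(void) printf("delay=%d unit=%d\n",
 *		    p.copy_delay, p.copy_unit);
 *
 * New values must lie within [MIN_THROTTLE_DELAY, MAX_THROTTLE_DELAY] and
 * [MIN_THROTTLE_UNIT, MAX_THROTTLE_UNIT]; otherwise EINVAL is returned
 * and neither parameter is changed.
 */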
/*
* _ii_suspend_vol
 *	Suspend an individual InstantImage set
*
* Calling/Exit State:
* Returns 0 on success, nonzero otherwise
*/
int
_ii_suspend_vol(_ii_info_t *ip)
{
_ii_info_t **xip;
int copy_flag;
int rc;
nsc_buf_t *tmp = NULL;
ii_header_t *bm_header;
copy_flag = ip->bi_flags & DSW_COPY_FLAGS;
_ii_stopvol(ip);
ASSERT(total_ref(ip) == 0);
if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0)
return (rc);
/* this rw_enter forces us to drain all active IO */
rw_enter(&ip->bi_linkrw, RW_WRITER);
rw_exit(&ip->bi_linkrw);
mutex_enter(&_ii_info_mutex);
for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
if (ip == *xip)
break;
}
*xip = ip->bi_next;
mutex_exit(&_ii_info_mutex);
rc = II_SAVE_BMP(ip, 1);
mutex_enter(&ip->bi_mutex);
if (rc == 0)
bm_header = _ii_bm_header_get(ip, &tmp);
if (rc == 0 && bm_header) {
bm_header->ii_magic = DSW_CLEAN;
bm_header->ii_state |= copy_flag;
bm_header->ii_throttle_delay = ip->bi_throttle_delay;
bm_header->ii_throttle_unit = ip->bi_throttle_unit;
/* copy over the mtime */
bm_header->ii_mtime = ip->bi_mtime;
/* write it to disk */
rc = _ii_bm_header_put(bm_header, ip, tmp);
}
--iigkstat.num_sets.value.ul;
mutex_exit(&ip->bi_mutex);
ii_overflow_free(ip, NO_RECLAIM);
_ii_rlse_devs(ip, BMP);
ii_sibling_free(ip);
return (rc);
}
/*
* _ii_suspend_cluster
* Cluster resource group is switching over to another node, so
* all shadowed volumes in that group are suspended.
*
 * Returns 0 on success, or DSW_ECNOTFOUND if the name of the cluster
 * resource group couldn't be found.
*/
int
_ii_suspend_cluster(char *shadow_vol)
{
int found, last;
uint64_t hash;
_ii_info_t *ip;
_ii_lsthead_t **cp, *xcp;
_ii_lstinfo_t **np, *xnp;
/* find appropriate cluster list */
mutex_enter(&_ii_cluster_mutex);
hash = nsc_strhash(shadow_vol);
for (cp = &_ii_cluster_top; *cp; cp = &((*cp)->lst_next)) {
if ((hash == (*cp)->lst_hash) && strncmp(shadow_vol,
(*cp)->lst_name, DSW_NAMELEN) == 0)
break;
}
if (!*cp) {
mutex_exit(&_ii_cluster_mutex);
return (DSW_ECNOTFOUND);
}
found = 1;
last = 0;
while (found && !last) {
found = 0;
mutex_enter(&_ii_info_mutex);
for (np = &(*cp)->lst_start; *np; np = &((*np)->lst_next)) {
ip = (*np)->lst_ip;
if (ip->bi_disabled)
continue;
found++;
ip->bi_disabled = 1;
if (NSHADOWS(ip) && (ip->bi_master == ip)) {
ip->bi_flags &= (~DSW_COPYING);
ip->bi_state |= DSW_MULTIMST;
}
mutex_exit(&_ii_info_mutex);
xnp = *np;
*np = (*np)->lst_next;
kmem_free(xnp, sizeof (_ii_lstinfo_t));
ip->bi_cluster = NULL;
(void) _ii_suspend_vol(ip);
break;
}
if (found == 0)
mutex_exit(&_ii_info_mutex);
else if (!(*cp)->lst_start) {
xcp = *cp;
*cp = (*cp)->lst_next;
kmem_free(xcp, sizeof (_ii_lsthead_t));
last = 1;
}
}
mutex_exit(&_ii_cluster_mutex);
return (0);
}
/*
* _ii_shutdown
* System is shutting down, so all shadowed volumes are suspended.
*
* This always succeeds, so always returns 0.
*/
/* ARGSUSED */
int
_ii_shutdown(intptr_t arg, int *rvp)
{
_ii_info_t **xip, *ip;
int found;
*rvp = 0;
_ii_shutting_down = 1;
/* Go through the list until only disabled entries are found */
found = 1;
while (found) {
found = 0;
mutex_enter(&_ii_info_mutex);
for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
ip = *xip;
if (ip->bi_disabled) {
/* Also covers not fully configured yet */
continue;
}
found++;
ip->bi_disabled = 1;
mutex_exit(&_ii_info_mutex);
(void) _ii_suspend_vol(ip);
break;
}
if (found == 0)
mutex_exit(&_ii_info_mutex);
}
_ii_shutting_down = 0;
return (0);
}
/*
* _ii_suspend
* Suspend an InstantImage, saving its state to allow a subsequent resume.
*
* Calling/Exit State:
* Returns 0 if the pair was suspended. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*/
/* ARGSUSED */
int
_ii_suspend(intptr_t arg, int ilp32, int *rvp)
{
dsw_ioctl_t uparms;
dsw_ioctl32_t uparms32;
_ii_info_t *ip;
int rc;
spcs_s_info_t kstatus;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
return (EFAULT);
II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
uparms.status = (spcs_s_info_t)uparms32.status;
} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!uparms.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
if ((uparms.flags & CV_IS_CLUSTER) != 0) {
rc = _ii_suspend_cluster(uparms.shadow_vol);
} else {
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(uparms.shadow_vol);
if (ip == NULL) {
mutex_exit(&_ii_info_mutex);
return (spcs_s_ocopyoutf(&kstatus, uparms.status,
DSW_ENOTFOUND));
}
ip->bi_disabled = 1;
if (NSHADOWS(ip) && (ip->bi_master == ip)) {
ip->bi_flags &= (~DSW_COPYING);
ip->bi_state |= DSW_MULTIMST;
}
mutex_exit(&_ii_info_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
rc = _ii_suspend_vol(ip);
}
return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
}
/*
* _ii_abort
* Stop any copying process for shadow.
*
* Calling/Exit State:
* Returns 0 if the abort succeeded. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*/
/* ARGSUSED */
int
_ii_abort(intptr_t arg, int ilp32, int *rvp)
{
dsw_ioctl_t uabort;
dsw_ioctl32_t uabort32;
_ii_info_t *ip;
int rc;
spcs_s_info_t kstatus;
if (ilp32) {
if (copyin((void *)arg, &uabort32, sizeof (uabort32)) < 0)
return (EFAULT);
II_TAIL_COPY(uabort, uabort32, shadow_vol, dsw_ioctl_t);
uabort.status = (spcs_s_info_t)uabort32.status;
} else if (copyin((void *)arg, &uabort, sizeof (uabort)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!uabort.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, uabort.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(uabort.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, uabort.status,
DSW_ENOTFOUND));
mutex_exit(&ip->bi_mutex);
rc = _ii_stopcopy(ip);
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, uabort.status, rc));
}
/*
* _ii_segment
* Copy out II pair bitmaps (cpy, shd, idx) in segments
*
* Calling/Exit State:
* Returns 0 if the operation succeeded. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*
*/
int
_ii_segment(intptr_t arg, int ilp32, int *rvp)
{
dsw_segment_t usegment;
dsw_segment32_t usegment32;
_ii_info_t *ip;
int rc, size;
spcs_s_info_t kstatus;
int32_t bi_idxfba;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &usegment32, sizeof (usegment32)))
return (EFAULT);
usegment.status = (spcs_s_info_t)usegment32.status;
bcopy(usegment32.shadow_vol, usegment.shadow_vol, DSW_NAMELEN);
usegment.seg_number = (unsigned)usegment32.seg_number;
usegment.shd_bitmap =
(unsigned char *)(unsigned long)usegment32.shd_bitmap;
usegment.shd_size = usegment32.shd_size;
usegment.cpy_bitmap =
(unsigned char *)(unsigned long)usegment32.cpy_bitmap;
usegment.cpy_size = usegment32.cpy_size;
usegment.idx_bitmap =
(unsigned char *)(unsigned long)usegment32.idx_bitmap;
usegment.idx_size = usegment32.idx_size;
} else if (copyin((void *)arg, &usegment, sizeof (usegment)))
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (usegment.shadow_vol[0]) {
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(usegment.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, usegment.status,
DSW_ENOTFOUND));
} else
return (spcs_s_ocopyoutf(&kstatus, usegment.status,
DSW_EEMPTY));
mutex_exit(&ip->bi_mutex);
size = ((((ip->bi_size + (DSW_SIZE-1))
/ DSW_SIZE) + (DSW_BITS-1))) / DSW_BITS;
bi_idxfba = ip->bi_copyfba + (ip->bi_copyfba - ip->bi_shdfba);
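	/*
	 * size is the length of one bitmap: ceil(bi_size / DSW_SIZE)
	 * chunks, rounded up to whole units of DSW_BITS bits.  The shadow
	 * and copy bitmaps are the same length, so (bi_copyfba - bi_shdfba)
	 * is one bitmap length in FBAs and the chunk index map begins that
	 * far beyond the copy bitmap.
	 */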
if (((nsc_size_t)usegment.seg_number > DSW_BM_FBA_LEN(ip->bi_size)) ||
(usegment.shd_size > size) ||
(usegment.cpy_size > size) ||
(!(ip->bi_flags & DSW_GOLDEN) && (usegment.idx_size > size*32))) {
		mutex_enter(&ip->bi_mutex);
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, usegment.status,
DSW_EMISMATCH));
}
if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, usegment.status,
DSW_ERSRVFAIL));
}
if (usegment.shd_bitmap && usegment.shd_size > 0)
rc = II_CO_BMP(ip, ip->bi_shdfba+usegment.seg_number,
usegment.shd_bitmap, usegment.shd_size);
if (rc == 0 && usegment.cpy_bitmap && usegment.cpy_size > 0)
rc = II_CO_BMP(ip, ip->bi_copyfba+usegment.seg_number,
usegment.cpy_bitmap, usegment.cpy_size);
if (!(ip->bi_flags & DSW_GOLDEN)) {
if (rc == 0 && usegment.idx_bitmap && usegment.idx_size > 0)
rc = II_CO_BMP(ip, bi_idxfba+usegment.seg_number*32,
usegment.idx_bitmap, usegment.idx_size);
}
_ii_rlse_devs(ip, BMP);
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
if (rc) {
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, usegment.status, DSW_EIO));
}
spcs_s_kfree(kstatus);
return (0);
}
/*
* _ii_bitmap
* Copy out II pair bitmaps to user program
*
* Calling/Exit State:
* Returns 0 if the operation succeeded. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*/
int
_ii_bitmap(intptr_t arg, int ilp32, int *rvp)
{
dsw_bitmap_t ubitmap;
dsw_bitmap32_t ubitmap32;
_ii_info_t *ip;
int rc;
spcs_s_info_t kstatus;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)))
return (EFAULT);
ubitmap.status = (spcs_s_info_t)ubitmap32.status;
bcopy(ubitmap32.shadow_vol, ubitmap.shadow_vol, DSW_NAMELEN);
ubitmap.shd_bitmap =
(unsigned char *)(unsigned long)ubitmap32.shd_bitmap;
ubitmap.shd_size = ubitmap32.shd_size;
ubitmap.copy_bitmap =
(unsigned char *)(unsigned long)ubitmap32.copy_bitmap;
ubitmap.copy_size = ubitmap32.copy_size;
} else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)))
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!ubitmap.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(ubitmap.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
DSW_ENOTFOUND));
mutex_exit(&ip->bi_mutex);
if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
DSW_ERSRVFAIL));
}
if (ubitmap.shd_bitmap && ubitmap.shd_size > 0)
rc = II_CO_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap,
ubitmap.shd_size);
if (rc == 0 && ubitmap.copy_bitmap && ubitmap.copy_size > 0)
rc = II_CO_BMP(ip, ip->bi_copyfba, ubitmap.copy_bitmap,
ubitmap.copy_size);
_ii_rlse_devs(ip, BMP);
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
if (rc) {
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO));
}
spcs_s_kfree(kstatus);
return (0);
}
/*
* _ii_export
* Exports the shadow volume
*
* Calling/Exit State:
* Returns 0 if the shadow was exported. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*
* Description:
*/
int
_ii_export(intptr_t arg, int ilp32, int *rvp)
{
dsw_ioctl_t uparms;
dsw_ioctl32_t uparms32;
_ii_info_t *ip;
nsc_fd_t *fd;
int rc = 0;
spcs_s_info_t kstatus;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
return (EFAULT);
II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
uparms.status = (spcs_s_info_t)uparms32.status;
} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!uparms.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(uparms.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, uparms.status,
DSW_ENOTFOUND));
if ((ip->bi_flags & DSW_GOLDEN) == 0 ||
((ip->bi_flags & (DSW_COPYING|DSW_SHDEXPORT|DSW_SHDIMPORT)) != 0)) {
/*
* Cannot export a dependent copy or while still copying or
* the shadow is already in an exported state
*/
rc = ip->bi_flags & (DSW_SHDEXPORT|DSW_SHDIMPORT)
? DSW_EALREADY : DSW_EDEPENDENCY;
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
}
if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uparms.status,
DSW_ERSRVFAIL));
}
II_FLAG_SET(DSW_SHDEXPORT, ip);
mutex_exit(&ip->bi_mutex);
/* this rw_enter forces us to drain all active IO */
rw_enter(&ip->bi_linkrw, RW_WRITER);
rw_exit(&ip->bi_linkrw);
mutex_enter(&ip->bi_mutex);
_ii_rlse_devs(ip, BMP);
	/* Close the shadow volume. */
if (ip->bi_shdfd) {
if (ip->bi_shdrsrv) {
nsc_release(ip->bi_shdfd);
ip->bi_shdrsrv = NULL;
}
fd = ip->bi_shdfd;
ip->bi_shdfd = NULL;
mutex_exit(&ip->bi_mutex);
(void) nsc_close(fd);
mutex_enter(&ip->bi_mutex);
}
if (ip->bi_shdrfd) {
if (ip->bi_shdrrsrv) {
nsc_release(ip->bi_shdrfd);
ip->bi_shdrrsrv = NULL;
}
fd = ip->bi_shdrfd;
ip->bi_shdrfd = NULL;
mutex_exit(&ip->bi_mutex);
(void) nsc_close(fd);
mutex_enter(&ip->bi_mutex);
}
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
(void) _ii_reserve_begin(ip);
if (ip->bi_shd_tok) {
(void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");
ip->bi_shd_tok = NULL;
}
if (ip->bi_shdr_tok) {
(void) _ii_unregister_path(ip->bi_shdr_tok, 0,
"raw shadow");
ip->bi_shdr_tok = NULL;
}
_ii_reserve_end(ip);
spcs_s_kfree(kstatus);
return (0);
}
/*
* _ii_join
* Rejoins the shadow volume
*
* Calling/Exit State:
 *	Returns 0 if the shadow was rejoined. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*
* Description:
*/
int
_ii_join(intptr_t arg, int ilp32, int *rvp)
{
dsw_bitmap_t ubitmap;
dsw_bitmap32_t ubitmap32;
_ii_info_t *ip;
uint64_t bm_size;
int rc = 0;
int rtype = 0;
spcs_s_info_t kstatus;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0)
return (EFAULT);
II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t);
ubitmap.status = (spcs_s_info_t)ubitmap32.status;
ubitmap.shd_bitmap =
(unsigned char *)(unsigned long)ubitmap32.shd_bitmap;
ubitmap.shd_size = ubitmap32.shd_size;
} else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!ubitmap.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(ubitmap.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
DSW_ENOTFOUND));
/*
	 * Check that the set has its shadow exported.
*/
if ((ip->bi_flags & DSW_SHDEXPORT) == 0) {
/*
* Cannot join if the shadow isn't exported.
*/
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
DSW_ENOTEXPORTED));
}
/* check bitmap is at least large enough for master volume size */
bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
if (ubitmap.shd_size < bm_size) {
		/* bitmap is too small */
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
DSW_EINVALBMP));
}
	/* read in bitmap and OR it with the differences bitmap */
rtype = BMP;
if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
DSW_ERSRVFAIL));
}
rc = II_CI_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap,
ubitmap.shd_size);
/* open up shadow */
if ((rc = ii_open_shadow(ip, ip->bi_keyname)) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
_ii_rlse_devs(ip, rtype);
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EOPEN));
}
ii_register_shd(ip);
if (!rc)
II_FLAG_CLR(DSW_SHDEXPORT, ip);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, rtype);
if (rc) {
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO));
}
spcs_s_kfree(kstatus);
return (0);
}
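/*
 * A hedged sketch of how export and join fit together, based on the
 * checks above: exporting marks the shadow DSW_SHDEXPORT and closes it so
 * the volume can be used elsewhere (for example imported on another
 * host), and changes made to it while it is away are tracked in a bitmap
 * by the importing side.  Joining hands that bitmap back so it can be
 * ORed into the shadow bitmap before the shadow is reopened.  The command
 * name DSWIOC_JOIN below is an assumption for illustration.
 *
 *	dsw_bitmap_t jb;
 *
 *	(void) memset(&jb, 0, sizeof (jb));
 *	(void) strncpy(jb.shadow_vol, shadow_name, DSW_NAMELEN);
 *	jb.shd_bitmap = changes;	// bitmap collected while exported
 *	jb.shd_size = changes_len;	// must cover the master volume,
 *					// i.e. at least
 *					// FBA_SIZE(DSW_BM_FBA_LEN(size))
 *	if (ioctl(fd, DSWIOC_JOIN, &jb) < 0)	// assumed cmd name
 *		perror("ii join");
 */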
/*
* _ii_ocreate
* Configures a volume suitable for use as an overflow volume.
*
* Calling/Exit State:
* Returns 0 if the volume was configured successfully. Otherwise
* an error code is returned and any additional error information
* is copied out to the user.
*
* Description:
*/
int
_ii_ocreate(intptr_t arg, int ilp32, int *rvp)
{
dsw_ioctl_t uioctl;
dsw_ioctl32_t uioctl32;
_ii_overflow_t ov;
_ii_overflow_t *op = &ov;
int rc = 0;
nsc_fd_t *fd;
nsc_iodev_t *iodev;
nsc_size_t vol_size;
char *overflow_vol;
spcs_s_info_t kstatus;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &uioctl32, sizeof (uioctl32)) < 0)
return (EFAULT);
II_TAIL_COPY(uioctl, uioctl32, shadow_vol, dsw_ioctl_t);
uioctl.status = (spcs_s_info_t)uioctl32.status;
} else if (copyin((void *)arg, &uioctl, sizeof (uioctl)) < 0)
return (EFAULT);
overflow_vol = uioctl.shadow_vol;
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!overflow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EEMPTY));
if (ii_volume(overflow_vol, 0) != NONE)
return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EINUSE));
fd = nsc_open(overflow_vol,
NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(iodev), &rc);
if (!fd)
fd = nsc_open(uioctl.shadow_vol,
NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL,
(blind_t)&(iodev), &rc);
if (fd == NULL) {
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
}
if ((rc = nsc_reserve(fd, 0)) != 0) {
spcs_s_add(kstatus, rc);
(void) nsc_close(fd);
return (spcs_s_ocopyoutf(&kstatus, uioctl.status,
DSW_ERSRVFAIL));
}
	/* set up the overflow header: magic number, version, chunk accounting */
rc = nsc_partsize(fd, &vol_size);
if (rc) {
spcs_s_add(kstatus, rc);
(void) nsc_close(fd);
return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
}
op->ii_hmagic = II_OMAGIC;
/* take 1 off as chunk 0 contains header */
op->ii_nchunks = (vol_size / DSW_SIZE) -1;
op->ii_drefcnt = 0;
op->ii_used = 1; /* we have used the header */
op->ii_unused = op->ii_nchunks - op->ii_used;
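	/* header accounting invariant: ii_used + ii_unused == ii_nchunks */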
op->ii_freehead = II_NULLNODE;
op->ii_hversion = OV_HEADER_VERSION;
op->ii_flags = 0;
op->ii_urefcnt = 0;
(void) strncpy(op->ii_volname, uioctl.shadow_vol, DSW_NAMELEN);
rc = _ii_nsc_io(0, KS_NA, fd, NSC_WRBUF, II_OHEADER_FBA,
(unsigned char *)&op->ii_do, sizeof (op->ii_do));
(void) nsc_release(fd);
(void) nsc_close(fd);
if (rc) {
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
}
spcs_s_kfree(kstatus);
return (0);
}
/*
* _ii_oattach
* Attaches the volume in the "bitmap_vol" field as an overflow volume.
*
* Calling/Exit State:
* Returns 0 if the volume was attached. Fails if the shadow group
 *	is of the wrong type (e.g. independent) or already has an overflow
* volume attached.
*
* Description:
*/
int
_ii_oattach(intptr_t arg, int ilp32, int *rvp)
{
dsw_config_t uconfig;
dsw_config32_t uconfig32;
_ii_info_t *ip;
int rc = 0;
int rtype = 0;
ii_header_t *bm_header;
nsc_buf_t *tmp = NULL;
spcs_s_info_t kstatus;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &uconfig32, sizeof (uconfig32)) < 0)
return (EFAULT);
II_TAIL_COPY(uconfig, uconfig32, shadow_vol, dsw_config_t);
uconfig.status = (spcs_s_info_t)uconfig32.status;
} else if (copyin((void *)arg, &uconfig, sizeof (uconfig)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!uconfig.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EEMPTY));
switch (ii_volume(uconfig.bitmap_vol, 0)) {
case NONE:
case OVR:
break;
default:
return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EINUSE));
}
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(uconfig.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
DSW_ENOTFOUND));
/* check shadow doesn't already have an overflow volume */
if (ip->bi_overflow) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
DSW_EALREADY));
}
	/* check that the shadow is tree-mapped, so it can take an overflow */
if ((ip->bi_flags&DSW_TREEMAP) == 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
DSW_EWRONGTYPE));
}
rtype = BMP;
if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
DSW_ERSRVFAIL));
}
/* attach volume */
if ((rc = ii_overflow_attach(ip, uconfig.bitmap_vol, 1)) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, rtype);
return (spcs_s_ocopyoutf(&kstatus, uconfig.status, rc));
}
/* re-write header so shadow can be restarted with overflow volume */
bm_header = _ii_bm_header_get(ip, &tmp);
if (bm_header == NULL) {
/* detach volume */
ii_overflow_free(ip, RECLAIM);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, rtype);
return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
DSW_EHDRBMP));
}
(void) strncpy(bm_header->overflow_vol, uconfig.bitmap_vol,
DSW_NAMELEN);
(void) _ii_bm_header_put(bm_header, ip, tmp);
_ii_rlse_devs(ip, rtype);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_kfree(kstatus);
return (0);
}
/*
* _ii_odetach
* Breaks the link with the overflow volume.
*
* Calling/Exit State:
* Returns 0 if the overflow volume was detached. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*
* Description:
*/
int
_ii_odetach(intptr_t arg, int ilp32, int *rvp)
{
dsw_bitmap_t ubitmap;
dsw_bitmap32_t ubitmap32;
_ii_info_t *ip;
int rc = 0;
int rtype = 0;
ii_header_t *bm_header;
nsc_buf_t *tmp = NULL;
spcs_s_info_t kstatus;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0)
return (EFAULT);
II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t);
ubitmap.status = (spcs_s_info_t)ubitmap32.status;
} else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!ubitmap.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(ubitmap.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
DSW_ENOTFOUND));
if ((ip->bi_flags&DSW_VOVERFLOW) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
DSW_EODEPENDENCY));
}
rtype = BMP;
if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
DSW_ERSRVFAIL));
}
ii_overflow_free(ip, RECLAIM);
/* re-write header to break link with overflow volume */
bm_header = _ii_bm_header_get(ip, &tmp);
if (bm_header == NULL) {
_ii_rlse_devs(ip, rtype);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
DSW_EHDRBMP));
}
bzero(bm_header->overflow_vol, DSW_NAMELEN);
(void) _ii_bm_header_put(bm_header, ip, tmp);
_ii_rlse_devs(ip, rtype);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
if (rc) {
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO));
}
spcs_s_kfree(kstatus);
--iigkstat.assoc_over.value.ul;
return (0);
}
/*
* _ii_gc_list
 *	Returns the names of all group/cluster lists, or of all sets in a
 *	named list
*
*/
int
_ii_gc_list(intptr_t arg, int ilp32, int *rvp, kmutex_t *mutex,
_ii_lsthead_t *lst)
{
dsw_aioctl_t ulist;
dsw_aioctl32_t ulist32;
int name_offset, i;
spcs_s_info_t kstatus;
char *carg = (char *)arg;
uint64_t hash;
_ii_lsthead_t *cp;
_ii_lstinfo_t *np;
*rvp = 0;
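	/*
	 * name_offset is offsetof(dsw_aioctl_t, shadow_vol) computed by
	 * hand; entry i of the returned name array lands at
	 * arg + name_offset + i * DSW_NAMELEN.  It is recomputed below for
	 * the ILP32 layout when the caller is a 32-bit program.
	 */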
name_offset = (int)&(((dsw_aioctl_t *)0)->shadow_vol[0]);
if (ilp32) {
if (copyin((void *) arg, &ulist32, sizeof (ulist32)) < 0)
return (EFAULT);
II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t);
ulist.status = (spcs_s_info_t)ulist32.status;
name_offset = (int)&(((dsw_aioctl32_t *)0)->shadow_vol[0]);
} else if (copyin((void *) arg, &ulist, sizeof (ulist)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
mutex_enter(mutex);
if (ulist.shadow_vol[ 0 ] != 0) {
/* search for specific list */
hash = nsc_strhash(ulist.shadow_vol);
for (cp = lst; cp; cp = cp->lst_next) {
if ((hash == cp->lst_hash) && strncmp(ulist.shadow_vol,
cp->lst_name, DSW_NAMELEN) == 0) {
break;
}
}
if (cp) {
for (i = 0, np = cp->lst_start; i < ulist.count && np;
np = np->lst_next, carg += DSW_NAMELEN, i++) {
if (copyout(np->lst_ip->bi_keyname,
carg + name_offset, DSW_NAMELEN)) {
mutex_exit(mutex);
return (spcs_s_ocopyoutf(&kstatus,
ulist.status, EFAULT));
}
}
} else {
i = 0;
}
} else {
/* return full list */
for (i = 0, cp = lst; i < ulist.count && cp;
carg += DSW_NAMELEN, i++, cp = cp->lst_next) {
if (copyout(cp->lst_name, carg + name_offset,
DSW_NAMELEN)) {
mutex_exit(mutex);
return (spcs_s_ocopyoutf(&kstatus, ulist.status,
EFAULT));
}
}
}
mutex_exit(mutex);
ulist32.count = ulist.count = i;
if (ilp32) {
if (copyout(&ulist32, (void *) arg, name_offset))
return (EFAULT);
} else {
if (copyout(&ulist, (void*) arg, name_offset))
return (EFAULT);
}
return (spcs_s_ocopyoutf(&kstatus, ulist.status, 0));
}
/*
* _ii_olist
 *	Lists the names of the overflow volumes that are currently configured.
*
* Calling/Exit State:
 *	Returns 0 if the list was copied out successfully. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*
* Description:
*/
int
_ii_olist(intptr_t arg, int ilp32, int *rvp)
{
dsw_aioctl_t ulist;
dsw_aioctl32_t ulist32;
_ii_overflow_t *op;
int rc = 0;
int name_offset, i;
char *carg = (char *)arg;
spcs_s_info_t kstatus;
*rvp = 0;
name_offset = (int)&(((dsw_aioctl_t *)0)->shadow_vol[0]);
if (ilp32) {
if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0)
return (EFAULT);
II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t);
ulist.status = (spcs_s_info_t)ulist32.status;
name_offset = (int)&(((dsw_aioctl32_t *)0)->shadow_vol[0]);
} else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
i = 0;
mutex_enter(&_ii_overflow_mutex);
for (op = _ii_overflow_top; i < ulist.count && op;
carg += DSW_NAMELEN) {
if (copyout(op->ii_volname, carg+name_offset, DSW_NAMELEN)) {
mutex_exit(&_ii_overflow_mutex);
return (spcs_s_ocopyoutf(&kstatus, ulist.status,
EFAULT));
}
i++;
op = op->ii_next;
}
mutex_exit(&_ii_overflow_mutex);
ulist32.count = ulist.count = i;
/* return count of items listed to user */
if (ilp32) {
if (copyout(&ulist32, (void *)arg, name_offset))
return (EFAULT);
} else {
if (copyout(&ulist, (void *)arg, name_offset))
return (EFAULT);
}
return (spcs_s_ocopyoutf(&kstatus, ulist.status, rc));
}
/*
* _ii_ostat
 *	Reports the status of an overflow volume.
*
* Calling/Exit State:
 *	Returns 0 if the status was copied out successfully. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*
* Description:
*/
int
_ii_ostat(intptr_t arg, int ilp32, int *rvp, int is_iost_2)
{
dsw_ostat_t ustat;
dsw_ostat32_t ustat32;
_ii_overflow_t *op;
spcs_s_info_t kstatus;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &ustat32, sizeof (ustat32)) < 0)
return (EFAULT);
II_TAIL_COPY(ustat, ustat32, overflow_vol, dsw_ostat_t);
ustat.status = (spcs_s_info_t)ustat32.status;
} else if (copyin((void *)arg, &ustat, sizeof (ustat)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!ustat.overflow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY));
op = _ii_find_overflow(ustat.overflow_vol);
if (op == NULL)
return (spcs_s_ocopyoutf(&kstatus, ustat.status,
DSW_ENOTFOUND));
ustat.nchunks = op->ii_nchunks;
ustat.used = op->ii_used;
ustat.unused = op->ii_unused;
ustat.drefcnt = op->ii_drefcnt;
ustat.crefcnt = op->ii_crefcnt;
if (is_iost_2) {
ustat.hversion = op->ii_hversion;
ustat.flags = op->ii_flags;
ustat.hmagic = op->ii_hmagic;
}
spcs_s_kfree(kstatus);
if (ilp32) {
ustat32.nchunks = ustat.nchunks;
ustat32.used = ustat.used;
ustat32.unused = ustat.unused;
ustat32.drefcnt = ustat.drefcnt;
ustat32.crefcnt = ustat.crefcnt;
if (is_iost_2) {
ustat32.hversion = ustat.hversion;
ustat32.flags = ustat.flags;
ustat32.hmagic = ustat.hmagic;
}
if (copyout(&ustat32, (void *)arg, sizeof (ustat32)))
return (EFAULT);
} else {
if (copyout(&ustat, (void *)arg, sizeof (ustat)))
return (EFAULT);
}
return (0);
}
/*
* _ii_move_grp()
* Move a set from one group to another, possibly creating the new
* group.
*/
int
_ii_move_grp(intptr_t arg, int ilp32, int *rvp)
{
dsw_movegrp_t umove;
dsw_movegrp32_t umove32;
spcs_s_info_t kstatus;
_ii_info_t *ip;
int rc = 0;
nsc_buf_t *tmp;
ii_header_t *bm_header;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0)
return (EFAULT);
II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t);
umove.status = (spcs_s_info_t)umove32.status;
} else if (copyin((void *)arg, &umove, sizeof (umove)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!umove.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(umove.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (!ip)
return (spcs_s_ocopyoutf(&kstatus, umove.status,
DSW_ENOTFOUND));
if (!umove.new_group[0]) {
/* are we clearing the group association? */
if (ip->bi_group) {
DTRACE_PROBE2(_ii_move_grp1, char *, ip->bi_keyname,
char *, ip->bi_group);
rc = II_UNLINK_GROUP(ip);
}
} else if (!ip->bi_group) {
rc = II_LINK_GROUP(ip, umove.new_group);
DTRACE_PROBE2(_ii_move_grp2, char *, ip->bi_keyname,
char *, ip->bi_group);
} else {
/* remove it from one group and add it to the other */
DTRACE_PROBE3(_ii_move_grp, char *, ip->bi_keyname,
char *, ip->bi_group, char *, umove.new_group);
rc = II_UNLINK_GROUP(ip);
if (!rc)
rc = II_LINK_GROUP(ip, umove.new_group);
}
/* ** BEGIN UPDATE BITMAP HEADER ** */
if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, umove.status,
DSW_ERSRVFAIL));
}
bm_header = _ii_bm_header_get(ip, &tmp);
if (bm_header) {
(void) strncpy(bm_header->group_name, umove.new_group,
DSW_NAMELEN);
(void) _ii_bm_header_put(bm_header, ip, tmp);
}
_ii_rlse_devs(ip, BMP);
/* ** END UPDATE BITMAP HEADER ** */
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, umove.status, rc));
}
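/*
 * A minimal user-level sketch of moving a set between groups, assuming
 * the command name DSWIOC_MOVEGRP (illustration only).  An empty
 * new_group clears the set's group association; a non-empty name creates
 * the group if it does not already exist.
 *
 *	dsw_movegrp_t mg;
 *
 *	(void) memset(&mg, 0, sizeof (mg));
 *	(void) strncpy(mg.shadow_vol, shadow_name, DSW_NAMELEN);
 *	(void) strncpy(mg.new_group, "nightly", DSW_NAMELEN);
 *	(void) ioctl(fd, DSWIOC_MOVEGRP, &mg);	// assumed cmd name
 */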
/*
* _ii_change_tag()
 *	Change the cluster resource tag of a set: clear it, set it for the
 *	first time, or move the set from one cluster to another.
*/
int
_ii_change_tag(intptr_t arg, int ilp32, int *rvp)
{
dsw_movegrp_t umove;
dsw_movegrp32_t umove32;
spcs_s_info_t kstatus;
_ii_info_t *ip;
int rc = 0;
nsc_buf_t *tmp;
ii_header_t *bm_header;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0)
return (EFAULT);
II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t);
umove.status = (spcs_s_info_t)umove32.status;
} else if (copyin((void *)arg, &umove, sizeof (umove)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!umove.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(umove.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (!ip)
return (spcs_s_ocopyoutf(&kstatus, umove.status,
DSW_ENOTFOUND));
if (!umove.new_group[0]) {
		/* are we clearing the cluster association? */
if (ip->bi_cluster) {
DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname,
char *, ip->bi_cluster);
rc = II_UNLINK_CLUSTER(ip);
}
} else if (!ip->bi_cluster) {
		/* are we adding it to a cluster for the first time? */
rc = II_LINK_CLUSTER(ip, umove.new_group);
DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname,
char *, ip->bi_cluster);
} else {
		/* remove it from one cluster and add it to the other */
DTRACE_PROBE3(_ii_change_tag_2, char *, ip->bi_keyname,
char *, ip->bi_cluster, char *, umove.new_group);
rc = II_UNLINK_CLUSTER(ip);
if (!rc)
rc = II_LINK_CLUSTER(ip, umove.new_group);
}
/* ** BEGIN UPDATE BITMAP HEADER ** */
if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, umove.status,
DSW_ERSRVFAIL));
}
bm_header = _ii_bm_header_get(ip, &tmp);
if (bm_header) {
(void) strncpy(bm_header->clstr_name, umove.new_group,
DSW_NAMELEN);
(void) _ii_bm_header_put(bm_header, ip, tmp);
}
_ii_rlse_devs(ip, BMP);
/* ** END UPDATE BITMAP HEADER ** */
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(&kstatus, umove.status, rc));
}
/*
* _ii_spcs_s_ocopyoutf()
* Wrapper for spcs_s_ocopyoutf() used by _ii_chk_copy() which permits
* the spcs_s_info_t argument to be NULL. _ii_chk_copy() requires this
* functionality as it is sometimes called by _ii_control_copy() which
* has no user context to copy any errors into. At all other times a NULL
* spcs_s_info_t argument would indicate a bug in the calling function.
*/
static int
_ii_spcs_s_ocopyoutf(spcs_s_info_t *kstatusp, spcs_s_info_t ustatus, int err)
{
if (ustatus)
return (spcs_s_ocopyoutf(kstatusp, ustatus, err));
spcs_s_kfree(*kstatusp);
return (err);
}
static int
_ii_chk_copy(_ii_info_t *ip, int flags, spcs_s_info_t *kstatusp, pid_t pid,
spcs_s_info_t ustatus)
{
_ii_info_t *xip;
int rc;
int rtype;
if ((ip->bi_flags & DSW_COPYINGP) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING));
}
if (ip->bi_flags & DSW_OFFLINE) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EOFFLINE));
}
if ((ip->bi_flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
DSW_EISEXPORTED));
}
if ((flags & CV_SHD2MST) == CV_SHD2MST) {
if ((ip->bi_flags & DSW_COPYINGM) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
DSW_ECOPYING));
}
/* check if any sibling shadow is copying towards this master */
for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
if (ip != xip && (xip->bi_flags & DSW_COPYINGS) != 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
DSW_ECOPYING));
}
}
}
if (((flags & CV_SHD2MST) == 0) &&
((ip->bi_flags & DSW_COPYINGS) != 0)) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING));
}
if (ip->bi_flags & DSW_TREEMAP) {
if ((ip->bi_flags & DSW_OVERFLOW) && (flags & CV_SHD2MST)) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
DSW_EINCOMPLETE));
}
}
/* Assure that no other PID owns this copy/update */
if (ip->bi_locked_pid == 0) {
if (flags & CV_LOCK_PID)
ip->bi_locked_pid = pid;
} else if (ip->bi_locked_pid != pid) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EINUSE));
}
mutex_exit(&ip->bi_mutex);
rtype = MSTR|SHDR|BMP;
if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(*kstatusp, rc);
return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
DSW_ERSRVFAIL));
}
if (ii_update_denied(ip, *kstatusp, flags & CV_SHD2MST, 0)) {
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, rtype);
return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
DSW_EOPACKAGE));
}
return (0);
}
static int
_ii_do_copy(_ii_info_t *ip, int flags, spcs_s_info_t kstatus, int waitflag)
{
int rc = 0;
int rtype = MSTR|SHDR|BMP;
_ii_overflow_t *op;
int quick_update = 0;
waitflag = (waitflag != 0);
/*
* a copy of a tree-mapped device must be downgraded to
* an update.
*/
if (ip->bi_flags & DSW_TREEMAP)
flags |= CV_BMP_ONLY;
/*
* If we want to update the dependent shadow we only need to zero
* the shadow bitmap.
*/
if (((ip->bi_flags & DSW_GOLDEN) == 0) &&
(flags & (CV_BMP_ONLY|CV_SHD2MST)) == CV_BMP_ONLY) {
DTRACE_PROBE(DEPENDENT);
/* assign updating time */
ip->bi_mtime = ddi_get_time();
if (ip->bi_flags & DSW_TREEMAP) {
DTRACE_PROBE(COMPACT_DEPENDENT);
if (ip->bi_overflow &&
(ip->bi_overflow->ii_flags & IIO_VOL_UPDATE) == 0) {
/* attempt to do a quick update */
quick_update = 1;
ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE;
ip->bi_overflow->ii_detachcnt = 1;
}
rc = ii_tinit(ip);
if (quick_update && ip->bi_overflow) {
/* clean up */
ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE);
ip->bi_overflow->ii_detachcnt = 0;
}
}
if (rc == 0)
rc = II_ZEROBM(ip); /* update copy of shadow */
if (((op = ip->bi_overflow) != NULL) &&
(op->ii_hversion >= 1) && (op->ii_hmagic == II_OMAGIC)) {
mutex_enter(&_ii_overflow_mutex);
if (ip->bi_flags & DSW_OVRHDRDRTY) {
mutex_enter(&ip->bi_mutex);
ip->bi_flags &= ~DSW_OVRHDRDRTY;
mutex_exit(&ip->bi_mutex);
ASSERT(op->ii_urefcnt > 0);
op->ii_urefcnt--;
}
if (op->ii_urefcnt == 0) {
op->ii_flags &= ~IIO_CNTR_INVLD;
op->ii_unused = op->ii_nchunks - 1;
}
mutex_exit(&_ii_overflow_mutex);
}
mutex_enter(&ip->bi_mutex);
II_FLAG_CLR(DSW_OVERFLOW, ip);
mutex_exit(&ip->bi_mutex);
_ii_unlock_chunk(ip, II_NULLCHUNK);
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, rtype);
if (rc) {
spcs_s_add(kstatus, rc);
return (DSW_EIO);
} else {
DTRACE_PROBE(_ii_do_copy_end);
return (0);
}
}
/*
* need to perform an actual copy.
*/
/*
* Perform bitmap copy if asked or from dependent shadow to master.
*/
if ((flags & CV_BMP_ONLY) ||
((flags & CV_SHD2MST) &&
((ip->bi_flags & DSW_GOLDEN) == 0))) {
DTRACE_PROBE(INDEPENDENT_fast);
rc = II_ORBM(ip); /* save shadow bits for copy */
} else {
DTRACE_PROBE(INDEPENDENT_slow);
rc = ii_fill_copy_bmp(ip); /* set bits for independent copy */
}
if (rc == 0)
rc = II_ZEROBM(ip);
_ii_unlock_chunk(ip, II_NULLCHUNK);
if (rc == 0) {
mutex_enter(&ip->bi_mutex);
if (ip->bi_flags & (DSW_COPYINGP | DSW_SHDEXPORT)) {
rc = (ip->bi_flags & DSW_COPYINGP)
? DSW_ECOPYING : DSW_EISEXPORTED;
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
_ii_rlse_devs(ip, rtype);
return (rc);
}
/* assign copying time */
ip->bi_mtime = ddi_get_time();
if (flags & CV_SHD2MST)
II_FLAG_SET(DSW_COPYINGS | DSW_COPYINGP, ip);
else
II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip);
mutex_exit(&ip->bi_mutex);
rc = _ii_copyvol(ip, (flags & CV_SHD2MST),
rtype, kstatus, waitflag);
} else {
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
}
if (waitflag)
_ii_rlse_devs(ip, rtype);
return (rc);
}
/*
* _ii_copy
* Copy or update (take snapshot) II volume.
*
* Calling/Exit State:
* Returns 0 if the operation succeeded. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*/
int
_ii_copy(intptr_t arg, int ilp32, int *rvp)
{
dsw_ioctl_t ucopy;
dsw_ioctl32_t ucopy32;
_ii_info_t *ip;
int rc = 0;
spcs_s_info_t kstatus;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0)
return (EFAULT);
II_TAIL_COPY(ucopy, ucopy32, shadow_vol, dsw_ioctl_t);
ucopy.status = (spcs_s_info_t)ucopy32.status;
} else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!ucopy.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, ucopy.status, DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(ucopy.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
DSW_ENOTFOUND));
/* Check that the copy/update makes sense */
if ((rc = _ii_chk_copy(ip, ucopy.flags, &kstatus, ucopy.pid,
ucopy.status)) == 0) {
/* perform the copy */
_ii_lock_chunk(ip, II_NULLCHUNK);
/* _ii_do_copy() calls _ii_ioctl_done() */
rc = _ii_do_copy(ip, ucopy.flags, kstatus, 1);
return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc));
}
return (rc);
}
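/*
* Illustrative only - a minimal userland sketch of driving this entry
* point. The control node path, the DSWIOC_COPY ioctl name and the
* spcs_s_ucreate() status helper are assumptions based on dsw.h and
* unistat, not verified here:
*
*	dsw_ioctl_t args;
*
*	bzero(&args, sizeof (args));
*	(void) strncpy(args.shadow_vol, shadow_name, DSW_NAMELEN);
*	args.flags = CV_SHD2MST;	-- copy the shadow back to the master
*	args.status = spcs_s_ucreate();
*	rc = ioctl(open("/dev/ii", O_RDWR), DSWIOC_COPY, &args);
*/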
/*
* _ii_mass_copy
* Copies/updates the sets pointed to in the ipa array.
*
* Calling/Exit State:
* Returns 0 if the operation was successful, otherwise an
* error code.
*/
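/*
* Each entry of ipa[] must be handed to this routine with its bi_mutex
* held and bi_ioctl already incremented (as done by _ii_find_set() and
* _ii_group_copy()); _ii_chk_copy() drops the mutex for each set it
* validates.
*/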
int
_ii_mass_copy(_ii_info_t **ipa, dsw_aioctl_t *ucopy, int wait)
{
int i;
int rc = 0;
int failed;
int rtype = MSTR|SHDR|BMP;
_ii_info_t *ip;
spcs_s_info_t kstatus;
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
/* Check copy validity */
for (i = 0; i < ucopy->count; i++) {
ip = ipa[i];
rc = _ii_chk_copy(ip, ucopy->flags, &kstatus, ucopy->pid,
ucopy->status);
if (rc) {
/* Clean up the mess */
DTRACE_PROBE1(_ii_mass_copy_end1, int, rc);
/*
* The array ipa now looks like:
* 0..(i-1): needs mutex_enter/ioctl_done/mutex_exit
* i: needs nothing (_ii_chk_copy does cleanup)
* (i+1)..n: needs just ioctl_done/mutex_exit
*/
failed = i;
for (i = 0; i < failed; i++) {
mutex_enter(&(ipa[i]->bi_mutex));
_ii_ioctl_done(ipa[i]);
mutex_exit(&(ipa[i]->bi_mutex));
_ii_rlse_devs(ipa[i], rtype);
}
/* skip 'failed', start with failed + 1 */
for (i = failed + 1; i < ucopy->count; i++) {
_ii_ioctl_done(ipa[i]);
mutex_exit(&(ipa[i]->bi_mutex));
}
return (rc);
}
}
/* Check that no two shadows in this request share the same master */
if (ucopy->flags & CV_SHD2MST) {
/* Reset the state of all masters */
for (i = 0; i < ucopy->count; i++) {
ip = ipa[i];
ip->bi_master->bi_state &= ~DSW_MSTTARGET;
}
for (i = 0; i < ucopy->count; i++) {
ip = ipa[i];
/*
* Check the state of the master. If DSW_MSTTARGET is
* already set, this master is also the target of another
* shadow within this request.
*/
if (ip->bi_master->bi_state & DSW_MSTTARGET) {
rc = EINVAL;
break;
}
/*
* Set the DSW_MSTTARGET bit on the master associated
* with this shadow. This will allow us to detect
* multiple shadows pointing to this master within
* this loop.
*/
ip->bi_master->bi_state |= DSW_MSTTARGET;
}
}
/* Handle error */
if (rc) {
DTRACE_PROBE1(_ii_mass_copy_end2, int, rc);
for (i = 0; i < ucopy->count; i++) {
ip = ipa[i];
_ii_rlse_devs(ip, rtype);
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
}
return (spcs_s_ocopyoutf(&kstatus, ucopy->status, rc));
}
/* Lock bitmaps & prepare counts */
for (i = 0; i < ucopy->count; i++) {
ip = ipa[i];
_ii_lock_chunk(ip, II_NULLCHUNK);
if (ip->bi_overflow) {
ip->bi_overflow->ii_detachcnt = 0;
}
}
/* flag the overflow volumes involved and count the updates against each */
for (i = 0; i < ucopy->count; i++) {
ip = ipa[i];
if (ip->bi_overflow) {
ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE;
if ((ucopy->flags & (CV_BMP_ONLY|CV_SHD2MST)) ==
CV_BMP_ONLY) {
++ip->bi_overflow->ii_detachcnt;
}
}
}
/* Perform copy */
for (i = 0; i < ucopy->count; i++) {
ip = ipa[i];
rc = _ii_do_copy(ip, ucopy->flags, kstatus, wait);
/* Hmm... what should be done if one of these copies fails? */
}
/* clear out flags so as to prevent any accidental reuse */
for (i = 0; i < ucopy->count; i++) {
ip = ipa[i];
if (ip->bi_overflow)
ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE);
}
/*
* We can only clean up the kstatus structure if there are
* no waiters. If someone's waiting for the information,
* _ii_copyvolp() uses spcs_s_add to write to kstatus. Panic
* would ensue if we freed it up now.
*/
if (!wait)
rc = spcs_s_ocopyoutf(&kstatus, ucopy->status, rc);
return (rc);
}
/*
* _ii_list_copy
* Retrieve a list from a character array and use _ii_mass_copy to
* initiate a copy/update operation on all of the specified sets.
*
* Calling/Exit State:
* Returns 0 if the operation was successful, otherwise an
* error code.
*/
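/*
* The list argument holds ucopy->count consecutive DSW_NAMELEN byte
* names, as copied in from the user's dsw_aioctl_t by _ii_acopy().
*/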
int
_ii_list_copy(char *list, dsw_aioctl_t *ucopy, int wait)
{
int i;
int rc = 0;
char *name;
_ii_info_t *ip;
_ii_info_t **ipa;
ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP);
/* Find the sets and mark an ioctl in progress on each */
name = list;
mutex_enter(&_ii_info_mutex);
for (i = 0; i < ucopy->count; i++, name += DSW_NAMELEN) {
ip = _ii_find_set(name);
if (ip == NULL) {
rc = DSW_ENOTFOUND;
break;
}
ipa[i] = ip;
}
if (rc != 0) {
/* Failed to find all sets, release those we do have */
while (i-- > 0) {
ip = ipa[i];
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
}
} else {
/* Begin copy operation */
rc = _ii_mass_copy(ipa, ucopy, wait);
}
mutex_exit(&_ii_info_mutex);
kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count);
return (rc);
}
/*
* _ii_group_copy
* Retrieve list of sets in a group and use _ii_mass_copy to initiate
* a copy/update of all of them.
*
* Calling/Exit State:
* Returns 0 if the operation was successful, otherwise an
* error code.
*/
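/*
* Lock ordering used here: _ii_group_mutex, then _ii_info_mutex, then
* each set's bi_mutex (taken to bump bi_ioctl and left held for
* _ii_mass_copy(), which drops it while validating the set).
*/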
int
_ii_group_copy(char *name, dsw_aioctl_t *ucopy, int wait)
{
int i;
int rc;
uint64_t hash;
_ii_info_t **ipa;
_ii_lsthead_t *head;
_ii_lstinfo_t *np;
/* find group */
hash = nsc_strhash(name);
mutex_enter(&_ii_group_mutex);
for (head = _ii_group_top; head; head = head->lst_next) {
if (hash == head->lst_hash && strncmp(head->lst_name,
name, DSW_NAMELEN) == 0)
break;
}
if (!head) {
mutex_exit(&_ii_group_mutex);
DTRACE_PROBE(_ii_group_copy);
return (DSW_EGNOTFOUND);
}
/* Count entries */
for (ucopy->count = 0, np = head->lst_start; np; np = np->lst_next)
++ucopy->count;
if (ucopy->count == 0) {
mutex_exit(&_ii_group_mutex);
return (DSW_EGNOTFOUND);
}
ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP);
if (ipa == NULL) {
mutex_exit(&_ii_group_mutex);
return (ENOMEM);
}
/* Create list */
mutex_enter(&_ii_info_mutex);
np = head->lst_start;
for (i = 0; i < ucopy->count; i++) {
ASSERT(np != 0);
ipa[i] = np->lst_ip;
mutex_enter(&ipa[i]->bi_mutex);
ipa[i]->bi_ioctl++;
np = np->lst_next;
}
/* Begin copy operation */
rc = _ii_mass_copy(ipa, ucopy, wait);
mutex_exit(&_ii_info_mutex);
mutex_exit(&_ii_group_mutex);
kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count);
return (rc);
}
/*
* _ii_acopy
* Copy or update (take snapshot) II multiple volumes.
*
* Calling/Exit State:
* Returns 0 if the operation succeeded. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*/
int
_ii_acopy(intptr_t arg, int ilp32, int *rvp)
{
int rc;
int name_offset;
char *list;
char *nptr;
char name[DSW_NAMELEN];
dsw_aioctl_t ucopy;
dsw_aioctl32_t ucopy32;
spcs_s_info_t kstatus;
*rvp = 0;
name_offset = (int)&(((dsw_aioctl_t *)0)->shadow_vol[0]);
if (ilp32) {
if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0)
return (EFAULT);
II_TAIL_COPY(ucopy, ucopy32, flags, dsw_ioctl_t);
ucopy.status = (spcs_s_info_t)ucopy32.status;
name_offset = (int)&(((dsw_aioctl32_t *)0)->shadow_vol[0]);
} else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0)
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
nptr = (char *)arg + name_offset;
rc = 0;
if (ucopy.flags & CV_IS_GROUP) {
if (copyin(nptr, name, DSW_NAMELEN) < 0)
return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
EFAULT));
/* kstatus information is handled within _ii_group_copy */
rc = _ii_group_copy(name, &ucopy, 0);
} else if (ucopy.count > 0) {
list = kmem_alloc(DSW_NAMELEN * ucopy.count, KM_SLEEP);
if (list == NULL)
return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
ENOMEM));
if (copyin(nptr, list, DSW_NAMELEN * ucopy.count) < 0)
return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
EFAULT));
rc = _ii_list_copy(list, &ucopy, 0);
kmem_free(list, DSW_NAMELEN * ucopy.count);
}
return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc));
}
/*
* _ii_bitsset
* Copy out II pair bitmaps to user program
*
* Calling/Exit State:
* Returns 0 if the operation succeeded. Otherwise an error code
* is returned and any additional error information is copied
* out to the user.
*/
int
_ii_bitsset(intptr_t arg, int ilp32, int cmd, int *rvp)
{
dsw_bitsset_t ubitsset;
dsw_bitsset32_t ubitsset32;
nsc_size_t nbitsset;
_ii_info_t *ip;
int rc;
spcs_s_info_t kstatus;
int bitmap_size;
*rvp = 0;
if (ilp32) {
if (copyin((void *)arg, &ubitsset32, sizeof (ubitsset32)))
return (EFAULT);
ubitsset.status = (spcs_s_info_t)ubitsset32.status;
bcopy(ubitsset32.shadow_vol, ubitsset.shadow_vol, DSW_NAMELEN);
} else if (copyin((void *)arg, &ubitsset, sizeof (ubitsset)))
return (EFAULT);
kstatus = spcs_s_kcreate();
if (kstatus == NULL)
return (ENOMEM);
if (!ubitsset.shadow_vol[0])
return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
DSW_EEMPTY));
mutex_enter(&_ii_info_mutex);
ip = _ii_find_set(ubitsset.shadow_vol);
mutex_exit(&_ii_info_mutex);
if (ip == NULL)
return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
DSW_ENOTFOUND));
mutex_exit(&ip->bi_mutex);
if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
DSW_ERSRVFAIL));
}
ubitsset.tot_size = ip->bi_size / DSW_SIZE;
if ((ip->bi_size % DSW_SIZE) != 0)
++ubitsset.tot_size;
bitmap_size = (ubitsset.tot_size + 7) / 8;
if (cmd == DSWIOC_SBITSSET)
rc = II_CNT_BITS(ip, ip->bi_shdfba, &nbitsset, bitmap_size);
else
rc = II_CNT_BITS(ip, ip->bi_copyfba, &nbitsset, bitmap_size);
ubitsset.tot_set = nbitsset;
_ii_rlse_devs(ip, BMP);
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
if (rc) {
spcs_s_add(kstatus, rc);
return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, DSW_EIO));
}
spcs_s_kfree(kstatus);
/* return the bit counts to the user */
if (ilp32) {
ubitsset32.status = (spcs_s_info32_t)ubitsset.status;
ubitsset32.tot_size = ubitsset.tot_size;
ubitsset32.tot_set = ubitsset.tot_set;
rc = copyout(&ubitsset32, (void *)arg, sizeof (ubitsset32));
} else {
rc = copyout(&ubitsset, (void *)arg, sizeof (ubitsset));
}
return (rc);
}
/*
* _ii_stopvol
* Stop any copying process for shadow, and stop shadowing
*
*/
static void
_ii_stopvol(_ii_info_t *ip)
{
nsc_path_t *mst_tok;
nsc_path_t *mstr_tok;
nsc_path_t *shd_tok;
nsc_path_t *shdr_tok;
nsc_path_t *bmp_tok;
int rc;
while (_ii_stopcopy(ip) == EINTR)
;
DTRACE_PROBE(_ii_stopvol);
mutex_enter(&ip->bi_mutex);
mst_tok = ip->bi_mst_tok;
mstr_tok = ip->bi_mstr_tok;
shd_tok = ip->bi_shd_tok;
shdr_tok = ip->bi_shdr_tok;
bmp_tok = ip->bi_bmp_tok;
ip->bi_shd_tok = 0;
ip->bi_shdr_tok = 0;
if (!NSHADOWS(ip)) {
ip->bi_mst_tok = 0;
ip->bi_mstr_tok = 0;
}
ip->bi_bmp_tok = 0;
/* Wait for any _ii_open() calls to complete */
while (ip->bi_ioctl) {
ip->bi_state |= DSW_IOCTL;
cv_wait(&ip->bi_ioctlcv, &ip->bi_mutex);
}
mutex_exit(&ip->bi_mutex);
rc = _ii_reserve_begin(ip);
if (rc) {
cmn_err(CE_WARN, "!_ii_stopvol: _ii_reserve_begin %d", rc);
}
if (!NSHADOWS(ip)) {
if (mst_tok) {
rc = _ii_unregister_path(mst_tok, NSC_PCATCH,
"master");
if (rc)
cmn_err(CE_WARN, "!ii: unregister master %d",
rc);
}
if (mstr_tok) {
rc = _ii_unregister_path(mstr_tok, NSC_PCATCH,
"raw master");
if (rc)
cmn_err(CE_WARN, "!ii: unregister raw "
"master %d", rc);
}
}
if (shd_tok) {
rc = _ii_unregister_path(shd_tok, NSC_PCATCH, "shadow");
if (rc)
cmn_err(CE_WARN, "!ii: unregister shadow %d", rc);
}
if (shdr_tok) {
rc = _ii_unregister_path(shdr_tok, NSC_PCATCH, "raw shadow");
if (rc)
cmn_err(CE_WARN, "!ii: unregister raw shadow %d", rc);
}
if (bmp_tok) {
rc = _ii_unregister_path(bmp_tok, NSC_PCATCH, "bitmap");
if (rc)
cmn_err(CE_WARN, "!ii: unregister bitmap %d", rc);
}
_ii_reserve_end(ip);
/* Wait for all necessary _ii_close() calls to complete */
mutex_enter(&ip->bi_mutex);
while (total_ref(ip) != 0) {
ip->bi_state |= DSW_CLOSING;
cv_wait(&ip->bi_closingcv, &ip->bi_mutex);
}
if (!NSHADOWS(ip)) {
nsc_set_owner(ip->bi_mstfd, NULL);
nsc_set_owner(ip->bi_mstrfd, NULL);
}
nsc_set_owner(ip->bi_shdfd, NULL);
nsc_set_owner(ip->bi_shdrfd, NULL);
mutex_exit(&ip->bi_mutex);
}
/*
* _ii_ioctl_done
* If this is the last ioctl to complete, wake up all processes waiting
* for ioctls to complete
*
*/
static void
_ii_ioctl_done(_ii_info_t *ip)
{
ASSERT(ip->bi_ioctl > 0);
ip->bi_ioctl--;
if (ip->bi_ioctl == 0 && (ip->bi_state & DSW_IOCTL)) {
ip->bi_state &= ~DSW_IOCTL;
cv_broadcast(&ip->bi_ioctlcv);
}
}
/*
* _ii_find_vol
* Search the configured shadows list for the supplied volume.
* If found, flag an ioctl in progress and return the locked _ii_info_t.
*
* The caller must check to see if the bi_disabled flag is set and
* treat it appropriately.
*
* ASSUMPTION:
* _ii_info_mutex must be locked prior to calling this function
*
*/
static _ii_info_t *
_ii_find_vol(char *volume, int vol)
{
_ii_info_t **xip, *ip;
for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
if ((*xip)->bi_disabled)
continue;
if (strcmp(volume, vol == MST ? ii_pathname((*xip)->bi_mstfd) :
(*xip)->bi_keyname) == 0) {
break;
}
}
if (!*xip) {
DTRACE_PROBE(VolNotFound);
return (NULL);
}
ip = *xip;
if (!ip->bi_shd_tok && ((ip->bi_flags & DSW_SHDEXPORT) == 0)) {
/* Not fully configured until bi_shd_tok is set */
DTRACE_PROBE(SetNotConfiged);
return (NULL);
}
mutex_enter(&ip->bi_mutex);
ip->bi_ioctl++;
return (ip);
}
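/*
* _ii_find_set()
*	Convenience wrapper around _ii_find_vol(): sets are looked up by
*	their shadow (key) name, whereas a MST lookup matches against the
*	master pathname.
*/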
static _ii_info_t *
_ii_find_set(char *volume)
{
return (_ii_find_vol(volume, SHD));
}
/*
* _ii_find_overflow
* Search the configured shadows list for the supplied overflow volume.
*
*/
static _ii_overflow_t *
_ii_find_overflow(char *volume)
{
_ii_overflow_t **xop, *op;
mutex_enter(&_ii_overflow_mutex);
DTRACE_PROBE(_ii_find_overflowmutex);
for (xop = &_ii_overflow_top; *xop; xop = &(*xop)->ii_next) {
if (strcmp(volume, (*xop)->ii_volname) == 0) {
break;
}
}
if (!*xop) {
mutex_exit(&_ii_overflow_mutex);
return (NULL);
}
op = *xop;
mutex_exit(&_ii_overflow_mutex);
return (op);
}
/*
* _ii_bm_header_get
* Fetch the bitmap volume header
*
*/
ii_header_t *
_ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp)
{
ii_header_t *hdr;
nsc_off_t read_fba;
int rc;
ASSERT(ip->bi_bmprsrv); /* assert bitmap is reserved */
ASSERT(MUTEX_HELD(&ip->bi_mutex));
if ((ip->bi_flags & DSW_BMPOFFLINE) != 0)
return (NULL);
*tmp = NULL;
read_fba = 0;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, read_fba,
FBA_LEN(sizeof (ii_header_t)), NSC_RDWRBUF, tmp);
II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t)));
if (!II_SUCCESS(rc)) {
if (ii_debug > 2)
cmn_err(CE_WARN, "!ii: nsc_alloc_buf returned 0x%x",
rc);
if (*tmp)
(void) nsc_free_buf(*tmp);
*tmp = NULL;
mutex_exit(&ip->bi_mutex);
_ii_error(ip, DSW_BMPOFFLINE);
mutex_enter(&ip->bi_mutex);
return (NULL);
}
hdr = (ii_header_t *)(*tmp)->sb_vec[0].sv_addr;
return (hdr);
}
/*
* _ii_bm_header_free
* Free the bitmap volume header
*
*/
/* ARGSUSED */
void
_ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp)
{
(void) nsc_free_buf(tmp);
}
/*
* _ii_bm_header_put
* Write out the modified bitmap volume header and free it
*
*/
/* ARGSUSED */
int
_ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp)
{
nsc_off_t write_fba;
int rc;
ASSERT(MUTEX_HELD(&ip->bi_mutex));
write_fba = 0;
II_NSC_WRITE(ip, bitmap, rc, tmp, write_fba,
FBA_LEN(sizeof (ii_header_t)), 0);
(void) nsc_free_buf(tmp);
if (!II_SUCCESS(rc)) {
mutex_exit(&ip->bi_mutex);
_ii_error(ip, DSW_BMPOFFLINE);
mutex_enter(&ip->bi_mutex);
DTRACE_PROBE(_ii_bm_header_put);
return (rc);
} else {
DTRACE_PROBE(_ii_bm_header_put_end);
return (0);
}
}
/*
* _ii_flag_op
* Clear or set a flag in bi_flags and in the bitmap header's ii_state.
* This relies on the ownership of the header block's nsc_buf
* for locking.
*
*/
void
_ii_flag_op(int and, int or, _ii_info_t *ip, int update)
{
ii_header_t *bm_header;
nsc_buf_t *tmp;
ip->bi_flags &= and;
ip->bi_flags |= or;
if (update == TRUE) {
/*
* No point trying to access bitmap header if it's offline
* or has been disassociated from set via DSW_HANGING
*/
if ((ip->bi_flags & (DSW_BMPOFFLINE|DSW_HANGING)) == 0) {
bm_header = _ii_bm_header_get(ip, &tmp);
if (bm_header == NULL) {
if (tmp)
(void) nsc_free_buf(tmp);
DTRACE_PROBE(_ii_flag_op_end);
return;
}
bm_header->ii_state &= and;
bm_header->ii_state |= or;
/* copy over the mtime */
bm_header->ii_mtime = ip->bi_mtime;
(void) _ii_bm_header_put(bm_header, ip, tmp);
}
}
}
/*
* _ii_nsc_io
* Perform read or write on an underlying nsc device
* ip - II set information (used for error flagging and kstats)
* ks - kstat I/O class (KS_MST, KS_SHD, KS_BMP, KS_OVR or KS_NA)
* fd - nsc file descriptor
* flag - nsc io direction and characteristics flag
* fba_pos - offset from beginning of device in FBAs
* io_addr - pointer to data buffer
* io_len - length of io in bytes
*/
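/*
* Returns 0 on success. A failure to allocate or pre-read the nsc buffer
* is mapped to EIO; a failing write returns the underlying nsc status.
*/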
int
_ii_nsc_io(_ii_info_t *ip, int ks, nsc_fd_t *fd, int flag, nsc_off_t fba_pos,
unsigned char *io_addr, nsc_size_t io_len)
{
nsc_buf_t *tmp = NULL;
nsc_vec_t *vecp;
uchar_t *vaddr;
size_t copy_len;
int64_t vlen;
int rc;
nsc_size_t fba_req, fba_len;
nsc_size_t maxfbas = 0;
nsc_size_t tocopy;
unsigned char *toaddr;
rc = nsc_maxfbas(fd, 0, &maxfbas);
if (!II_SUCCESS(rc)) {
#ifdef DEBUG
cmn_err(CE_WARN, "!_ii_nsc_io: maxfbas failed (%d)", rc);
#endif
maxfbas = DSW_CBLK_FBA;
}
toaddr = io_addr;
fba_req = FBA_LEN(io_len);
#ifdef DEBUG_SPLIT_IO
cmn_err(CE_NOTE, "!_ii_nsc_io: maxfbas = %08x", maxfbas);
cmn_err(CE_NOTE, "!_ii_nsc_io: toaddr=%08x, io_len=%08x, fba_req=%08x",
toaddr, io_len, fba_req);
#endif
loop:
tmp = NULL;
fba_len = min(fba_req, maxfbas);
tocopy = min(io_len, FBA_SIZE(fba_len));
DTRACE_PROBE2(_ii_nsc_io_buffer, nsc_off_t, fba_pos,
nsc_size_t, fba_len);
#ifdef DEBUG_SPLIT_IO
cmn_err(CE_NOTE, "!_ii_nsc_io: fba_pos=%08x, fba_len=%08x",
fba_pos, fba_len);
#endif
#ifndef DISABLE_KSTATS
if (flag & NSC_READ) {
switch (ks) {
case KS_MST:
II_READ_START(ip, master);
break;
case KS_SHD:
II_READ_START(ip, shadow);
break;
case KS_BMP:
II_READ_START(ip, bitmap);
break;
case KS_OVR:
II_READ_START(ip, overflow);
break;
default:
cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
break;
}
}
#endif
rc = nsc_alloc_buf(fd, fba_pos, fba_len, flag, &tmp);
#ifndef DISABLE_KSTATS
if (flag & NSC_READ) {
switch (ks) {
case KS_MST:
II_READ_END(ip, master, rc, fba_len);
break;
case KS_SHD:
II_READ_END(ip, shadow, rc, fba_len);
break;
case KS_BMP:
II_READ_END(ip, bitmap, rc, fba_len);
break;
case KS_OVR:
II_READ_END(ip, overflow, rc, fba_len);
break;
}
}
#endif
if (!II_SUCCESS(rc)) {
if (tmp) {
(void) nsc_free_buf(tmp);
}
return (EIO);
}
if ((flag & (NSC_WRITE|NSC_READ)) == NSC_WRITE &&
(FBA_OFF(io_len) != 0)) {
/*
* Not overwriting all of the last FBA, so read in the
* old contents now before we overwrite it with the new
* data.
*/
#ifdef DEBUG_SPLIT_IO
cmn_err(CE_NOTE, "!_ii_nsc_io: Read-B4-Write %08x",
fba_pos+FBA_NUM(io_len));
#endif
#ifdef DISABLE_KSTATS
rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
#else
switch (ks) {
case KS_MST:
II_NSC_READ(ip, master, rc, tmp,
fba_pos+FBA_NUM(io_len), 1, 0);
break;
case KS_SHD:
II_NSC_READ(ip, shadow, rc, tmp,
fba_pos+FBA_NUM(io_len), 1, 0);
break;
case KS_BMP:
II_NSC_READ(ip, bitmap, rc, tmp,
fba_pos+FBA_NUM(io_len), 1, 0);
break;
case KS_OVR:
II_NSC_READ(ip, overflow, rc, tmp,
fba_pos+FBA_NUM(io_len), 1, 0);
break;
case KS_NA:
rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
break;
default:
cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
break;
}
#endif
if (!II_SUCCESS(rc)) {
(void) nsc_free_buf(tmp);
return (EIO);
}
}
vecp = tmp->sb_vec;
vlen = vecp->sv_len;
vaddr = vecp->sv_addr;
while (tocopy > 0) {
if (vecp->sv_addr == 0 || vecp->sv_len == 0) {
#ifdef DEBUG
cmn_err(CE_WARN, "!_ii_nsc_io: ran off end of handle");
#endif
break;
}
copy_len = (size_t)min(vlen, tocopy);
DTRACE_PROBE1(_ii_nsc_io_bcopy, size_t, copy_len);
if (flag & NSC_WRITE)
bcopy(io_addr, vaddr, copy_len);
else
bcopy(vaddr, io_addr, copy_len);
toaddr += copy_len;
tocopy -= copy_len;
io_addr += copy_len;
io_len -= copy_len;
vaddr += copy_len;
vlen -= copy_len;
if (vlen <= 0) {
vecp++;
vaddr = vecp->sv_addr;
vlen = vecp->sv_len;
}
}
if (flag & NSC_WRITE) {
#ifdef DISABLE_KSTATS
rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
#else
switch (ks) {
case KS_MST:
II_NSC_WRITE(ip, master, rc, tmp, tmp->sb_pos,
tmp->sb_len, 0);
break;
case KS_SHD:
II_NSC_WRITE(ip, shadow, rc, tmp, tmp->sb_pos,
tmp->sb_len, 0);
break;
case KS_BMP:
II_NSC_WRITE(ip, bitmap, rc, tmp, tmp->sb_pos,
tmp->sb_len, 0);
break;
case KS_OVR:
II_NSC_WRITE(ip, overflow, rc, tmp, tmp->sb_pos,
tmp->sb_len, 0);
break;
case KS_NA:
rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
break;
default:
cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
break;
}
#endif
if (!II_SUCCESS(rc)) {
(void) nsc_free_buf(tmp);
return (rc);
}
}
(void) nsc_free_buf(tmp);
fba_pos += fba_len;
fba_req -= fba_len;
if (fba_req > 0)
goto loop;
return (0);
}
/*
* ii_overflow_attach
*/
static int
ii_overflow_attach(_ii_info_t *ip, char *name, int first)
{
_ii_overflow_t *op;
int rc = 0;
int reserved = 0;
int mutex_set = 0;
int II_OLD_OMAGIC = 0x426c7565; /* "Blue" */
mutex_enter(&_ii_overflow_mutex);
/* search for name in list */
for (op = _ii_overflow_top; op; op = op->ii_next) {
if (strncmp(op->ii_volname, name, DSW_NAMELEN) == 0)
break;
}
if (op) {
ip->bi_overflow = op;
op->ii_crefcnt++;
op->ii_drefcnt++;
if ((op->ii_flags & IIO_CNTR_INVLD) && (op->ii_hversion >= 1)) {
if (!first)
mutex_enter(&ip->bi_mutex);
ip->bi_flags |= DSW_OVRHDRDRTY;
if (!first)
mutex_exit(&ip->bi_mutex);
op->ii_urefcnt++;
}
#ifndef DISABLE_KSTATS
ip->bi_kstat_io.overflow = op->ii_overflow;
(void) strlcpy(ip->bi_kstat_io.ovrio, op->ii_ioname,
KSTAT_DATA_CHAR_LEN);
#endif
/* write header */
if (!(rc = nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI))) {
rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd,
NSC_WRBUF, II_OHEADER_FBA,
(unsigned char *)&op->ii_do, sizeof (op->ii_do));
(void) nsc_release(op->ii_dev->bi_fd);
++iigkstat.assoc_over.value.ul;
}
mutex_exit(&_ii_overflow_mutex);
return (rc);
}
if ((op = kmem_zalloc(sizeof (*op), KM_SLEEP)) == NULL) {
mutex_exit(&_ii_overflow_mutex);
return (ENOMEM);
}
if ((op->ii_dev = kmem_zalloc(sizeof (_ii_info_dev_t), KM_SLEEP))
== NULL) {
kmem_free(op, sizeof (*op));
mutex_exit(&_ii_overflow_mutex);
return (ENOMEM);
}
#ifndef DISABLE_KSTATS
if ((op->ii_overflow = _ii_overflow_kstat_create(ip, op))) {
ip->bi_kstat_io.overflow = op->ii_overflow;
(void) strlcpy(op->ii_ioname, ip->bi_kstat_io.ovrio,
KSTAT_DATA_CHAR_LEN);
} else {
goto fail;
}
#endif
/* open overflow volume */
op->ii_dev->bi_fd = nsc_open(name, NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL,
(blind_t)&(op->ii_dev->bi_iodev), &rc);
if (!op->ii_dev->bi_fd)
op->ii_dev->bi_fd = nsc_open(name,
NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL,
(blind_t)&(op->ii_dev->bi_iodev), &rc);
if (op->ii_dev->bi_fd == NULL) {
goto fail;
}
if ((rc = nsc_reserve(op->ii_dev->bi_fd, 0)) != 0)
goto fail;
reserved = 1;
/* register path */
op->ii_dev->bi_tok = _ii_register_path(name, NSC_DEVICE,
_ii_ior);
if (!op->ii_dev->bi_tok) {
goto fail;
}
/* read header */
rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_RDBUF,
II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do));
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_OVROFFLINE);
goto fail;
}
/* On resume, check for old hmagic */
if (strncmp(op->ii_volname, name, DSW_NAMELEN) ||
((op->ii_hmagic != II_OLD_OMAGIC) &&
(op->ii_hmagic != II_OMAGIC))) {
rc = DSW_EOMAGIC;
goto fail;
}
/* set up counts */
op->ii_crefcnt = 1;
op->ii_drefcnt = 0;
op->ii_urefcnt = 0;
op->ii_hmagic = II_OMAGIC;
if (!first) {
/* if header version > 0, check if header written */
if (((op->ii_flags & IIO_HDR_WRTN) == 0) &&
(op->ii_hversion >= 1)) {
op->ii_flags |= IIO_CNTR_INVLD;
mutex_enter(&ip->bi_mutex);
ip->bi_flags |= DSW_OVRHDRDRTY;
mutex_exit(&ip->bi_mutex);
op->ii_urefcnt++;
}
}
op->ii_flags &= ~IIO_HDR_WRTN;
op->ii_drefcnt++;
/* write header */
rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF,
II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do));
nsc_release(op->ii_dev->bi_fd);
reserved = 0;
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_OVROFFLINE);
goto fail;
}
mutex_init(&op->ii_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_set++;
/* link onto list */
op->ii_next = _ii_overflow_top;
_ii_overflow_top = op;
ip->bi_overflow = op;
++iigkstat.assoc_over.value.ul;
mutex_exit(&_ii_overflow_mutex);
DTRACE_PROBE(_ii_overflow_attach_end);
return (0);
fail:
#ifndef DISABLE_KSTATS
/* Clean-up kstat stuff */
if (op->ii_overflow) {
kstat_delete(op->ii_overflow);
mutex_destroy(&op->ii_kstat_mutex);
}
#endif
/* clean up mutex if we made it that far */
if (mutex_set) {
mutex_destroy(&op->ii_mutex);
}
if (op->ii_dev) {
if (op->ii_dev->bi_tok) {
(void) _ii_unregister_path(op->ii_dev->bi_tok, 0,
"overflow");
}
if (reserved)
(void) nsc_release(op->ii_dev->bi_fd);
if (op->ii_dev->bi_fd)
(void) nsc_close(op->ii_dev->bi_fd);
kmem_free(op->ii_dev, sizeof (_ii_info_dev_t));
}
kmem_free(op, sizeof (*op));
mutex_exit(&_ii_overflow_mutex);
return (rc);
}
/*
* ii_overflow_free
* Assumes that ip is locked for I/O
*/
static void
ii_overflow_free(_ii_info_t *ip, int reclaim)
{
_ii_overflow_t *op, **xp;
if ((op = ip->bi_overflow) == NULL)
return;
ip->bi_kstat_io.overflow = NULL;
mutex_enter(&_ii_overflow_mutex);
switch (reclaim) {
case NO_RECLAIM:
if (--(op->ii_drefcnt) == 0) {
/* indicate header written */
op->ii_flags |= IIO_HDR_WRTN;
/* write out header */
ASSERT(op->ii_dev->bi_fd);
(void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI);
(void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd,
NSC_WRBUF, II_OHEADER_FBA,
(unsigned char *)&op->ii_do,
sizeof (op->ii_do));
nsc_release(op->ii_dev->bi_fd);
}
break;
case RECLAIM:
ii_reclaim_overflow(ip);
/* FALLTHRU */
case INIT_OVR:
if (--(op->ii_drefcnt) == 0) {
/* reset to new condition, c.f. _ii_ocreate() */
op->ii_used = 1;
op->ii_unused = op->ii_nchunks - op->ii_used;
op->ii_freehead = II_NULLNODE;
}
/* write out header */
ASSERT(op->ii_dev->bi_fd);
(void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI);
(void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF,
II_OHEADER_FBA, (unsigned char *)&op->ii_do,
sizeof (op->ii_do));
nsc_release(op->ii_dev->bi_fd);
}
if (--(op->ii_crefcnt) == 0) {
/* Close fd and unlink from active chain; */
(void) _ii_unregister_path(op->ii_dev->bi_tok, 0, "overflow");
(void) nsc_close(op->ii_dev->bi_fd);
for (xp = &_ii_overflow_top; *xp && *xp != op;
xp = &((*xp)->ii_next))
/* NULL statement */;
*xp = op->ii_next;
if (op->ii_overflow) {
kstat_delete(op->ii_overflow);
}
/* Clean up ii_overflow_t mutexes */
mutex_destroy(&op->ii_kstat_mutex);
mutex_destroy(&op->ii_mutex);
if (op->ii_dev)
kmem_free(op->ii_dev, sizeof (_ii_info_dev_t));
kmem_free(op, sizeof (*op));
}
ip->bi_overflow = NULL;
--iigkstat.assoc_over.value.ul;
mutex_exit(&_ii_overflow_mutex);
}
/*
* ii_sibling_free
* Free resources and unlink the sibling chains etc.
*/
static void
ii_sibling_free(_ii_info_t *ip)
{
_ii_info_t *hip, *yip;
if (!ip)
return;
if (ip->bi_shdr_tok)
(void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow");
if (ip->bi_shd_tok)
(void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");
rw_enter(&ip->bi_linkrw, RW_WRITER);
ip->bi_shd_tok = NULL;
ip->bi_shdr_tok = NULL;
if (NSHADOWS(ip)) {
mutex_enter(&_ii_info_mutex);
if (ip->bi_head == ip) { /* removing head of list */
hip = ip->bi_sibling;
for (yip = hip; yip; yip = yip->bi_sibling)
yip->bi_head = hip;
} else { /* removing member of list */
hip = ip->bi_head;
for (yip = ip->bi_head; yip; yip = yip->bi_sibling) {
if (yip->bi_sibling == ip) {
yip->bi_sibling = ip->bi_sibling;
break;
}
}
}
hip->bi_master->bi_head = hip;
if (ip->bi_master == ip) { /* master I/O goes through this */
mutex_exit(&_ii_info_mutex);
_ii_info_freeshd(ip);
rw_exit(&ip->bi_linkrw);
return;
}
mutex_exit(&_ii_info_mutex);
} else {
if (ip->bi_master != ip) /* last ref to master side ip */
_ii_info_free(ip->bi_master); /* ==A== */
}
if (ip->bi_master != ip) { /* info_free ==A== will close these */
/*
* Null out any pointers to shared master side resources
* that should only be freed once when the last reference
* to this master is freed and calls _ii_info_free().
*/
ip->bi_mstdev = NULL;
ip->bi_mstrdev = NULL;
ip->bi_kstat_io.master = NULL;
}
rw_exit(&ip->bi_linkrw);
_ii_info_free(ip);
}
/*
* _ii_info_freeshd
* Free shadow side resources
*
* Calling/Exit State:
* No mutexes should be held on entry to this function.
*
* Description:
* Frees the system resources associated with the shadow
* access, leaving the master side alone. This allows the
* original master side to continue in use while there are
* outstanding references to this _ii_info_t.
*/
static void
_ii_info_freeshd(_ii_info_t *ip)
{
if (!ip)
return;
if ((ip->bi_flags&DSW_HANGING) == DSW_HANGING)
return; /* this work has already been completed */
II_FLAG_SETX(DSW_HANGING, ip);
if (ip->bi_cluster)
(void) II_UNLINK_CLUSTER(ip);
if (ip->bi_group)
(void) II_UNLINK_GROUP(ip);
if (ip->bi_shdfd && ip->bi_shdrsrv)
nsc_release(ip->bi_shdfd);
if (ip->bi_shdrfd && ip->bi_shdrrsrv)
nsc_release(ip->bi_shdrfd);
if (ip->bi_bmpfd && ip->bi_bmprsrv)
nsc_release(ip->bi_bmpfd);
if (ip->bi_bmp_tok)
(void) _ii_unregister_path(ip->bi_bmp_tok, 0, "bitmap");
if (ip->bi_shdr_tok)
(void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow");
if (ip->bi_shd_tok)
(void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");
ip->bi_shd_tok = NULL;
ip->bi_shdr_tok = NULL;
if (ip->bi_shdfd)
(void) nsc_close(ip->bi_shdfd);
if (ip->bi_shdrfd)
(void) nsc_close(ip->bi_shdrfd);
if (ip->bi_bmpfd)
(void) nsc_close(ip->bi_bmpfd);
ip->bi_shdfd = NULL;
ip->bi_shdrfd = NULL;
ip->bi_bmpfd = NULL;
if (ip->bi_busy)
kmem_free(ip->bi_busy,
1 + (ip->bi_size / (DSW_SIZE * DSW_BITS)));
ip->bi_busy = NULL;
if (ip->bi_kstat_io.shadow) {
kstat_delete(ip->bi_kstat_io.shadow);
ip->bi_kstat_io.shadow = NULL;
}
if (ip->bi_kstat_io.bitmap) {
kstat_delete(ip->bi_kstat_io.bitmap);
ip->bi_kstat_io.bitmap = NULL;
}
if (ip->bi_kstat) {
kstat_delete(ip->bi_kstat);
ip->bi_kstat = NULL;
}
}
/*
* _ii_info_free
* Free resources
*
* Calling/Exit State:
* No mutexes should be held on entry to this function.
*
* Description:
* Frees the system resources associated with the specified
* II information structure.
*/
static void
_ii_info_free(_ii_info_t *ip)
{
_ii_info_t **xip;
if (!ip)
return;
mutex_enter(&_ii_info_mutex);
for (xip = &_ii_mst_top; *xip; xip = &((*xip)->bi_nextmst)) {
if (ip == *xip) {
*xip = ip->bi_nextmst;
break;
}
}
mutex_exit(&_ii_info_mutex);
/* this rw_enter forces us to wait until all nsc_buffers are freed */
rw_enter(&ip->bi_linkrw, RW_WRITER);
if (ip->bi_mstdev && ip->bi_mstfd && ip->bi_mstrsrv)
nsc_release(ip->bi_mstfd);
if (ip->bi_mstrdev && ip->bi_mstrfd && ip->bi_mstrrsrv)
nsc_release(ip->bi_mstrfd);
if (ip->bi_mstdev && ip->bi_mst_tok)
(void) _ii_unregister_path(ip->bi_mst_tok, 0, "master");
if (ip->bi_mstrdev && ip->bi_mstr_tok)
(void) _ii_unregister_path(ip->bi_mstr_tok, 0, "raw master");
if (ip->bi_mstdev && ip->bi_mstfd)
(void) nsc_close(ip->bi_mstfd);
if (ip->bi_mstrdev && ip->bi_mstrfd)
(void) nsc_close(ip->bi_mstrfd);
rw_exit(&ip->bi_linkrw);
if (ip->bi_mstdev) {
nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev));
}
if (ip->bi_mstrdev) {
nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev));
}
if (ip->bi_kstat_io.master) {
kstat_delete(ip->bi_kstat_io.master);
}
if (ip->bi_kstat_io.shadow) {
kstat_delete(ip->bi_kstat_io.shadow);
ip->bi_kstat_io.shadow = 0;
}
if (ip->bi_kstat_io.bitmap) {
kstat_delete(ip->bi_kstat_io.bitmap);
ip->bi_kstat_io.bitmap = 0;
}
if (ip->bi_kstat) {
kstat_delete(ip->bi_kstat);
ip->bi_kstat = NULL;
}
/* this rw_enter forces us to wait until all nsc_buffers are freed */
rw_enter(&ip->bi_linkrw, RW_WRITER);
rw_exit(&ip->bi_linkrw);
mutex_destroy(&ip->bi_mutex);
mutex_destroy(&ip->bi_rsrvmutex);
mutex_destroy(&ip->bi_rlsemutex);
mutex_destroy(&ip->bi_bmpmutex);
mutex_destroy(&ip->bi_chksmutex);
cv_destroy(&ip->bi_copydonecv);
cv_destroy(&ip->bi_reservecv);
cv_destroy(&ip->bi_releasecv);
cv_destroy(&ip->bi_ioctlcv);
cv_destroy(&ip->bi_closingcv);
cv_destroy(&ip->bi_busycv);
rw_destroy(&ip->bi_busyrw);
rw_destroy(&ip->bi_linkrw);
_ii_info_freeshd(ip);
#ifdef DEBUG
ip->bi_head = (_ii_info_t *)0xdeadbeef;
#endif
nsc_kmem_free(ip, sizeof (*ip));
}
/*
* _ii_copy_chunks
* Perform a copy of some chunks
*
* Calling/Exit State:
* Returns 0 if the data was copied successfully, otherwise
* error code.
*
* Description:
* flag is set to CV_SHD2MST if the data is to be copied from the shadow
* to the master, 0 if it is to be copied from the master to the shadow.
*/
static int
_ii_copy_chunks(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks)
{
int mst_flag;
int shd_flag;
int ovr_flag;
nsc_off_t pos;
nsc_size_t len;
int rc;
nsc_off_t shd_pos;
chunkid_t shd_chunk;
nsc_buf_t *mst_tmp = NULL;
nsc_buf_t *shd_tmp = NULL;
if (ip->bi_flags & DSW_MSTOFFLINE) {
DTRACE_PROBE(_ii_copy_chunks_end);
return (EIO);
}
if (ip->bi_flags & (DSW_SHDOFFLINE|DSW_SHDEXPORT|DSW_SHDIMPORT)) {
DTRACE_PROBE(_ii_copy_chunks_end);
return (EIO);
}
if (flag == CV_SHD2MST) {
mst_flag = NSC_WRBUF|NSC_WRTHRU;
shd_flag = NSC_RDBUF;
} else {
shd_flag = NSC_WRBUF|NSC_WRTHRU;
mst_flag = NSC_RDBUF;
}
pos = DSW_CHK2FBA(chunk_num);
len = DSW_SIZE * nchunks;
if (pos + len > ip->bi_size)
len = ip->bi_size - pos;
if (ip->bi_flags & DSW_TREEMAP) {
ASSERT(nchunks == 1);
shd_chunk = ii_tsearch(ip, chunk_num);
if (shd_chunk == II_NULLNODE) {
/* shadow is full */
mutex_enter(&ip->bi_mutex);
II_FLAG_SET(DSW_OVERFLOW, ip);
mutex_exit(&ip->bi_mutex);
DTRACE_PROBE(_ii_copy_chunks_end);
return (EIO);
}
ovr_flag = II_ISOVERFLOW(shd_chunk);
shd_pos = DSW_CHK2FBA((ovr_flag) ?
II_2OVERFLOW(shd_chunk) : shd_chunk);
} else {
ovr_flag = FALSE;
shd_chunk = chunk_num;
shd_pos = pos;
}
/*
* Always allocate the master side before the shadow to
* avoid deadlocks on the same chunk.
*/
DTRACE_PROBE2(_ii_copy_chunks_alloc, nsc_off_t, pos, nsc_size_t, len);
II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, mst_flag, &mst_tmp);
if (!II_SUCCESS(rc)) {
if (mst_tmp)
(void) nsc_free_buf(mst_tmp);
_ii_error(ip, DSW_MSTOFFLINE);
DTRACE_PROBE(_ii_copy_chunks_end);
return (rc);
}
if (ovr_flag) {
/* use overflow volume */
(void) nsc_reserve(OVRFD(ip), NSC_MULTI);
II_ALLOC_BUF(ip, overflow, rc, OVRFD(ip), shd_pos, len,
shd_flag, &shd_tmp);
} else {
II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), shd_pos, len, shd_flag,
&shd_tmp);
}
if (!II_SUCCESS(rc)) {
(void) nsc_free_buf(mst_tmp);
if (shd_tmp)
(void) nsc_free_buf(shd_tmp);
if (ovr_flag)
nsc_release(OVRFD(ip));
_ii_error(ip, DSW_SHDOFFLINE);
if (ovr_flag)
_ii_error(ip, DSW_OVROFFLINE);
DTRACE_PROBE(_ii_copy_chunks_end);
return (rc);
}
/*
* The direction of copy is determined by the mst_flag.
*/
DTRACE_PROBE2(_ii_copy_chunks_copy, kstat_named_t, ii_copy_direct,
int, mst_flag);
if (ii_copy_direct) {
if (mst_flag & NSC_WRBUF) {
if (ovr_flag) {
II_NSC_COPY_DIRECT(ip, overflow, master, rc,
shd_tmp, mst_tmp, shd_pos, pos, len)
} else {
II_NSC_COPY_DIRECT(ip, shadow, master, rc,
shd_tmp, mst_tmp, shd_pos, pos, len)
}
if (!II_SUCCESS(rc)) {
/* A copy has failed - something is wrong */
_ii_error(ip, DSW_MSTOFFLINE);
_ii_error(ip, DSW_SHDOFFLINE);
if (ovr_flag)
_ii_error(ip, DSW_OVROFFLINE);
}
} else {
if (ovr_flag) {
II_NSC_COPY_DIRECT(ip, master, overflow, rc,
mst_tmp, shd_tmp, pos, shd_pos, len);
} else {
II_NSC_COPY_DIRECT(ip, master, shadow, rc,
mst_tmp, shd_tmp, pos, shd_pos, len);
}
if (!II_SUCCESS(rc)) {
/*
* A failure has occurred during the above copy.
* The macro calls nsc_copy_direct, which will
* never return a read failure, only a write
* failure. With this assumption, we should
* take only the target volume offline.
*/
_ii_error(ip, DSW_SHDOFFLINE);
if (ovr_flag)
_ii_error(ip, DSW_OVROFFLINE);
}
}
} else {
if (mst_flag & NSC_WRBUF) {
rc = nsc_copy(shd_tmp, mst_tmp, shd_pos, pos, len);
if (II_SUCCESS(rc)) {
II_NSC_WRITE(ip, master, rc, mst_tmp, pos, len,
0);
if (!II_SUCCESS(rc))
_ii_error(ip, DSW_MSTOFFLINE);
} else {
/* A copy has failed - something is wrong */
_ii_error(ip, DSW_MSTOFFLINE);
_ii_error(ip, DSW_SHDOFFLINE);
}
} else {
rc = nsc_copy(mst_tmp, shd_tmp, pos, shd_pos, len);
if (II_SUCCESS(rc)) {
if (ovr_flag) {
II_NSC_WRITE(ip, overflow, rc, shd_tmp,
shd_pos, len, 0);
} else {
II_NSC_WRITE(ip, shadow, rc, shd_tmp,
shd_pos, len, 0);
}
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_SHDOFFLINE);
if (ovr_flag)
_ii_error(ip, DSW_OVROFFLINE);
}
} else {
/* A copy has failed - something is wrong */
_ii_error(ip, DSW_MSTOFFLINE);
_ii_error(ip, DSW_SHDOFFLINE);
}
}
}
(void) nsc_free_buf(mst_tmp);
(void) nsc_free_buf(shd_tmp);
if (ovr_flag)
nsc_release(OVRFD(ip));
DTRACE_PROBE(_ii_copy_chunks);
if (II_SUCCESS(rc)) {
(void) II_CLR_COPY_BITS(ip, chunk_num, nchunks);
rc = 0;
}
return (rc);
}
/*
* _ii_copy_on_write
*
* Calling/Exit State:
* Returns 0 on success, otherwise error code.
*
* Description:
* Determines if a copy on write is necessary, and performs it.
* A copy on write is necessary in the following cases:
* - No copy is in progress and the shadow bit is clear, which
* means this is the first write to this chunk.
* - A copy is in progress and the copy bit is set, which means
* that a chunk copy is required.
* If a copy to the master is to be done, make a recursive call to this
* function to do any necessary copy on write on other Instant Image sets
* that share the same master volume.
*/
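/*
* Decision summary (informal): with no copy in progress, a dependent
* (non-DSW_GOLDEN) shadow gets a master-to-shadow chunk copy when the
* shadow bit is clear; while a copy is in progress, a set copy bit forces
* a chunk copy in the direction of that copy (DSW_COPYINGS means shadow
* to master, otherwise master to shadow).
*/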
static int
_ii_copy_on_write(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks)
{
int rc = 0;
int rtype;
int hanging = (ip->bi_flags&DSW_HANGING);
if (hanging ||
(flag & (CV_SIBLING|CV_SHD2MST)) == CV_SHD2MST && NSHADOWS(ip)) {
_ii_info_t *xip;
/*
* Preserve copy of master for all other shadows of this master
* before writing our data onto the master.
*/
/*
* Avoid deadlock with COW on same chunk of sibling shadow
* by unlocking this chunk before copying all other sibling
* chunks.
*/
/*
* Only using a single chunk when copying to master avoids
* complex code here.
*/
ASSERT(nchunks == 1);
if (!hanging)
_ii_unlock_chunk(ip, chunk_num);
for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
if (xip == ip) /* don't copy ourselves again */
continue;
DTRACE_PROBE(_ii_copy_on_write);
rw_enter(&xip->bi_linkrw, RW_READER);
mutex_enter(&xip->bi_mutex);
if (xip->bi_disabled) {
mutex_exit(&xip->bi_mutex);
rw_exit(&xip->bi_linkrw);
continue; /* this set is stopping */
}
xip->bi_shdref++;
mutex_exit(&xip->bi_mutex);
/* don't waste time asking for MST as ip shares it */
rtype = SHDR|BMP;
(void) _ii_rsrv_devs(xip, rtype, II_INTERNAL);
_ii_lock_chunk(xip, chunk_num);
rc = _ii_copy_on_write(xip, flag | CV_SIBLING,
chunk_num, 1);
/*
* See comments in _ii_shadow_write()
*/
if (rc == 0 ||
(rc == EIO && (xip->bi_flags&DSW_OVERFLOW) != 0))
(void) II_SET_SHD_BIT(xip, chunk_num);
_ii_unlock_chunk(xip, chunk_num);
_ii_rlse_devs(xip, rtype);
mutex_enter(&xip->bi_mutex);
xip->bi_shdref--;
if (xip->bi_state & DSW_CLOSING) {
if (total_ref(xip) == 0) {
cv_signal(&xip->bi_closingcv);
}
}
mutex_exit(&xip->bi_mutex);
rw_exit(&xip->bi_linkrw);
}
if (hanging) {
DTRACE_PROBE(_ii_copy_on_write_end);
return (0);
}
/*
* Reacquire chunk lock and check that a COW by a sibling
* has not already copied this chunk.
*/
_ii_lock_chunk(ip, chunk_num);
rc = II_TST_SHD_BIT(ip, chunk_num);
if (rc < 0) {
DTRACE_PROBE(_ii_copy_on_write_end);
return (EIO);
}
if (rc != 0) {
DTRACE_PROBE(_ii_copy_on_write_end);
return (0);
}
}
if ((ip->bi_flags & DSW_COPYING) == 0) {
/* Not copying at all */
if ((ip->bi_flags & DSW_GOLDEN) == DSW_GOLDEN) {
/* No copy-on-write as it is independent */
DTRACE_PROBE(_ii_copy_on_write_end);
return (0);
}
/* Dependent, so depends on shadow bit */
if ((flag == CV_SHD2MST) &&
((ip->bi_flags & DSW_SHDOFFLINE) != 0)) {
/*
* Writing master but shadow is offline, so
* no need to copy on write or set shadow bit
*/
DTRACE_PROBE(_ii_copy_on_write_end);
return (0);
}
if (ip->bi_flags & DSW_BMPOFFLINE) {
DTRACE_PROBE(_ii_copy_on_write_end);
return (EIO);
}
rc = II_TST_SHD_BIT(ip, chunk_num);
if (rc < 0) {
DTRACE_PROBE(_ii_copy_on_write_end);
return (EIO);
}
if (rc == 0) {
/* Shadow bit clear, copy master to shadow */
rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks);
}
} else {
/* Copying one way or the other */
if (ip->bi_flags & DSW_BMPOFFLINE) {
DTRACE_PROBE(_ii_copy_on_write_end);
return (EIO);
}
rc = II_TST_COPY_BIT(ip, chunk_num);
if (rc < 0) {
DTRACE_PROBE(_ii_copy_on_write_end);
return (EIO);
}
if (rc) {
/* Copy bit set, do a copy */
if ((ip->bi_flags & DSW_COPYINGS) == 0) {
/* Copy master to shadow */
rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks);
} else {
/* Copy shadow to master */
rc = _ii_copy_chunks(ip, CV_SHD2MST, chunk_num,
nchunks);
}
}
}
return (rc);
}
#ifdef DEBUG
int ii_maxchunks = 0;
#endif
/*
* _ii_copyvolp()
* Copy volume process.
*
* Calling/Exit State:
* Passes the result back to the caller in ca->rc: 0 when the copy
* completed, otherwise an error code (DSW_EABORTED if the copy was
* aborted).
*
* Description:
* According to the flag, copy the master to the shadow volume or the
* shadow to the master volume. Before returning, wake up all processes
* waiting for this copy.
*
*/
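/*
* Copy throttling: every bi_throttle_unit chunks, or sooner if bi_release
* has been requested, the data devices are released, the process sleeps
* for bi_throttle_delay ticks and then re-reserves the devices, giving
* other users of the volumes a chance to reserve them.
*/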
static void
_ii_copyvolp(struct copy_args *ca)
{
chunkid_t chunk_num;
int rc = 0;
chunkid_t max_chunk;
nsc_size_t nc_max;
int nc_try, nc_got;
nsc_size_t mst_max, shd_max;
_ii_info_t *ip;
int flag;
nsc_size_t bitmap_size;
nsc_size_t shadow_set, copy_set;
int chunkcount = 0;
int rsrv = 1;
spcs_s_info_t kstatus;
ip = ca->ip;
flag = ca->flag;
kstatus = ca->kstatus;
if (ip->bi_disabled) {
rc = DSW_EABORTED;
goto skip;
}
max_chunk = ip->bi_size / DSW_SIZE;
if ((ip->bi_size % DSW_SIZE) != 0)
++max_chunk;
if ((ip->bi_flags&DSW_TREEMAP))
nc_max = 1;
else {
mst_max = shd_max = 0;
(void) nsc_maxfbas(MSTFD(ip), 0, &mst_max);
(void) nsc_maxfbas(SHDFD(ip), 0, &shd_max);
nc_max = (mst_max < shd_max) ? mst_max : shd_max;
nc_max /= DSW_SIZE;
ASSERT(nc_max > 0 && nc_max < 1000);
}
#ifdef DEBUG
if (ii_maxchunks > 0)
nc_max = ii_maxchunks;
#endif
for (chunk_num = nc_got = 0; /* CSTYLED */; /* CSTYLED */) {
if ((flag & CV_SHD2MST) && NSHADOWS(ip))
nc_try = 1;
else
nc_try = (int)nc_max;
chunk_num = II_NEXT_COPY_BIT(ip, chunk_num + nc_got,
max_chunk, nc_try, &nc_got);
if (chunk_num >= max_chunk) /* loop complete */
break;
if (ip->bi_flags & DSW_COPYINGX) {
/* request to abort copy */
_ii_unlock_chunks(ip, chunk_num, nc_got);
rc = DSW_EABORTED;
break;
}
sema_p(&_ii_concopy_sema);
rc = _ii_copy_on_write(ip, (flag & CV_SHD2MST), chunk_num,
nc_got);
sema_v(&_ii_concopy_sema);
if (ip->bi_flags & DSW_TREEMAP)
ii_tdelete(ip, chunk_num);
_ii_unlock_chunks(ip, chunk_num, nc_got);
if (!II_SUCCESS(rc)) {
if (ca->wait)
spcs_s_add(kstatus, rc);
rc = DSW_EIO;
break;
}
if (ip->bi_release ||
(++chunkcount % ip->bi_throttle_unit) == 0) {
_ii_rlse_devs(ip, (ca->rtype&(~BMP)));
rsrv = 0;
delay(ip->bi_throttle_delay);
ca->rtype = MSTR|SHDR|(ca->rtype&BMP);
if ((rc = _ii_rsrv_devs(ip, (ca->rtype&(~BMP)),
II_INTERNAL)) != 0) {
if (ca->wait)
spcs_s_add(kstatus, rc);
rc = DSW_EIO;
break;
}
rsrv = 1;
if (nc_max > 1) {
/*
* maxfbas could have changed during the
* release/reserve, so recalculate the size
* of transfer we can do.
*/
(void) nsc_maxfbas(MSTFD(ip), 0, &mst_max);
(void) nsc_maxfbas(SHDFD(ip), 0, &shd_max);
nc_max = (mst_max < shd_max) ?
mst_max : shd_max;
nc_max /= DSW_SIZE;
}
}
}
skip:
mutex_enter(&ip->bi_mutex);
if (ip->bi_flags & DSW_COPYINGX)
II_FLAG_CLR(DSW_COPYINGP|DSW_COPYINGX, ip);
else
II_FLAG_CLR(DSW_COPY_FLAGS, ip);
if ((ip->bi_flags & DSW_TREEMAP) && (flag & CV_SHD2MST) &&
(ip->bi_flags & DSW_VOVERFLOW)) {
int rs;
bitmap_size = ip->bi_size / DSW_SIZE;
if ((ip->bi_size % DSW_SIZE) != 0)
++bitmap_size;
bitmap_size += 7;
bitmap_size /= 8;
/* Count the number of copy bits set */
rs = II_CNT_BITS(ip, ip->bi_copyfba, &copy_set, bitmap_size);
if ((rs == 0) && (copy_set == 0)) {
/*
* If we counted successfully and the copy is complete,
* see if any writes have forced the set into the
* overflow volume.
*/
rs = II_CNT_BITS(ip, ip->bi_shdfba, &shadow_set,
bitmap_size);
if ((rs == 0) && (shadow_set <
(nsc_size_t)ip->bi_shdchks)) {
II_FLAG_CLR(DSW_VOVERFLOW, ip);
--iigkstat.spilled_over.value.ul;
}
}
}
ca->rc = rc;
cv_broadcast(&ip->bi_copydonecv);
mutex_exit(&ip->bi_mutex);
if (!ca->wait) {
if (rsrv)
_ii_rlse_devs(ip, ca->rtype);
kmem_free(ca, sizeof (*ca));
}
}
/*
* _ii_copyvol()
* Copy a volume.
*
* Calling/Exit State:
* Returns 0 when the copy is complete (or, for a no-wait call, once the
* copy process has been started), otherwise an error code.
*
* Description:
* According to the flag, copy the master to the shadow volume or the
* shadow to the master volume. Upon return, wake up all processes waiting
* for this copy. Uses a separate process (_ii_copyvolp) to allow the
* caller to be interrupted.
*/
static int
_ii_copyvol(_ii_info_t *ip, int flag, int rtype, spcs_s_info_t kstatus,
int wait)
{
struct copy_args *ca;
int rc;
/*
* start copy in separate process.
*/
ca = (struct copy_args *)kmem_alloc(sizeof (*ca), KM_SLEEP);
ca->ip = ip;
ca->flag = flag;
ca->rtype = rtype;
ca->kstatus = kstatus;
ca->wait = wait;
ca->rc = 0;
if (rc = nsc_create_process((void (*)(void *))_ii_copyvolp,
(void *)ca, FALSE)) {
mutex_enter(&ip->bi_mutex);
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
cmn_err(CE_NOTE, "!Can't create II copy process");
kmem_free(ca, sizeof (*ca));
return (rc);
}
mutex_enter(&ip->bi_mutex);
if (wait == 0) {
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
return (0);
}
while (ip->bi_flags & DSW_COPYINGP) {
(void) cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex);
}
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
rc = ca->rc;
kmem_free(ca, sizeof (*ca));
return (rc);
}
/*
* _ii_stopcopy
* Stops any copy process on ip.
*
* Calling/Exit State:
* Returns 0 if the copy was stopped, otherwise error code.
*
* Description:
* Stop an in-progress copy by setting the DSW_COPYINGX flag, then
* wait for the copy to complete.
*/
static int
_ii_stopcopy(_ii_info_t *ip)
{
mutex_enter(&ip->bi_mutex);
DTRACE_PROBE1(_ii_stopcopy_flags,
uint_t, ip->bi_flags);
while (ip->bi_flags & DSW_COPYINGP) {
DTRACE_PROBE(_ii_stopcopy);
II_FLAG_SET(DSW_COPYINGX, ip);
if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0) {
/* Awoken by a signal */
mutex_exit(&ip->bi_mutex);
DTRACE_PROBE(_ii_stopcopy);
return (EINTR);
}
}
mutex_exit(&ip->bi_mutex);
return (0);
}
/*
* _ii_error
* Given the error type that occurred, and the current state of the
* shadowing, set the appropriate error condition(s).
*
*/
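/*
* Note: the DSW_OVERFLOW bit is overloaded in error_type to mark a
* recursive call made from the sibling loop at the end of this function,
* so that taking the master offline for sibling shadows cannot recurse
* indefinitely.
*/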
void
_ii_error(_ii_info_t *ip, int error_type)
{
int copy_flags;
int golden;
int flags;
int recursive_call = (error_type & DSW_OVERFLOW) != 0;
int offline_bits = DSW_OFFLINE;
_ii_info_t *xip;
int rc;
error_type &= ~DSW_OVERFLOW;
mutex_enter(&ip->bi_mutex);
flags = (ip->bi_flags) & offline_bits;
if ((flags ^ error_type) == 0) {
/* nothing new offline */
mutex_exit(&ip->bi_mutex);
return;
}
if (error_type == DSW_BMPOFFLINE &&
(ip->bi_flags & DSW_BMPOFFLINE) == 0) {
/* first, let nskernd know */
rc = _ii_report_bmp(ip);
if (rc) {
if (ii_debug > 0) {
cmn_err(CE_WARN, "!Unable to mark bitmap bad in"
" config DB; rc = %d", rc);
}
ip->bi_flags |= DSW_CFGOFFLINE;
}
}
flags = ip->bi_flags;
golden = ((flags & DSW_GOLDEN) == DSW_GOLDEN);
copy_flags = flags & DSW_COPYING;
switch (error_type) {
case DSW_BMPOFFLINE:
/* prevent further use of bitmap */
flags |= DSW_BMPOFFLINE;
if (ii_debug > 0)
cmn_err(CE_NOTE, "!ii: Bitmap offline");
switch (copy_flags) {
case DSW_COPYINGM:
/* Bitmap offline, copying master to shadow */
flags |= DSW_SHDOFFLINE;
if (ii_debug > 0)
cmn_err(CE_NOTE, "!ii: Implied shadow offline");
break;
case DSW_COPYINGS:
/* Bitmap offline, copying shadow to master */
if (golden) {
/* Shadow is still usable */
if (ii_debug > 0)
cmn_err(CE_NOTE,
"!ii: Implied master offline");
flags |= DSW_MSTOFFLINE;
} else {
/*
* Snapshot restore from shadow to master
* is a dumb thing to do anyway. Lose both.
*/
flags |= DSW_SHDOFFLINE | DSW_MSTOFFLINE;
if (ii_debug > 0)
cmn_err(CE_NOTE,
"ii: Implied master and "
"shadow offline");
}
break;
case 0:
/* Bitmap offline, no copying in progress */
if (!golden) {
if (ii_debug > 0)
cmn_err(CE_NOTE,
"!ii: Implied shadow offline");
flags |= DSW_SHDOFFLINE;
}
break;
}
break;
case DSW_OVROFFLINE:
flags |= DSW_OVROFFLINE;
ASSERT(ip->bi_overflow);
if (ii_debug > 0)
cmn_err(CE_NOTE, "!ii: Overflow offline");
/* FALLTHRU */
case DSW_SHDOFFLINE:
flags |= DSW_SHDOFFLINE;
if (ii_debug > 0)
cmn_err(CE_NOTE, "!ii: Shadow offline");
if (copy_flags == DSW_COPYINGS) {
/* Shadow offline, copying shadow to master */
if (ii_debug > 0)
cmn_err(CE_NOTE, "!ii: Implied master offline");
flags |= DSW_MSTOFFLINE;
}
break;
case DSW_MSTOFFLINE:
flags |= DSW_MSTOFFLINE;
if (ii_debug > 0)
cmn_err(CE_NOTE, "!ii: Master offline");
switch (copy_flags) {
case DSW_COPYINGM:
/* Master offline, copying master to shadow */
flags |= DSW_SHDOFFLINE;
if (ii_debug > 0)
cmn_err(CE_NOTE, "!ii: Implied shadow offline");
break;
case DSW_COPYINGS:
/* Master offline, copying shadow to master */
if (!golden) {
flags |= DSW_SHDOFFLINE;
if (ii_debug > 0)
cmn_err(CE_NOTE,
"!ii: Implied shadow offline");
}
break;
case 0:
/* Master offline, no copying in progress */
if (!golden) {
flags |= DSW_SHDOFFLINE;
if (ii_debug > 0)
cmn_err(CE_NOTE,
"!ii: Implied shadow offline");
}
break;
}
break;
default:
break;
}
II_FLAG_SET(flags, ip);
mutex_exit(&ip->bi_mutex);
if (!recursive_call &&
NSHADOWS(ip) && (flags&DSW_MSTOFFLINE) == DSW_MSTOFFLINE) {
/* take master offline for all other sibling shadows */
for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
if (xip == ip)
continue;
if (_ii_rsrv_devs(xip, BMP, II_INTERNAL) != 0)
continue;
/* overload DSW_OVERFLOW */
_ii_error(xip, DSW_MSTOFFLINE|DSW_OVERFLOW);
_ii_rlse_devs(xip, BMP);
}
}
}
/*
* _ii_lock_chunk
* Locks access to the specified chunk
*
*/
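/*
* Locking scheme: passing II_NULLCHUNK takes bi_busyrw as a writer and so
* excludes every per-chunk locker; locking a single chunk takes bi_busyrw
* as a reader and then sets that chunk's bit in the bi_busy bitmap under
* bi_mutex, waiting on bi_busycv while the bit is already set. If no
* bi_busy bitmap is allocated, per-chunk locking is a no-op.
*/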
static void
_ii_lock_chunk(_ii_info_t *ip, chunkid_t chunk)
{
if (chunk == II_NULLCHUNK) {
DTRACE_PROBE(_ii_lock_chunk_type);
rw_enter(&ip->bi_busyrw, RW_WRITER);
} else {
DTRACE_PROBE(_ii_lock_chunk_type);
if (ip->bi_busy == NULL) {
DTRACE_PROBE(_ii_lock_chunk_end);
return;
}
rw_enter(&ip->bi_busyrw, RW_READER);
mutex_enter(&ip->bi_mutex);
while (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS],
chunk % DSW_BITS))
cv_wait(&ip->bi_busycv, &ip->bi_mutex);
DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS);
mutex_exit(&ip->bi_mutex);
}
}
/*
* _ii_trylock_chunk
* Tries to lock access to the specified chunk
* Returns non-zero on success.
*
*/
static int
_ii_trylock_chunk(_ii_info_t *ip, chunkid_t chunk)
{
int rc;
ASSERT(chunk != II_NULLCHUNK);
if (rw_tryenter(&ip->bi_busyrw, RW_READER) == 0) {
DTRACE_PROBE(_ii_trylock_chunk);
return (0);
}
if (ip->bi_busy == NULL) {
DTRACE_PROBE(_ii_trylock_chunk_end);
return (0);
}
mutex_enter(&ip->bi_mutex);
if (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS)) {
rw_exit(&ip->bi_busyrw); /* RW_READER */
rc = 0;
} else {
DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS);
rc = 1;
}
mutex_exit(&ip->bi_mutex);
return (rc);
}
/*
* _ii_unlock_chunks
* Unlocks access to the specified chunks
*
*/
static void
_ii_unlock_chunks(_ii_info_t *ip, chunkid_t chunk, int n)
{
if (chunk == II_NULLCHUNK) {
DTRACE_PROBE(_ii_unlock_chunks);
rw_exit(&ip->bi_busyrw); /* RW_WRITER */
} else {
if (ip->bi_busy == NULL) {
DTRACE_PROBE(_ii_unlock_chunks_end);
return;
}
mutex_enter(&ip->bi_mutex);
DTRACE_PROBE(_ii_unlock_chunks);
for (; n-- > 0; chunk++) {
ASSERT(DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS],
chunk % DSW_BITS));
DSW_BIT_CLR(ip->bi_busy[chunk / DSW_BITS],
chunk % DSW_BITS);
rw_exit(&ip->bi_busyrw); /* RW_READER */
}
cv_broadcast(&ip->bi_busycv);
mutex_exit(&ip->bi_mutex);
}
}
/*
* Copyout the bit map.
*/
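/*
* Layout of the bitmap volume assumed by the copyout/copyin routines
* below: the shadow bitmap starts at bi_shdfba, the copy bitmap follows
* immediately at bi_copyfba (checked by the ASSERT below), and compact
* dependent (DSW_TREEMAP) sets keep their chunk index in a further
* 32 bitmap-lengths after that.
*/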
static int
_ii_ab_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
int user_bm_size)
{
nsc_off_t last_fba;
nsc_buf_t *tmp;
nsc_vec_t *nsc_vecp;
nsc_off_t fba_pos;
int buf_fba_len;
int buf_byte_len;
size_t co_len;
int rc;
DTRACE_PROBE2(_ii_ab_co_bmp_start, nsc_off_t, bm_offset,
nsc_size_t, user_bm_size);
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
/* First calculate the size of the shadow and copy bitmaps */
co_len = DSW_BM_FBA_LEN(ip->bi_size);
ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len);
/* Are we in the ranges of the various bitmaps/indexes? */
if (bm_offset < ip->bi_shdfba)
return (EIO);
else if (bm_offset < (last_fba = ip->bi_shdfba + co_len))
/*EMPTY*/;
else if (bm_offset < (last_fba = ip->bi_copyfba + co_len))
/*EMPTY*/;
else if ((ip->bi_flags & DSW_TREEMAP) &&
(bm_offset < (last_fba = last_fba + (co_len * 32))))
/*EMPTY*/;
else return (EIO);
/* Are we within the size of the segment being copied? */
if (FBA_LEN(user_bm_size) > last_fba - bm_offset)
return (EIO);
for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0;
fba_pos += DSW_CBLK_FBA) {
tmp = NULL;
buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
DSW_CBLK_FBA : last_fba - fba_pos;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
NSC_RDBUF, &tmp);
II_READ_END(ip, bitmap, rc, buf_fba_len);
if (!II_SUCCESS(rc)) {
if (tmp)
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (EIO);
}
/* copyout each nsc_vec's worth of data */
buf_byte_len = FBA_SIZE(buf_fba_len);
for (nsc_vecp = tmp->sb_vec;
buf_byte_len > 0 && user_bm_size > 0;
nsc_vecp++) {
co_len = (user_bm_size > nsc_vecp->sv_len) ?
nsc_vecp->sv_len : user_bm_size;
if (copyout(nsc_vecp->sv_addr, user_bm, co_len)) {
(void) nsc_free_buf(tmp);
return (EFAULT);
}
user_bm += co_len;
user_bm_size -= co_len;
buf_byte_len -= co_len;
}
(void) nsc_free_buf(tmp);
}
return (0);
}
/*
* Copy in a bitmap and OR it with the differences bitmap.
*/
static int
_ii_ab_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
int user_bm_size)
{
nsc_off_t last_fba;
nsc_buf_t *tmp;
nsc_vec_t *nsc_vecp;
nsc_off_t fba_pos;
int buf_fba_len;
int buf_byte_len;
size_t ci_len;
int rc;
int n;
unsigned char *tmp_buf, *tmpp, *tmpq;
DTRACE_PROBE2(_ii_ab_ci_bmp_start, nsc_off_t, bm_offset,
nsc_size_t, user_bm_size);
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
tmp_buf = NULL;
last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size);
for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0;
fba_pos += DSW_CBLK_FBA) {
tmp = NULL;
buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
DSW_CBLK_FBA : last_fba - fba_pos;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
NSC_RDWRBUF, &tmp);
II_READ_END(ip, bitmap, rc, buf_fba_len);
if (!II_SUCCESS(rc)) {
if (tmp)
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (EIO);
}
/* copyin each nsc_vec's worth of data */
buf_byte_len = FBA_SIZE(buf_fba_len);
for (nsc_vecp = tmp->sb_vec;
buf_byte_len > 0 && user_bm_size > 0;
nsc_vecp++) {
ci_len = (user_bm_size > nsc_vecp->sv_len) ?
nsc_vecp->sv_len : user_bm_size;
tmpp = tmp_buf = kmem_alloc(ci_len, KM_SLEEP);
tmpq = nsc_vecp->sv_addr;
if (copyin(user_bm, tmpp, ci_len)) {
(void) nsc_free_buf(tmp);
kmem_free(tmp_buf, ci_len);
return (EFAULT);
}
for (n = ci_len; n-- > 0; /* CSTYLED */)
*tmpq++ |= *tmpp++;
user_bm += ci_len;
user_bm_size -= ci_len;
buf_byte_len -= ci_len;
kmem_free(tmp_buf, ci_len);
}
II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, buf_fba_len, 0);
if (!II_SUCCESS(rc)) {
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (EIO);
}
(void) nsc_free_buf(tmp);
}
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
/*
* Completely zero the bit map.
*
* Returns 0 if no error
* Returns non-zero if there was an error
*/
static int
_ii_ab_zerobm(_ii_info_t *ip)
{
nsc_off_t fba_pos;
int rc;
nsc_size_t len;
nsc_size_t size;
nsc_buf_t *tmp;
size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
for (fba_pos = ip->bi_shdfba; fba_pos < size; fba_pos += DSW_CBLK_FBA) {
tmp = NULL;
len = fba_pos + DSW_CBLK_FBA < size ?
DSW_CBLK_FBA : size - fba_pos;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, len, NSC_RDWRBUF,
&tmp);
II_READ_END(ip, bitmap, rc, len);
if (!II_SUCCESS(rc)) {
if (tmp)
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
rc = nsc_zero(tmp, fba_pos, len, 0);
if (II_SUCCESS(rc)) {
II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, len, 0);
}
(void) nsc_free_buf(tmp);
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
}
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
/*
* Copy shadow bitmap to copy bitmap
*/
static int
_ii_ab_copybm(_ii_info_t *ip)
{
nsc_off_t copy_fba_pos, shd_fba_pos;
int rc;
nsc_size_t len;
nsc_off_t size;
nsc_buf_t *copy_tmp, *shd_tmp;
size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
copy_fba_pos = ip->bi_copyfba;
for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size;
copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) {
shd_tmp = NULL;
len = shd_fba_pos + DSW_CBLK_FBA < size ?
DSW_CBLK_FBA : size - shd_fba_pos;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len, NSC_RDBUF,
&shd_tmp);
II_READ_END(ip, bitmap, rc, len);
if (!II_SUCCESS(rc)) {
if (shd_tmp)
(void) nsc_free_buf(shd_tmp);
_ii_error(ip, DSW_BMPOFFLINE);
if (ii_debug > 1)
cmn_err(CE_NOTE, "!ii: copybm failed 1 rc %d",
rc);
return (rc);
}
copy_tmp = NULL;
rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len, NSC_WRBUF,
&copy_tmp);
if (!II_SUCCESS(rc)) {
(void) nsc_free_buf(shd_tmp);
if (copy_tmp)
(void) nsc_free_buf(copy_tmp);
_ii_error(ip, DSW_BMPOFFLINE);
if (ii_debug > 1)
cmn_err(CE_NOTE, "!ii: copybm failed 2 rc %d",
rc);
return (rc);
}
rc = nsc_copy(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos,
len);
if (II_SUCCESS(rc)) {
II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos,
len, 0);
}
(void) nsc_free_buf(shd_tmp);
(void) nsc_free_buf(copy_tmp);
if (!II_SUCCESS(rc)) {
if (ii_debug > 1)
cmn_err(CE_NOTE, "!ii: copybm failed 4 rc %d",
rc);
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
}
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
/*
* stolen from nsc_copy_h()
*/
static int
_ii_nsc_or(nsc_buf_t *h1, nsc_buf_t *h2, nsc_off_t pos1, nsc_off_t pos2,
nsc_size_t len)
{
unsigned char *a1, *a2;
unsigned char *b1, *b2;
nsc_vec_t *v1, *v2;
int i, sz, l1, l2;
if (pos1 < h1->sb_pos || pos1 + len > h1->sb_pos + h1->sb_len ||
pos2 < h2->sb_pos || pos2 + len > h2->sb_pos + h2->sb_len)
return (EINVAL);
if (!len)
return (0);
/* find starting point in "from" vector */
v1 = h1->sb_vec;
pos1 -= h1->sb_pos;
for (; pos1 >= FBA_NUM(v1->sv_len); v1++)
pos1 -= FBA_NUM(v1->sv_len);
a1 = v1->sv_addr + FBA_SIZE(pos1);
l1 = v1->sv_len - FBA_SIZE(pos1);
/* find starting point in "to" vector */
v2 = h2->sb_vec;
pos2 -= h2->sb_pos;
for (; pos2 >= FBA_NUM(v2->sv_len); v2++)
pos2 -= FBA_NUM(v2->sv_len);
a2 = v2->sv_addr + FBA_SIZE(pos2);
l2 = v2->sv_len - FBA_SIZE(pos2);
/* OR the required data */
len = FBA_SIZE(len);
while (len) {
sz = min(l1, l2);
sz = (int)min((nsc_size_t)sz, len);
b1 = a1;
b2 = a2;
for (i = sz; i-- > 0; /* CSTYLED */)
*b2++ |= *b1++;
l1 -= sz;
l2 -= sz;
a1 += sz;
a2 += sz;
len -= sz;
if (!l1) {
a1 = (++v1)->sv_addr;
l1 = v1->sv_len;
}
if (!l2) {
a2 = (++v2)->sv_addr;
l2 = v2->sv_len;
}
}
return (0);
}
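/*
 * A minimal sketch (not part of the driver) of what the vector walk above
 * reduces to when each buffer is a single contiguous extent, i.e. one
 * nsc_vec_t; the variable names are purely illustrative:
 *
 *	unsigned char *from = h1->sb_vec->sv_addr +
 *	    FBA_SIZE(pos1 - h1->sb_pos);
 *	unsigned char *to   = h2->sb_vec->sv_addr +
 *	    FBA_SIZE(pos2 - h2->sb_pos);
 *	size_t nbytes = FBA_SIZE(len);
 *
 *	while (nbytes-- > 0)
 *		*to++ |= *from++;
 *
 * The real code has to do the same thing one vector element at a time,
 * because an nsc_buf_t may be scattered across several sv_addr/sv_len
 * extents.
 */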
/*
* OR the shadow bitmap into the copy bitmap, then clear the
* shadow bitmap.
*/
static int
_ii_ab_orbm(_ii_info_t *ip)
{
nsc_off_t copy_fba_pos, shd_fba_pos;
int rc;
nsc_size_t len;
size_t size;
nsc_buf_t *copy_tmp, *shd_tmp;
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
copy_fba_pos = ip->bi_copyfba;
for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size;
copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) {
shd_tmp = NULL;
len = shd_fba_pos + DSW_CBLK_FBA < size ?
DSW_CBLK_FBA : size - shd_fba_pos;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len,
NSC_RDBUF|NSC_WRBUF, &shd_tmp);
II_READ_END(ip, bitmap, rc, len);
if (!II_SUCCESS(rc)) {
if (shd_tmp)
(void) nsc_free_buf(shd_tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
copy_tmp = NULL;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len,
NSC_RDBUF|NSC_WRBUF, &copy_tmp);
II_READ_END(ip, bitmap, rc, len);
if (!II_SUCCESS(rc)) {
(void) nsc_free_buf(shd_tmp);
if (copy_tmp)
(void) nsc_free_buf(copy_tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
rc = _ii_nsc_or(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos,
len);
if (II_SUCCESS(rc)) {
II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos,
len, 0);
}
if (II_SUCCESS(rc))
rc = nsc_zero(shd_tmp, shd_fba_pos, len, 0);
if (II_SUCCESS(rc)) {
II_NSC_WRITE(ip, bitmap, rc, shd_tmp, shd_fba_pos, len,
0);
}
(void) nsc_free_buf(shd_tmp);
(void) nsc_free_buf(copy_tmp);
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
}
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
/*
* _ii_ab_tst_shd_bit
* Determine if a chunk has been copied to the shadow device
* Relies on the alloc_buf/free_buf semantics for locking.
*
* Calling/Exit State:
* Returns 1 if the modified bit has been set for the shadow device,
* Returns 0 if the modified bit has not been set for the shadow device,
* Returns -1 if there was an error
*/
static int
_ii_ab_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk)
{
int rc;
nsc_off_t fba;
nsc_buf_t *tmp = NULL;
if (ip->bi_flags & DSW_BMPOFFLINE)
return (-1);
fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS);
chunk %= FBA_SIZE(1) * DSW_BITS;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
II_READ_END(ip, bitmap, rc, 1);
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_BMPOFFLINE);
if (tmp)
(void) nsc_free_buf(tmp);
return (-1);
}
rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
chunk%DSW_BITS);
(void) nsc_free_buf(tmp);
return (rc);
}
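/*
 * Worked example of the chunk-to-bit arithmetic used here and in the
 * other _ii_ab_* bit routines (a sketch, assuming the usual 512-byte FBA
 * and DSW_BITS == 8): one bitmap FBA then holds FBA_SIZE(1) * DSW_BITS ==
 * 4096 chunk bits, so chunk 10000 is found at
 *
 *	fba = bi_shdfba + 10000 / 4096 = bi_shdfba + 2
 *	bit = 10000 % 4096 = 1808, i.e. byte 1808 / 8 = 226, bit 1808 % 8 = 0
 *
 * which is exactly what the fba/chunk calculations above compute before
 * DSW_BIT_ISSET() indexes into the allocated buffer.
 */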
/*
* _ii_ab_set_shd_bit
* Records that a chunk has been copied to the shadow device
*
* Returns non-zero if an error is encountered
* Returns 0 if no error
*/
static int
_ii_ab_set_shd_bit(_ii_info_t *ip, chunkid_t chunk)
{
int rc;
nsc_off_t fba;
nsc_buf_t *tmp = NULL;
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS);
chunk %= FBA_SIZE(1) * DSW_BITS;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
II_READ_END(ip, bitmap, rc, 1);
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_BMPOFFLINE);
if (tmp)
(void) nsc_free_buf(tmp);
return (rc);
}
if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
chunk%DSW_BITS) == 0) {
DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
chunk%DSW_BITS);
II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
if ((ip->bi_state & DSW_CNTSHDBITS) == 0)
ip->bi_shdbits++;
}
(void) nsc_free_buf(tmp);
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
return (0);
}
/*
* _ii_ab_tst_copy_bit
* Determine if a chunk needs to be copied during updates.
*
* Calling/Exit State:
* Returns 1 if the copy bit for the chunk is set
* Returns 0 if the copy bit for the chunk is not set
* Returns -1 if an error is encountered
*/
static int
_ii_ab_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk)
{
int rc;
nsc_off_t fba;
nsc_buf_t *tmp = NULL;
if (ip->bi_flags & DSW_BMPOFFLINE)
return (-1);
fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
chunk %= FBA_SIZE(1) * DSW_BITS;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
II_READ_END(ip, bitmap, rc, 1);
if (!II_SUCCESS(rc)) {
if (tmp)
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (-1);
}
rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
chunk%DSW_BITS);
(void) nsc_free_buf(tmp);
return (rc);
}
/*
* _ii_ab_set_copy_bit
* Records that a chunk needs to be copied during updates (sets the copy bit)
*
* Returns non-zero if an error is encountered
* Returns 0 if no error
*/
static int
_ii_ab_set_copy_bit(_ii_info_t *ip, chunkid_t chunk)
{
int rc;
nsc_off_t fba;
nsc_buf_t *tmp = NULL;
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
chunk %= FBA_SIZE(1) * DSW_BITS;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
II_READ_END(ip, bitmap, rc, 1);
if (!II_SUCCESS(rc)) {
if (tmp)
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
chunk%DSW_BITS) == 0) {
DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
chunk%DSW_BITS);
if ((ip->bi_state & DSW_CNTCPYBITS) == 0)
ip->bi_copybits++;
II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
}
(void) nsc_free_buf(tmp);
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
return (0);
}
/*
* _ii_ab_clr_copy_bits
* Clears the copy bits for a run of chunks that no longer need copying; this
* function assumes that the bits to clear are all in the same fba,
* as is the case when they were generated by _ii_ab_next_copy_bit().
*
* Returns non-zero if an error is encountered
* Returns 0 if no error
*/
static int
_ii_ab_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks)
{
int rc;
nsc_off_t fba;
nsc_buf_t *tmp = NULL;
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
chunk %= FBA_SIZE(1) * DSW_BITS;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
II_READ_END(ip, bitmap, rc, 1);
if (!II_SUCCESS(rc)) {
if (tmp)
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
for (; nchunks-- > 0; chunk++) {
DSW_BIT_CLR(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
chunk%DSW_BITS);
if (ip->bi_copybits > 0)
ip->bi_copybits--;
}
II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
(void) nsc_free_buf(tmp);
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
return (0);
}
/*
* _ii_ab_fill_copy_bmp
* Fills the copy bitmap with 1's.
*
* Returns non-zero if an error is encountered
* Returns 0 if no error
*/
static int
_ii_ab_fill_copy_bmp(_ii_info_t *ip)
{
int rc;
nsc_off_t fba;
nsc_buf_t *tmp;
unsigned char *p;
int i, j;
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
fba = ip->bi_copyfba;
for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) {
tmp = NULL;
rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_WRBUF, &tmp);
if (!II_SUCCESS(rc)) {
if (tmp)
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
p = (unsigned char *)tmp->sb_vec->sv_addr;
for (j = FBA_SIZE(1); j-- > 0; p++)
*p = (unsigned char)0xff;
II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_BMPOFFLINE);
(void) nsc_free_buf(tmp);
return (rc);
}
(void) nsc_free_buf(tmp);
}
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
/*
* _ii_ab_load_bmp
* Load bitmap from persistent storage.
*/
static int
_ii_ab_load_bmp(_ii_info_t *ip, int flag)
/* ARGSUSED */
{
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
/*
* _ii_ab_next_copy_bit
* Find next set copy bit.
*
* Returns the next bits set in the copy bitmap, with the corresponding chunks
* locked. Used to avoid having to reread the same bit map block as each bit
* is tested.
*/
static chunkid_t
_ii_ab_next_copy_bit(_ii_info_t *ip, chunkid_t startchunk, chunkid_t maxchunk,
int wanted, int *got)
{
chunkid_t rc;
nsc_off_t fba;
chunkid_t chunk;
int bits_per_fba = FBA_SIZE(1) * DSW_BITS;
int high;
chunkid_t nextchunk;
nsc_buf_t *tmp = NULL;
*got = 0;
again:
if (ip->bi_flags & DSW_BMPOFFLINE)
return (maxchunk + 1);
while (startchunk < maxchunk) {
tmp = NULL;
fba = ip->bi_copyfba + startchunk / bits_per_fba;
chunk = startchunk % bits_per_fba;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
II_READ_END(ip, bitmap, rc, 1);
if (!II_SUCCESS(rc)) {
if (tmp)
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (maxchunk + 1);
}
high = startchunk + bits_per_fba - startchunk%bits_per_fba;
if (high > maxchunk)
high = maxchunk;
for (; startchunk < high; chunk++, startchunk++) {
if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
chunk%DSW_BITS)) {
/*
* trylock won't sleep so can use while
* holding the buf.
*/
if (!_ii_trylock_chunk(ip, startchunk)) {
(void) nsc_free_buf(tmp);
_ii_lock_chunk(ip, startchunk);
if (_ii_ab_tst_copy_bit(ip, startchunk)
!= 1) {
/*
* another process copied this
* chunk while we were acquiring
* the chunk lock.
*/
_ii_unlock_chunk(ip,
startchunk);
DTRACE_PROBE(
_ii_ab_next_copy_bit_again);
goto again;
}
*got = 1;
DTRACE_PROBE(_ii_ab_next_copy_bit_end);
return (startchunk);
}
*got = 1;
nextchunk = startchunk + 1;
chunk++;
for (; --wanted > 0 && nextchunk < high;
nextchunk++, chunk++) {
if (!DSW_BIT_ISSET(tmp->sb_vec->sv_addr
[chunk/DSW_BITS], chunk%DSW_BITS)) {
break; /* end of bit run */
}
if (_ii_trylock_chunk(ip, nextchunk))
(*got)++;
else
break;
}
(void) nsc_free_buf(tmp);
DTRACE_PROBE(_ii_ab_next_copy_bit);
return (startchunk);
}
}
(void) nsc_free_buf(tmp);
}
return (maxchunk + 1);
}
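/*
 * The locking dance above, in outline (a descriptive sketch only, not
 * new logic):
 *
 *	read the bitmap block covering startchunk
 *	for each set copy bit in that block:
 *		if the chunk trylocks, greedily trylock following chunks
 *		    of the run (up to 'wanted') and return the start chunk
 *		otherwise drop the buffer, block in _ii_lock_chunk(), and
 *		    re-test the copy bit; if another copy process cleared
 *		    it meanwhile, drop the lock and restart the scan
 *
 * Returning with the chunks already locked lets the caller copy them
 * without re-reading the bitmap block for every bit.
 */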
/*
* _ii_ab_save_bmp
* Save bitmap to persistent storage.
*/
static int
_ii_ab_save_bmp(_ii_info_t *ip, int flag)
/* ARGSUSED */
{
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
return (0);
}
/*
* _ii_ab_change_bmp
* Copy the change bitmap (shadow bits ORed with copy bits) to memory.
*/
static int
_ii_ab_change_bmp(_ii_info_t *ip, unsigned char *ptr)
/* ARGSUSED */
{
int bm_size;
int i, j, fba;
int rc;
unsigned char *p;
nsc_buf_t *tmp = NULL;
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba,
ptr, bm_size);
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
fba = ip->bi_copyfba;
for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) {
tmp = NULL;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
II_READ_END(ip, bitmap, rc, 1);
if (!II_SUCCESS(rc)) {
if (tmp)
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (rc);
}
p = (unsigned char *)tmp->sb_vec->sv_addr;
for (j = FBA_SIZE(1); j-- > 0; p++)
*ptr++ |= *p;
(void) nsc_free_buf(tmp);
}
return (0);
}
/*
* Count bits set in the bit map.
*/
static int
_ii_ab_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter,
int bm_size)
{
nsc_size_t last_fba;
nsc_buf_t *tmp;
nsc_vec_t *sd_vecp;
nsc_off_t fba_pos;
int buf_fba_len;
int buf_byte_len;
int co_len;
int i;
unsigned int j, k;
unsigned char *cp;
int rc;
*counter = 0;
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size);
for (fba_pos = bm_offset; fba_pos < last_fba && bm_size > 0;
fba_pos += DSW_CBLK_FBA) {
tmp = NULL;
buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
DSW_CBLK_FBA : last_fba - fba_pos;
II_READ_START(ip, bitmap);
rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
NSC_RDBUF, &tmp);
II_READ_END(ip, bitmap, rc, 1);
if (!II_SUCCESS(rc)) {
if (tmp)
(void) nsc_free_buf(tmp);
_ii_error(ip, DSW_BMPOFFLINE);
return (EIO);
}
/* count each sd_vec's worth of data */
buf_byte_len = FBA_SIZE(buf_fba_len);
for (sd_vecp = tmp->sb_vec;
buf_byte_len > 0 && bm_size > 0;
sd_vecp++) {
co_len = (bm_size > sd_vecp->sv_len) ?
sd_vecp->sv_len : bm_size;
cp = sd_vecp->sv_addr;
for (i = k = 0; i < co_len; i++)
for (j = (unsigned)*cp++; j; j &= j - 1)
k++;
*counter += k;
bm_size -= co_len;
buf_byte_len -= co_len;
}
(void) nsc_free_buf(tmp);
}
return (0);
}
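/*
 * The inner loop above counts set bits with the classic j &= j - 1
 * idiom, which clears the lowest set bit on every iteration, so the
 * loop runs once per set bit rather than once per bit.  For example,
 * for a byte value of 0xb4 (10110100):
 *
 *	j: 10110100 -> 10110000 -> 10100000 -> 10000000 -> 0	(4 steps)
 *
 * giving a count of 4 set bits for that byte.
 */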
/*
* OR the bitmaps as part of a join operation
*/
static int
_ii_ab_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip)
{
int rc;
nsc_size_t len;
nsc_size_t size;
nsc_buf_t *dest_tmp, *src_tmp;
nsc_off_t src_fba_pos;
if ((src_ip->bi_flags & DSW_BMPOFFLINE) ||
(dest_ip->bi_flags & DSW_BMPOFFLINE))
return (EIO);
size = DSW_BM_FBA_LEN(src_ip->bi_size) + src_ip->bi_shdfba;
for (src_fba_pos = src_ip->bi_shdfba; src_fba_pos < size;
src_fba_pos += DSW_CBLK_FBA) {
src_tmp = NULL;
len = src_fba_pos + DSW_CBLK_FBA < size ?
DSW_CBLK_FBA : size - src_fba_pos;
II_READ_START(src_ip, bitmap);
rc = nsc_alloc_buf(src_ip->bi_bmpfd, src_fba_pos, len,
NSC_RDWRBUF, &src_tmp);
II_READ_END(src_ip, bitmap, rc, len);
if (!II_SUCCESS(rc)) {
if (src_tmp)
(void) nsc_free_buf(src_tmp);
_ii_error(src_ip, DSW_BMPOFFLINE);
return (rc);
}
dest_tmp = NULL;
II_READ_START(dest_ip, bitmap);
rc = nsc_alloc_buf(dest_ip->bi_bmpfd, src_fba_pos, len,
NSC_RDWRBUF, &dest_tmp);
II_READ_END(dest_ip, bitmap, rc, len);
if (!II_SUCCESS(rc)) {
(void) nsc_free_buf(src_tmp);
if (dest_tmp)
(void) nsc_free_buf(dest_tmp);
_ii_error(dest_ip, DSW_BMPOFFLINE);
return (rc);
}
rc = _ii_nsc_or(src_tmp, dest_tmp, src_fba_pos, src_fba_pos,
len);
if (II_SUCCESS(rc)) {
II_NSC_WRITE(dest_ip, bitmap, rc, dest_tmp,
src_fba_pos, len, 0);
}
(void) nsc_free_buf(src_tmp);
(void) nsc_free_buf(dest_tmp);
if (!II_SUCCESS(rc)) {
_ii_error(dest_ip, DSW_BMPOFFLINE);
return (rc);
}
}
dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
static _ii_bmp_ops_t alloc_buf_bmp = {
_ii_ab_co_bmp,
_ii_ab_ci_bmp,
_ii_ab_zerobm,
_ii_ab_copybm,
_ii_ab_orbm,
_ii_ab_tst_shd_bit,
_ii_ab_set_shd_bit,
_ii_ab_tst_copy_bit,
_ii_ab_set_copy_bit,
_ii_ab_clr_copy_bits,
_ii_ab_next_copy_bit,
_ii_ab_fill_copy_bmp,
_ii_ab_load_bmp,
_ii_ab_save_bmp,
_ii_ab_change_bmp,
_ii_ab_cnt_bits,
_ii_ab_join_bmp
};
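/*
 * alloc_buf_bmp and the kmem_buf_bmp table below provide the same
 * _ii_bmp_ops_t interface: the _ii_ab_* routines do their work through
 * nsc_alloc_buf()/nsc_free_buf() against the bitmap volume, while the
 * _ii_km_* routines operate on an in-core copy of the bitmaps that is
 * loaded and written back by _ii_km_load_bmp()/_ii_km_save_bmp().
 * Callers use macros such as II_TST_SHD_BIT() and II_TST_COPY_BIT()
 * (seen elsewhere in this file), so they do not care which table a set
 * is using; which one applies is selected elsewhere in the driver.
 */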
/*
* Copyout the bit map.
*/
static int
_ii_km_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
int user_bm_size)
{
int start_offset;
int bm_size;
size_t co_len;
nsc_off_t last_fba;
/* First calculate the size of the shadow and copy bitmaps */
co_len = DSW_BM_FBA_LEN(ip->bi_size);
ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len);
/* Are we in the ranges of the various bitmaps/indexes? */
if (bm_offset < ip->bi_shdfba)
return (EIO);
else if (bm_offset < (last_fba = ip->bi_shdfba + co_len))
/*EMPTY*/;
else if (bm_offset < (last_fba = ip->bi_copyfba + co_len))
/*EMPTY*/;
else if ((ip->bi_flags & DSW_TREEMAP) &&
(bm_offset < (last_fba = last_fba + (co_len * 32))))
/*EMPTY*/;
else return (EIO);
if (FBA_LEN(user_bm_size) > last_fba - bm_offset)
return (EIO);
start_offset = FBA_SIZE(bm_offset);
bm_size = FBA_SIZE(last_fba);
co_len = (user_bm_size > bm_size) ? bm_size : user_bm_size;
if (copyout(ip->bi_bitmap + start_offset, user_bm, co_len))
return (EFAULT);
return (0);
}
/*
* Copy in a bitmap and OR it with the differences bitmap.
*/
static int
_ii_km_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
int user_bm_size)
{
unsigned char *tmp_buf;
unsigned char *dest;
unsigned char *p;
size_t tmp_size;
int n;
int start_offset;
int bm_size;
size_t ci_len;
int rc = 0;
start_offset = FBA_SIZE(bm_offset);
bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
dest = ip->bi_bitmap + start_offset;
tmp_size = FBA_SIZE(1);
tmp_buf = kmem_alloc(tmp_size, KM_SLEEP);
ci_len = (user_bm_size > bm_size) ? bm_size : user_bm_size;
while (ci_len > 0) {
n = (tmp_size > ci_len) ? ci_len : tmp_size;
if (copyin(user_bm, tmp_buf, n)) {
rc = EFAULT;
break;
}
user_bm += n;
for (p = tmp_buf; n-- > 0; ci_len--)
*dest++ |= *p++;
}
if (tmp_buf)
kmem_free(tmp_buf, tmp_size);
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (rc);
}
/*
* Completely zero the bit map.
*/
static int
_ii_km_zerobm(_ii_info_t *ip)
{
int start_offset = FBA_SIZE(ip->bi_shdfba);
int len;
len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
mutex_enter(&ip->bi_bmpmutex);
bzero(ip->bi_bitmap+start_offset, len);
mutex_exit(&ip->bi_bmpmutex);
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
/*
* Copy shadow bitmap to copy bitmap
*/
static int
_ii_km_copybm(_ii_info_t *ip)
{
int copy_offset, shd_offset;
int len;
len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
shd_offset = FBA_SIZE(ip->bi_shdfba);
copy_offset = FBA_SIZE(ip->bi_copyfba);
mutex_enter(&ip->bi_bmpmutex);
bcopy(ip->bi_bitmap+shd_offset, ip->bi_bitmap+copy_offset, len);
mutex_exit(&ip->bi_bmpmutex);
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
/*
* OR the shadow bitmap into the copy bitmap.  Unlike the alloc_buf
* variant, the in-core shadow bitmap itself is left unchanged here.
*/
static int
_ii_km_orbm(_ii_info_t *ip)
{
unsigned char *copy, *shd;
int copy_offset, shd_offset;
int len;
len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
shd_offset = FBA_SIZE(ip->bi_shdfba);
copy_offset = FBA_SIZE(ip->bi_copyfba);
shd = ip->bi_bitmap + shd_offset;
copy = ip->bi_bitmap + copy_offset;
mutex_enter(&ip->bi_bmpmutex);
while (len-- > 0)
*copy++ |= *shd++;
mutex_exit(&ip->bi_bmpmutex);
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
/*
* _ii_km_tst_shd_bit
* Determine if a chunk has been copied to the shadow device
*
* Calling/Exit State:
* Returns 1 if the modified bit has been set for the shadow device,
* otherwise returns 0.
*/
static int
_ii_km_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk)
{
unsigned char *bmp;
int bmp_offset;
int rc;
bmp_offset = FBA_SIZE(ip->bi_shdfba);
bmp = ip->bi_bitmap + bmp_offset;
mutex_enter(&ip->bi_bmpmutex);
rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
mutex_exit(&ip->bi_bmpmutex);
return (rc);
}
/*
* _ii_km_set_shd_bit
* Records that a chunk has been copied to the shadow device
*/
static int
_ii_km_set_shd_bit(_ii_info_t *ip, chunkid_t chunk)
{
unsigned char *bmp;
int bmp_offset;
bmp_offset = FBA_SIZE(ip->bi_shdfba);
bmp = ip->bi_bitmap + bmp_offset;
mutex_enter(&ip->bi_bmpmutex);
if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) {
DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
if ((ip->bi_state & DSW_CNTSHDBITS) == 0)
ip->bi_shdbits++;
}
mutex_exit(&ip->bi_bmpmutex);
return (0);
}
/*
* _ii_km_tst_copy_bit
* Determine if a chunk needs to be copied during updates.
*
* Calling/Exit State:
* Returns 1 if the copy bit for the chunk is set,
* otherwise returns 0
*/
static int
_ii_km_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk)
{
unsigned char *bmp;
int bmp_offset;
int rc;
bmp_offset = FBA_SIZE(ip->bi_copyfba);
bmp = ip->bi_bitmap + bmp_offset;
mutex_enter(&ip->bi_bmpmutex);
rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
mutex_exit(&ip->bi_bmpmutex);
return (rc);
}
/*
* _ii_km_set_copy_bit
* Records that a chunk needs to be copied during updates (sets the copy bit)
*/
static int
_ii_km_set_copy_bit(_ii_info_t *ip, chunkid_t chunk)
{
unsigned char *bmp;
int bmp_offset;
bmp_offset = FBA_SIZE(ip->bi_copyfba);
bmp = ip->bi_bitmap + bmp_offset;
mutex_enter(&ip->bi_bmpmutex);
if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) {
DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
if ((ip->bi_state & DSW_CNTCPYBITS) == 0)
ip->bi_copybits++;
}
mutex_exit(&ip->bi_bmpmutex);
return (0);
}
/*
* _ii_km_clr_copy_bits
* Clears the copy bits for a run of chunks that no longer need copying
*/
static int
_ii_km_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks)
{
unsigned char *bmp;
int bmp_offset;
bmp_offset = FBA_SIZE(ip->bi_copyfba);
bmp = ip->bi_bitmap + bmp_offset;
mutex_enter(&ip->bi_bmpmutex);
for (; nchunks-- > 0; chunk++) {
DSW_BIT_CLR(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
if (ip->bi_copybits > 0)
ip->bi_copybits--;
}
mutex_exit(&ip->bi_bmpmutex);
return (0);
}
/*
* _ii_km_fill_copy_bmp
* Fills the copy bitmap with 1's.
*/
static int
_ii_km_fill_copy_bmp(_ii_info_t *ip)
{
int len;
unsigned char *bmp;
int bmp_offset;
bmp_offset = FBA_SIZE(ip->bi_copyfba);
bmp = ip->bi_bitmap + bmp_offset;
len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
mutex_enter(&ip->bi_bmpmutex);
while (len-- > 0)
*bmp++ = (unsigned char)0xff;
mutex_exit(&ip->bi_bmpmutex);
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
/*
* _ii_km_load_bmp
* Load bitmap from persistent storage.
*/
static int
_ii_km_load_bmp(_ii_info_t *ip, int flag)
{
nsc_off_t bmp_offset;
nsc_size_t bitmap_size;
int rc;
if (ip->bi_flags & DSW_BMPOFFLINE)
return (EIO);
if (ip->bi_bitmap == NULL) {
bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) +
ip->bi_shdfba);
ip->bi_bitmap = nsc_kmem_zalloc(bitmap_size, KM_SLEEP,
_ii_local_mem);
}
if (flag)
return (0); /* just create an empty bitmap */
bmp_offset = FBA_SIZE(ip->bi_shdfba);
rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba,
ip->bi_bitmap + bmp_offset,
2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba));
if (!II_SUCCESS(rc))
_ii_error(ip, DSW_BMPOFFLINE);
ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (rc);
}
/*
* _ii_km_save_bmp
* Save bitmap to persistent storage.
*/
static int
_ii_km_save_bmp(_ii_info_t *ip, int flag)
{
int bmp_offset;
int bitmap_size;
int rc;
bmp_offset = FBA_SIZE(ip->bi_shdfba);
if (ip->bi_flags & DSW_BMPOFFLINE)
rc = EIO;
else {
rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_WRBUF,
ip->bi_shdfba, ip->bi_bitmap + bmp_offset,
2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba));
if (!II_SUCCESS(rc))
_ii_error(ip, DSW_BMPOFFLINE);
}
if (flag && ip->bi_bitmap) { /* dispose of bitmap memory */
bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) +
ip->bi_shdfba);
nsc_kmem_free(ip->bi_bitmap, bitmap_size);
ip->bi_bitmap = NULL;
}
return (rc);
}
/*
* _ii_km_next_copy_bit
* Find next set copy bit.
*
* Returns the chunk id of the next set bit in the copy bitmap, with the
* corresponding chunks locked ('*got' is set to the number locked), or
* maxchunk + 1 if no set bit remains.  Used to cut down on the number of
* times the bmpmutex is acquired.
*/
static chunkid_t
_ii_km_next_copy_bit(_ii_info_t *ip, chunkid_t chunk, chunkid_t maxchunk,
int want, int *got)
{
unsigned char *bmp;
int bmp_offset;
int nextchunk;
*got = 0;
bmp_offset = FBA_SIZE(ip->bi_copyfba);
bmp = ip->bi_bitmap + bmp_offset;
mutex_enter(&ip->bi_bmpmutex);
for (; chunk < maxchunk; chunk++) {
if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS)) {
/*
* trylock won't sleep so can use while
* holding bi_bmpmutex.
*/
if (!_ii_trylock_chunk(ip, chunk)) {
mutex_exit(&ip->bi_bmpmutex);
_ii_lock_chunk(ip, chunk);
*got = 1;
DTRACE_PROBE(_ii_km_next_copy_bit);
return (chunk);
}
*got = 1;
for (nextchunk = chunk + 1;
*got < want && nextchunk < maxchunk; nextchunk++) {
if (!DSW_BIT_ISSET(bmp[nextchunk/DSW_BITS],
nextchunk%DSW_BITS))
break;
if (_ii_trylock_chunk(ip, nextchunk))
(*got)++;
else
break;
}
mutex_exit(&ip->bi_bmpmutex);
DTRACE_PROBE(_ii_km_next_copy_bit);
return (chunk);
}
}
mutex_exit(&ip->bi_bmpmutex);
return (maxchunk + 1);
}
/*
* _ii_km_change_bmp
* Copy the change bitmap (shadow bits ORed with copy bits) to memory.
*/
static int
_ii_km_change_bmp(_ii_info_t *ip, unsigned char *ptr)
/* ARGSUSED */
{
int start_offset;
int bm_size;
unsigned char *q;
bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
start_offset = FBA_SIZE(ip->bi_shdfba);
bcopy(ip->bi_bitmap + start_offset, ptr, bm_size);
start_offset = FBA_SIZE(ip->bi_copyfba);
q = ip->bi_bitmap + start_offset;
while (bm_size-- > 0)
*ptr++ |= *q++;
return (0);
}
/*
* Count bits set in the bit map.
*/
static int
_ii_km_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter,
int bm_size)
{
int start_offset;
int i;
nsc_size_t j, k;
unsigned char *cp;
start_offset = FBA_SIZE(bm_offset);
cp = ip->bi_bitmap + start_offset;
for (i = k = 0; i < bm_size; i++)
for (j = (unsigned)*cp++; j; j &= j - 1)
k++;
*counter = k;
return (0);
}
/*
* OR the source set's bitmap into the destination set's bitmap as
* part of a join operation.
*/
static int
_ii_km_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip)
{
uchar_t *dest, *src;
nsc_size_t bm_size;
dest = dest_ip->bi_bitmap + FBA_SIZE(dest_ip->bi_shdfba);
src = src_ip->bi_bitmap + FBA_SIZE(src_ip->bi_shdfba);
bm_size = FBA_SIZE(DSW_BM_FBA_LEN(dest_ip->bi_size));
while (bm_size-- > 0)
*dest++ |= *src++;
dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
return (0);
}
static _ii_bmp_ops_t kmem_buf_bmp = {
_ii_km_co_bmp,
_ii_km_ci_bmp,
_ii_km_zerobm,
_ii_km_copybm,
_ii_km_orbm,
_ii_km_tst_shd_bit,
_ii_km_set_shd_bit,
_ii_km_tst_copy_bit,
_ii_km_set_copy_bit,
_ii_km_clr_copy_bits,
_ii_km_next_copy_bit,
_ii_km_fill_copy_bmp,
_ii_km_load_bmp,
_ii_km_save_bmp,
_ii_km_change_bmp,
_ii_km_cnt_bits,
_ii_km_join_bmp
};
static int
ii_read_volume(_ii_info_t *ip, int mst_src, nsc_buf_t *srcbuf,
nsc_buf_t *dstbuf, chunkid_t chunk_num, nsc_off_t fba, nsc_size_t len)
{
int rc;
nsc_buf_t *tmp;
nsc_off_t mapped_fba;
chunkid_t mapped_chunk;
int overflow;
if (mst_src || (ip->bi_flags&DSW_TREEMAP) == 0) {
/* simple read with optional copy */
if (mst_src) {
II_NSC_READ(ip, master, rc, srcbuf, fba, len, 0);
} else {
II_NSC_READ(ip, shadow, rc, srcbuf, fba, len, 0);
}
if (dstbuf && II_SUCCESS(rc)) {
rc = nsc_copy(srcbuf, dstbuf, fba, fba, len);
}
return (rc);
}
/* read from mapped shadow into final buffer */
mapped_chunk = ii_tsearch(ip, chunk_num);
if (mapped_chunk == II_NULLNODE)
return (EIO);
overflow = II_ISOVERFLOW(mapped_chunk);
if (overflow)
mapped_chunk = II_2OVERFLOW(mapped_chunk);
/* convert chunk number from tsearch into final fba */
mapped_fba = DSW_CHK2FBA(mapped_chunk) + (fba % DSW_SIZE);
tmp = NULL;
if (overflow) {
(void) nsc_reserve(OVRFD(ip), NSC_MULTI);
II_READ_START(ip, overflow);
rc = nsc_alloc_buf(OVRFD(ip), mapped_fba, len, NSC_RDBUF, &tmp);
II_READ_END(ip, overflow, rc, len);
} else {
II_READ_START(ip, shadow);
rc = nsc_alloc_buf(SHDFD(ip), mapped_fba, len, NSC_RDBUF, &tmp);
II_READ_END(ip, shadow, rc, len);
}
if (II_SUCCESS(rc)) {
if (dstbuf == NULL)
dstbuf = srcbuf;
rc = nsc_copy(tmp, dstbuf, mapped_fba, fba, len);
(void) nsc_free_buf(tmp);
}
if (overflow)
nsc_release(OVRFD(ip));
return (rc);
}
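/*
 * Worked example of the mapping above for a compact dependent
 * (DSW_TREEMAP) shadow, with purely illustrative numbers: if
 * ii_tsearch() maps the logical chunk to physical chunk 3 on the
 * shadow, a request at fba is satisfied from
 *
 *	mapped_fba = DSW_CHK2FBA(3) + (fba % DSW_SIZE)
 *
 * i.e. the start of physical chunk 3 plus the offset of fba within its
 * chunk.  If the tsearch result carries the overflow flag, the same
 * offset is applied to the overflow volume instead, under a temporary
 * nsc_reserve of OVRFD(ip).
 */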
/*
* _ii_fill_buf
* Read data from the required device
*
* Calling/Exit State:
* Returns 0 if the data was read successfully, otherwise
* error code.
*
* Description:
* Reads the data from fba_pos for length fba_len from the
* required device. This data may be a mix of data from the master
* device and the shadow device, depending on the state of the
* bitmaps.
*/
static int
_ii_fill_buf(ii_fd_t *bfd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
nsc_buf_t **handle, nsc_buf_t **handle2)
{
_ii_info_t *ip = bfd->ii_info;
_ii_info_t *xip;
int second_shd = 0;
nsc_off_t temp_fba;
nsc_size_t temp_len;
nsc_size_t bmp_len;
chunkid_t chunk_num;
int rc;
int fill_from_pair;
int rtype = SHDR|BMP;
nsc_buf_t *second_buf = NULL;
if (flag&NSC_RDAHEAD)
return (NSC_DONE);
chunk_num = fba_pos / DSW_SIZE;
temp_fba = fba_pos;
temp_len = fba_len;
/*
* If the master is being updated from a shadow we need to fill from
* the correct shadow volume.
*/
if (NSHADOWS(ip) && bfd->ii_shd == 0) {
for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
if (xip == ip)
continue;
if (xip->bi_flags &DSW_COPYINGS) {
second_shd = 1;
ip = xip;
if ((rc = _ii_rsrv_devs(ip, rtype,
II_INTERNAL)) != 0)
return (EIO);
rc = nsc_alloc_buf(SHDFD(ip), fba_pos, fba_len,
(flag&NSC_RDAHEAD)|NSC_MIXED, &second_buf);
if (!II_SUCCESS(rc)) {
rc = EIO;
goto out;
}
handle2 = &second_buf;
break;
}
}
}
while (temp_len > 0) {
if ((temp_fba + temp_len) > DSW_CHK2FBA(chunk_num + 1)) {
bmp_len = DSW_CHK2FBA(chunk_num + 1) - temp_fba;
temp_len -= bmp_len;
} else {
bmp_len = temp_len;
temp_len = 0;
}
fill_from_pair = 0;
if ((ip->bi_flags & DSW_COPYINGM) == DSW_COPYINGM) {
rc = II_TST_COPY_BIT(ip, chunk_num);
/* Treat a failed bitmap volume as a clear bit */
if (rc > 0) {
/* Copy bit set */
if (bfd->ii_shd) {
if (*handle2)
fill_from_pair = 1;
else {
rc = EIO;
goto out;
}
}
}
}
if ((ip->bi_flags & DSW_COPYINGS) == DSW_COPYINGS) {
rc = II_TST_COPY_BIT(ip, chunk_num);
/* Treat a failed bitmap volume as a clear bit */
if (rc > 0) {
/* Copy bit set */
if (bfd->ii_shd == 0) {
if (*handle2 ||
(ip->bi_flags&DSW_TREEMAP))
fill_from_pair = 1;
else {
rc = EIO;
goto out;
}
}
}
}
if (((ip->bi_flags & DSW_GOLDEN) == 0) && bfd->ii_shd) {
/* Dependent shadow read */
rc = II_TST_SHD_BIT(ip, chunk_num);
if (rc < 0) {
rc = EIO;
goto out;
}
if (rc == 0) {
/* Shadow bit clear */
if (*handle2)
fill_from_pair = 1;
else {
rc = EIO;
goto out;
}
}
}
if (fill_from_pair) {
/* an offline master or shadow matters now, as the pair must be read */
if (ip->bi_flags & (DSW_MSTOFFLINE | DSW_SHDOFFLINE)) {
rc = EIO;
goto out;
}
if (*handle2 == NULL &&
(ip->bi_flags&DSW_TREEMAP) == 0) {
rc = EIO;
goto out;
}
rc = ii_read_volume(ip, bfd->ii_shd,
*handle2, *handle, chunk_num, temp_fba, bmp_len);
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_MSTOFFLINE);
_ii_error(ip, DSW_SHDOFFLINE);
goto out;
}
} else {
if (bfd->ii_shd && (ip->bi_flags & DSW_SHDOFFLINE)) {
rc = EIO;
goto out;
}
if ((bfd->ii_shd == 0) &&
(ip->bi_flags & DSW_MSTOFFLINE)) {
rc = EIO;
goto out;
}
rc = ii_read_volume(ip, !(bfd->ii_shd), *handle, NULL,
chunk_num, temp_fba, bmp_len);
if (!II_SUCCESS(rc)) {
if (bfd->ii_shd)
_ii_error(ip, DSW_SHDOFFLINE);
else
_ii_error(ip, DSW_MSTOFFLINE);
goto out;
}
}
temp_fba += bmp_len;
chunk_num++;
}
rc = 0;
out:
if (second_buf)
(void) nsc_free_buf(second_buf);
if (second_shd)
_ii_rlse_devs(ip, rtype);
return (rc);
}
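/*
 * Summary of how _ii_fill_buf() chooses where each chunk of a read is
 * filled from (a descriptive sketch of the tests above, not new logic):
 *
 *	reader	flag set	bit tested	bit state	fill from
 *	------	--------	----------	---------	---------
 *	shadow	DSW_COPYINGM	copy bit	set		pair (master)
 *	master	DSW_COPYINGS	copy bit	set		pair (shadow)
 *	shadow	!DSW_GOLDEN	shadow bit	clear		pair (master)
 *	otherwise					own volume
 *
 * A failed bitmap volume is treated as a clear bit, an offline volume
 * turns the request into EIO once it must actually be read, and
 * NSC_RDAHEAD requests return NSC_DONE immediately without filling.
 */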
/*
* _ii_shadow_write
* Perform any copy on write required by a write buffer request
*
* Calling/Exit State:
* Returns 0 on success, otherwise error code.
*
*/
static int
_ii_shadow_write(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len)
{
_ii_info_t *ip = bfd->ii_info;
chunkid_t chunk_num;
int rc;
int flag;
int hanging;
DTRACE_PROBE2(_ii_shadow_write_start, nsc_off_t, pos, nsc_size_t, len);
/* fail immediately if config DB is unavailable */
if ((ip->bi_flags & DSW_CFGOFFLINE) == DSW_CFGOFFLINE) {
return (EIO);
}
chunk_num = pos / DSW_SIZE;
if (bfd->ii_shd)
flag = 0; /* To shadow */
else
flag = CV_SHD2MST; /* To master */
mutex_enter(&ip->bi_mutex);
ip->bi_shdref++;
mutex_exit(&ip->bi_mutex);
hanging = (ip->bi_flags&DSW_HANGING) != 0;
for (; (chunk_num >= 0) &&
DSW_CHK2FBA(chunk_num) < (pos + len); chunk_num++) {
if (!hanging)
_ii_lock_chunk(ip, chunk_num);
rc = _ii_copy_on_write(ip, flag, chunk_num, 1);
/*
* Set the shadow bit when a small shadow has overflowed so
* that ii_read_volume can return an error if an attempt is
* made to read that chunk.
*/
if (!hanging) {
if (rc == 0 ||
(rc == EIO && (ip->bi_flags&DSW_OVERFLOW) != 0))
(void) II_SET_SHD_BIT(ip, chunk_num);
_ii_unlock_chunk(ip, chunk_num);
}
}
mutex_enter(&ip->bi_mutex);
ip->bi_shdref--;
if (ip->bi_state & DSW_CLOSING) {
if (total_ref(ip) == 0) {
cv_signal(&ip->bi_closingcv);
}
}
mutex_exit(&ip->bi_mutex);
/* did the bitmap fail during this process? */
return (ip->bi_flags & DSW_CFGOFFLINE? EIO : 0);
}
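/*
 * Worked example of the chunk loop above (illustrative numbers only):
 * a write of len fbas starting at pos touches every chunk whose first
 * fba lies below pos + len, starting with chunk pos / DSW_SIZE.  With
 * DSW_SIZE == 32, for instance, a write at fba 40 for 60 fbas covers
 * fbas 40..99 and therefore chunks 1, 2 and 3 (fbas 32..127); each
 * chunk is locked, copy-on-write'd via _ii_copy_on_write(), has its
 * shadow bit set and is unlocked in turn (the locking and bit setting
 * are skipped for DSW_HANGING sets).
 */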
/*
* _ii_alloc_buf
* Allocate a buffer of data
*
* Calling/Exit State:
* Returns 0 for success, < 0 for async I/O, > 0 is an error code.
*
* Description:
* For a write buffer, calls dsw_shadow_write to perform any necessary
* copy on write operations, then allocates the real buffers from the
* underlying devices.
* For a read buffer, allocates the real buffers from the underlying
* devices, then calls _ii_fill_buf to fill the required buffer.
* For a buffer that is neither read nor write, just allocate the
* buffers so that a _ii_fill_buf can be done later by _ii_read.
*/
static int
_ii_alloc_buf(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len, int flag,
ii_buf_t **ptr)
{
_ii_info_t *ip = bfd->ii_info;
ii_buf_t *h;
int raw = II_RAW(bfd);
int rc = 0;
int ioflag;
int fbuf = 0, fbuf2 = 0, abuf = 0;
int rw_ent = 0;
if (bfd->ii_bmp) {
DTRACE_PROBE(_ii_alloc_buf_end);
/* any I/O to the bitmap device is barred */
return (EIO);
}
if (len == 0) {
DTRACE_PROBE(_ii_alloc_buf_end);
return (EINVAL);
}
/* Bounds checking */
if (pos + len > ip->bi_size) {
if (ii_debug > 1)
cmn_err(CE_NOTE,
"!ii: Attempt to access beyond end of ii volume");
DTRACE_PROBE(_ii_alloc_buf_end);
return (EIO);
}
h = *ptr;
if (h == NULL) {
h = (ii_buf_t *)_ii_alloc_handle(NULL, NULL, NULL, bfd);
if (h == NULL) {
DTRACE_PROBE(_ii_alloc_buf_end);
return (ENOMEM);
}
}
/*
* Temporary nsc_reserve of bitmap and other device.
* This device has already been reserved by the preceding _ii_attach.
* Corresponding nsc_release is in _ii_free_buf.
*/
h->ii_rsrv = BMP | (raw ? (bfd->ii_shd ? MSTR : SHDR)
: (bfd->ii_shd ? MST : SHD));
if (!bfd->ii_shd)
ip = ip->bi_master;
rw_enter(&ip->bi_linkrw, RW_READER);
rw_ent = 1;
if (ip->bi_shdfd == NULL || (ip->bi_flags & DSW_SHDEXPORT) ==
DSW_SHDEXPORT)
h->ii_rsrv &= ~(SHD|SHDR);
if ((rc = _ii_rsrv_devs(ip, h->ii_rsrv, II_EXTERNAL)) != 0) {
rw_exit(&ip->bi_linkrw);
rw_ent = 0;
h->ii_rsrv = NULL;
goto error;
}
if (flag & NSC_WRBUF) {
rc = _ii_shadow_write(bfd, pos, len);
if (!II_SUCCESS(rc))
goto error;
}
if (!(flag & NSC_RDAHEAD))
ioflag = flag & ~(NSC_RDBUF);
else
ioflag = flag;
if (bfd->ii_shd) {
/*
* SHADOW
*/
if (ip->bi_flags & DSW_SHDEXPORT) {
rc = EIO;
goto error;
}
/*
* The master device buffer has to be allocated first
* so that deadlocks are avoided.
*/
DTRACE_PROBE(AllocBufFor_SHADOW);
if ((ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) == 0) {
rc = nsc_alloc_buf(MSTFD(ip), pos, len,
(flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2);
if (!II_SUCCESS(rc)) {
if (ii_debug > 2)
cmn_err(CE_WARN, "!ii: "
"Join/write-S race detected\n");
if (h->ii_bufp2)
(void) nsc_free_buf(h->ii_bufp2);
h->ii_bufp2 = NULL;
/*
* Carry on as this will not matter if
* _ii_fill_buf is not called, or if
* it is called but doesn't need to read this
* volume.
*/
rc = 0;
}
fbuf2 = 1;
}
if (ip->bi_flags & DSW_SHDOFFLINE) {
rc = EIO;
goto error;
}
if ((ip->bi_flags)&DSW_TREEMAP) {
rc = nsc_alloc_abuf(pos, len, 0, &h->ii_abufp);
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_SHDOFFLINE);
goto error;
}
abuf = 1;
} else {
II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), pos, len,
ioflag, &h->ii_bufp); /* do not read yet */
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_SHDOFFLINE);
goto error;
}
fbuf = 1;
}
} else {
/*
* MASTER
*/
/*
* The master device buffer has to be allocated first
* so that deadlocks are avoided.
*/
if (ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) {
rc = EIO;
goto error;
}
DTRACE_PROBE(AllocBufFor_MASTER);
II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, ioflag,
&h->ii_bufp); /* do not read yet */
if (!II_SUCCESS(rc)) {
_ii_error(ip, DSW_MSTOFFLINE);
goto error;
}
fbuf = 1;
/*
* If shadow FD and (dependent set OR copying) and
* not (compact dependent || shadow offline || shadow exported)
*/
if ((ip->bi_shdfd) &&
((ip->bi_flags & DSW_COPYINGP) ||
(!(ip->bi_flags & DSW_GOLDEN))) &&
(!(ip->bi_flags &
(DSW_TREEMAP|DSW_SHDOFFLINE|DSW_SHDEXPORT)))) {
rc = nsc_alloc_buf(SHDFD(ip), pos, len,
(flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2);
if (!II_SUCCESS(rc)) {
if (ii_debug > 2)
cmn_err(CE_WARN, "!ii: "
"Join/write-M race detected\n");
if (h->ii_bufp2)
(void) nsc_free_buf(h->ii_bufp2);
h->ii_bufp2 = NULL;
/*
* Carry on as this will not matter if
* _ii_fill_buf is not called, or if
* it is called but doesn't need to read this
* volume.
*/
rc = 0;
}
fbuf2 = 1;
}
}
if (flag & NSC_RDBUF)
rc = _ii_fill_buf(bfd, pos, len, flag,
h->ii_abufp ? &h->ii_abufp : &h->ii_bufp, &h->ii_bufp2);
error:
if (II_SUCCESS(rc)) {
h->ii_bufh.sb_vec = h->ii_abufp ? h->ii_abufp->sb_vec :
h->ii_bufp->sb_vec;
h->ii_bufh.sb_error = 0;
h->ii_bufh.sb_flag |= flag;
h->ii_bufh.sb_pos = pos;
h->ii_bufh.sb_len = len;
} else {
h->ii_bufh.sb_error = rc;
if (h->ii_bufp2 && fbuf2) {
(void) nsc_free_buf(h->ii_bufp2);
h->ii_bufp2 = NULL;
}
if (h->ii_bufp && fbuf) {
(void) nsc_free_buf(h->ii_bufp);
h->ii_bufp = NULL;
}
if (h->ii_abufp && abuf) {
(void) nsc_free_buf(h->ii_abufp);
h->ii_abufp = NULL;
}
if (h->ii_rsrv) {
/*
* Release temporary reserve - reserved above.
*/
_ii_rlse_devs(ip, h->ii_rsrv);
h->ii_rsrv = NULL;
}
if (rw_ent)
rw_exit(&ip->bi_linkrw);
}
return (rc);
}
/*
* _ii_free_buf
*/
static int
_ii_free_buf(ii_buf_t *h)
{
ii_fd_t *bfd;
int rsrv;
int rc;
if (h->ii_abufp == NULL) {
rc = nsc_free_buf(h->ii_bufp);
} else {
rc = nsc_free_buf(h->ii_abufp);
h->ii_abufp = NULL;
}
if (!II_SUCCESS(rc))
return (rc);
if (h->ii_bufp2) {
rc = nsc_free_buf(h->ii_bufp2);
h->ii_bufp2 = NULL;
if (!II_SUCCESS(rc))
return (rc);
}
bfd = h->ii_fd;
rsrv = h->ii_rsrv;
if ((h->ii_bufh.sb_flag & NSC_HALLOCATED) == 0) {
rc = _ii_free_handle(h, h->ii_fd);
if (!II_SUCCESS(rc))
return (rc);
} else {
h->ii_bufh.sb_flag = NSC_HALLOCATED;
h->ii_bufh.sb_vec = NULL;
h->ii_bufh.sb_error = 0;
h->ii_bufh.sb_pos = 0;
h->ii_bufh.sb_len = 0;
h->ii_rsrv = NULL;
}
/*
* Release temporary reserve - reserved in _ii_alloc_buf.
*/
if (rsrv)
_ii_rlse_devs(bfd->ii_info, rsrv);
rw_exit(&bfd->ii_info->bi_linkrw);
return (0);
}
/*
* _ii_open
* Open a device
*
* Calling/Exit State:
* Returns a token to identify the shadow device.
*
* Description:
* Performs the housekeeping operations associated with an upper layer
* of the nsc stack opening a shadowed device.
*/
/* ARGSUSED */
static int
_ii_open(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
{
_ii_info_t *ip;
_ii_overflow_t *op;
ii_fd_t *bfd;
int is_mst = 0;
int is_shd = 0;
int raw = (flag & NSC_CACHE) == 0;
bfd = nsc_kmem_zalloc(sizeof (*bfd), KM_SLEEP, _ii_local_mem);
if (!bfd)
return (ENOMEM);
DTRACE_PROBE1(_ii_open_mutex,
ii_fd_t *, bfd);
mutex_enter(&_ii_info_mutex);
for (ip = _ii_info_top; ip; ip = ip->bi_next) {
if (strcmp(path, ii_pathname(ip->bi_mstfd)) == 0) {
is_mst = 1;
break;
} else if (strcmp(path, ip->bi_keyname) == 0) {
is_shd = 1;
break;
} else if (strcmp(path, ii_pathname(ip->bi_bmpfd)) == 0)
break;
}
if (is_mst)
ip = ip->bi_master;
if (ip && ip->bi_disabled && !(ip->bi_state & DSW_MULTIMST)) {
DTRACE_PROBE(_ii_open_Disabled);
mutex_exit(&_ii_info_mutex);
return (EINTR);
}
if (!ip) {
/* maybe it's an overflow */
mutex_exit(&_ii_info_mutex);
mutex_enter(&_ii_overflow_mutex);
for (op = _ii_overflow_top; op; op = op->ii_next) {
if (strcmp(path, op->ii_volname) == 0)
break;
}
mutex_exit(&_ii_overflow_mutex);
if (!op) {
nsc_kmem_free(bfd, sizeof (*bfd));
DTRACE_PROBE(_ii_open_end_EINVAL);
return (EINVAL);
}
bfd->ii_ovr = 1;
bfd->ii_oflags = flag;
bfd->ii_optr = op;
*cdp = (blind_t)bfd;
DTRACE_PROBE(_ii_open_end_overflow);
return (0);
}
mutex_enter(&ip->bi_mutex);
ip->bi_ioctl++;
mutex_exit(&_ii_info_mutex);
if (is_mst) {
if (raw) {
ip->bi_mstr_iodev = NULL; /* set in attach */
ip->bi_mstrref++;
} else {
ip->bi_mst_iodev = NULL; /* set in attach */
ip->bi_mstref++;
}
ip->bi_master->bi_iifd = bfd;
} else if (is_shd) {
if (raw) {
ip->bi_shdr_iodev = NULL; /* set in attach */
ip->bi_shdrref++;
} else {
ip->bi_shd_iodev = NULL; /* set in attach */
ip->bi_shdref++;
}
bfd->ii_shd = 1;
} else {
ip->bi_bmpref++;
ip->bi_bmp_iodev = NULL; /* set in attach */
bfd->ii_bmp = 1;
}
_ii_ioctl_done(ip);
mutex_exit(&ip->bi_mutex);
bfd->ii_info = ip;
bfd->ii_oflags = flag;
*cdp = (blind_t)bfd;
return (0);
}
static int
_ii_openc(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
{
return (_ii_open(path, NSC_CACHE|flag, cdp, iodev));
}
static int
_ii_openr(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
{
return (_ii_open(path, NSC_DEVICE|flag, cdp, iodev));
}
/*
* _ii_close
* Close a device
*
* Calling/Exit State:
* Always succeeds - returns 0
*
* Description:
* Performs the housekeeping operations associated with an upper layer
* of the nsc stack closing a shadowed device.
*/
static int
_ii_close(ii_fd_t *bfd)
{
_ii_info_t *ip = bfd->ii_info;
_ii_info_dev_t *dip;
int raw;
if (!ip) {
ASSERT(bfd->ii_ovr);
return (0);
}
raw = II_RAW(bfd);
mutex_enter(&ip->bi_mutex);
if (bfd->ii_shd && raw) {
dip = &ip->bi_shdrdev;
} else if (bfd->ii_shd) {
dip = &ip->bi_shddev;
} else if (bfd->ii_bmp) {
dip = &ip->bi_bmpdev;
} else if (raw) {
dip = ip->bi_mstrdev;
} else {
dip = ip->bi_mstdev;
}
if (dip) {
dip->bi_ref--;
if (dip->bi_ref == 0)
dip->bi_iodev = NULL;
}
if (ip->bi_state & DSW_CLOSING) {
if (total_ref(ip) == 0) {
cv_signal(&ip->bi_closingcv);
}
} else if ((ip->bi_flags & DSW_HANGING) &&
(ip->bi_head->bi_state & DSW_CLOSING))
cv_signal(&ip->bi_head->bi_closingcv);
if (!(bfd->ii_shd || bfd->ii_bmp)) /* is master device */
ip->bi_master->bi_iifd = NULL;
mutex_exit(&ip->bi_mutex);
nsc_kmem_free(bfd, sizeof (*bfd));
return (0);
}
/*
* _ii_alloc_handle
* Allocate a handle
*
*/
static nsc_buf_t *
_ii_alloc_handle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)(), ii_fd_t *bfd)
{
ii_buf_t *h;
if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
return (NULL);
h = kmem_alloc(sizeof (*h), KM_SLEEP);
if (!h)
return (NULL);
h->ii_abufp = NULL;
h->ii_bufp = nsc_alloc_handle(II_FD(bfd), d_cb, r_cb, w_cb);
if (!h->ii_bufp) {
kmem_free(h, sizeof (*h));
return (NULL);
}
h->ii_bufp2 = NULL;
h->ii_bufh.sb_flag = NSC_HALLOCATED;
h->ii_fd = bfd;
h->ii_rsrv = NULL;
return ((nsc_buf_t *)h);
}
/*
* _ii_free_handle
* Free a handle
*
*/
static int /*ARGSUSED*/
_ii_free_handle(ii_buf_t *h, ii_fd_t *bfd)
{
int rc;
if (h->ii_abufp)
(void) nsc_free_buf(h->ii_abufp);
rc = nsc_free_handle(h->ii_bufp);
if (!II_SUCCESS(rc)) {
return (rc);
}
kmem_free(h, sizeof (ii_buf_t));
return (0);
}
/*
* _ii_attach
* Attach
*
* Calling/Exit State:
* Returns 0 for success, errno on failure.
*
* Description:
*	Records the nsc_iodev for the device being attached and, for
*	master and shadow devices, reserves the underlying device.
*/
static int
_ii_attach(ii_fd_t *bfd, nsc_iodev_t *iodev)
{
_ii_info_t *ip;
int dev;
int raw;
int rc;
_ii_info_dev_t *infop;
raw = II_RAW(bfd);
DTRACE_PROBE2(_ii_attach_info,
char *, bfd->ii_shd? "shadow" : "master",
int, raw);
if (bfd->ii_ovr)
return (EINVAL);
ip = bfd->ii_info;
if (ip == NULL)
return (EINVAL);
mutex_enter(&ip->bi_mutex);
if (bfd->ii_bmp) {
infop = &ip->bi_bmpdev;
} else if (bfd->ii_shd) {
if (raw) {
infop = &ip->bi_shdrdev;
} else {
infop = &ip->bi_shddev;
}
} else if (!bfd->ii_ovr) {
if (raw) {
infop = ip->bi_mstrdev;
} else {
infop = ip->bi_mstdev;
}
}
if (iodev) {
infop->bi_iodev = iodev;
nsc_set_owner(infop->bi_fd, infop->bi_iodev);
}
mutex_exit(&ip->bi_mutex);
if (bfd->ii_bmp)
return (EINVAL);
if (raw)
dev = bfd->ii_shd ? SHDR : MSTR;
else
dev = bfd->ii_shd ? SHD : MST;
rc = _ii_rsrv_devs(ip, dev, II_EXTERNAL);
return (rc);
}
/*
* _ii_detach
* Detach
*
* Calling/Exit State:
* Returns 0 for success, always succeeds
*
* Description:
*	Releases the device reserve taken by the matching _ii_attach.
*/
static int
_ii_detach(ii_fd_t *bfd)
{
int dev;
int raw;
raw = II_RAW(bfd);
DTRACE_PROBE2(_ii_detach_info,
char *, bfd->ii_shd? "shadow" : "master",
int, raw);
if (bfd->ii_bmp)
return (0);
ASSERT(bfd->ii_info);
dev = bfd->ii_shd ? (raw ? SHDR : SHD) : (raw ? MSTR : MST);
_ii_rlse_devs(bfd->ii_info, dev);
return (0);
}
/*
* _ii_get_pinned
*
*/
static int
_ii_get_pinned(ii_fd_t *bfd)
{
int rc;
if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
return (EIO);
rc = nsc_get_pinned(II_FD(bfd));
return (rc);
}
/*
* _ii_discard_pinned
*
*/
static int
_ii_discard_pinned(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len)
{
int rc;
if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
return (EIO);
rc = nsc_discard_pinned(II_FD(bfd), pos, len);
return (rc);
}
/*
* _ii_partsize
*
*/
static int
_ii_partsize(ii_fd_t *bfd, nsc_size_t *ptr)
{
/* Always return saved size */
*ptr = bfd->ii_info->bi_size;
return (0);
}
/*
* _ii_maxfbas
*
*/
static int
_ii_maxfbas(ii_fd_t *bfd, int flag, nsc_size_t *ptr)
{
int rc;
int rs;
int dev;
_ii_info_t *ip;
ip = bfd->ii_info;
if (REMOTE_VOL(bfd->ii_shd, ip))
return (EIO);
dev = ((ip->bi_flags)&DSW_SHDIMPORT) ? SHDR : MSTR;
DTRACE_PROBE1(_ii_maxfbas_info,
char *, dev == SHDR? "shadow" : "master");
rs = _ii_rsrv_devs(ip, dev, II_INTERNAL);
rc = nsc_maxfbas((dev == MSTR) ? MSTFD(ip) : SHDFD(ip), flag, ptr);
if (rs == 0)
_ii_rlse_devs(ip, dev);
return (rc);
}
/*
* ii_get_group_list
*/
_ii_info_t **
ii_get_group_list(char *group, int *count)
{
int i;
int nip;
uint64_t hash;
_ii_info_t **ipa;
_ii_lsthead_t *head;
_ii_lstinfo_t *np;
hash = nsc_strhash(group);
for (head = _ii_group_top; head; head = head->lst_next) {
if (hash == head->lst_hash && strncmp(head->lst_name,
group, DSW_NAMELEN) == 0)
break;
}
if (!head) {
return (NULL);
}
/* Count entries */
for (nip = 0, np = head->lst_start; np; np = np->lst_next)
++nip;
ASSERT(nip > 0);
ipa = kmem_zalloc(sizeof (_ii_info_t *) * nip, KM_SLEEP);
np = head->lst_start;
for (i = 0; i < nip; i++) {
ASSERT(np != 0);
ipa[i] = np->lst_ip;
np = np->lst_next;
}
*count = nip;
return (ipa);
}
/*
* _ii_pinned
*
*/
static void
_ii_pinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
{
DTRACE_PROBE3(_ii_pinned_start, nsc_iodev_t, dip->bi_iodev,
nsc_off_t, pos, nsc_size_t, len);
nsc_pinned_data(dip->bi_iodev, pos, len);
}
/*
* _ii_unpinned
*
*/
static void
_ii_unpinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
{
nsc_unpinned_data(dip->bi_iodev, pos, len);
}
/*
* _ii_read
*/
static int
_ii_read(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
{
int rc;
void *sb_vec;
nsc_vec_t **src;
if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info))
rc = EIO;
else {
src = h->ii_abufp? &h->ii_abufp->sb_vec : &h->ii_bufp->sb_vec;
sb_vec = *src;
*src = h->ii_bufh.sb_vec;
rc = _ii_fill_buf(h->ii_fd, pos, len, flag,
h->ii_abufp ? &h->ii_abufp : &h->ii_bufp, &h->ii_bufp2);
*src = sb_vec;
}
if (!II_SUCCESS(rc))
h->ii_bufh.sb_error = rc;
return (rc);
}
/*
* _ii_write
*/
static int
_ii_write(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
{
int rc;
ii_fd_t *bfd = h->ii_fd;
_ii_info_t *ip = bfd->ii_info;
chunkid_t chunk_num;
nsc_size_t copy_len;
nsc_off_t mapped_fba;
chunkid_t mapped_chunk;
int overflow;
nsc_buf_t *tmp;
void *sb_vec;
if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info))
rc = EIO;
else if ((ip->bi_flags&DSW_TREEMAP) == 0 || !bfd->ii_shd) {
sb_vec = h->ii_bufp->sb_vec;
h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
if (bfd->ii_shd) {
II_NSC_WRITE(ip, shadow, rc, h->ii_bufp, pos, len,
flag);
} else {
II_NSC_WRITE(ip, master, rc, h->ii_bufp, pos, len,
flag);
}
h->ii_bufp->sb_vec = sb_vec;
} else {
/* write of mapped shadow buffer */
rc = 0;
chunk_num = pos / DSW_SIZE;
while (len > 0 && II_SUCCESS(rc)) {
/*
* don't need to test bitmaps as allocating the
* write buffer will c-o-write the chunk.
*/
mapped_chunk = ii_tsearch(ip, chunk_num);
if (mapped_chunk == II_NULLNODE) {
rc = EIO;
break;
}
overflow = II_ISOVERFLOW(mapped_chunk);
if (overflow)
mapped_chunk = II_2OVERFLOW(mapped_chunk);
mapped_fba = DSW_CHK2FBA(mapped_chunk) +
(pos % DSW_SIZE);
copy_len = DSW_SIZE - (pos % DSW_SIZE);
if (copy_len > len)
copy_len = len;
tmp = NULL;
if (overflow) {
(void) nsc_reserve(OVRFD(ip), NSC_MULTI);
rc = nsc_alloc_buf(OVRFD(ip), mapped_fba,
copy_len, NSC_WRBUF, &tmp);
} else
rc = nsc_alloc_buf(SHDFD(ip), mapped_fba,
copy_len, NSC_WRBUF, &tmp);
sb_vec = h->ii_abufp->sb_vec;
h->ii_abufp->sb_vec = h->ii_bufh.sb_vec;
if (II_SUCCESS(rc)) {
rc = nsc_copy(h->ii_abufp, tmp, pos,
mapped_fba, copy_len);
}
if (overflow) {
II_NSC_WRITE(ip, overflow, rc, tmp, mapped_fba,
copy_len, flag);
} else {
II_NSC_WRITE(ip, shadow, rc, tmp, mapped_fba,
copy_len, flag);
}
h->ii_abufp->sb_vec = sb_vec;
(void) nsc_free_buf(tmp);
if (overflow)
nsc_release(OVRFD(ip));
/* move on to next chunk */
pos += copy_len;
len -= copy_len;
chunk_num++;
}
}
if (!II_SUCCESS(rc))
h->ii_bufh.sb_error = rc;
return (rc);
}
/*
* _ii_zero
*/
static int
_ii_zero(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
{
int rc;
void *sb_vec;
sb_vec = h->ii_bufp->sb_vec;
h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
rc = nsc_zero(h->ii_bufp, pos, len, flag);
h->ii_bufp->sb_vec = sb_vec;
if (!II_SUCCESS(rc))
h->ii_bufh.sb_error = rc;
return (rc);
}
/*
* _ii_uncommit
*/
static int
_ii_uncommit(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
{
int rc;
void *sb_vec;
sb_vec = h->ii_bufp->sb_vec;
h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
rc = nsc_uncommit(h->ii_bufp, pos, len, flag);
h->ii_bufp->sb_vec = sb_vec;
if (!II_SUCCESS(rc))
h->ii_bufh.sb_error = rc;
return (rc);
}
/*
* _ii_trksize
*/
static int
_ii_trksize(ii_fd_t *bfd, int trksize)
{
int rc;
rc = nsc_set_trksize(II_FD(bfd), trksize);
return (rc);
}
/*
* _ii_register_path
*/
static nsc_path_t *
_ii_register_path(char *path, int type, nsc_io_t *io)
{
nsc_path_t *tok;
tok = nsc_register_path(path, type, io);
return (tok);
}
/*
* _ii_unregister_path
*/
/*ARGSUSED*/
static int
_ii_unregister_path(nsc_path_t *sp, int flag, char *type)
{
int rc;
rc = nsc_unregister_path(sp, flag);
return (rc);
}
int
_ii_ll_add(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char *name,
char **key)
{
_ii_lsthead_t **head;
_ii_lstinfo_t *node;
uint64_t hash;
ASSERT(key && !*key);
ASSERT(ip && mutex && lst && name);
node = kmem_zalloc(sizeof (_ii_lstinfo_t), KM_SLEEP);
if (!node) {
cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM");
DTRACE_PROBE(_ii_ll_add_end_ENOMEM);
return (ENOMEM);
}
node->lst_ip = ip;
/* find out where we should insert it */
hash = nsc_strhash(name);
mutex_enter(mutex);
for (head = lst; *head; head = &((*head)->lst_next)) {
if (((*head)->lst_hash == hash) &&
strncmp(name, (*head)->lst_name, DSW_NAMELEN) == 0) {
node->lst_next = (*head)->lst_start;
(*head)->lst_start = node;
break;
}
}
if (!*head) {
/* create a new entry */
*head = kmem_zalloc(sizeof (_ii_lsthead_t), KM_SLEEP);
if (!*head) {
/* bother */
cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM");
kmem_free(node, sizeof (_ii_lstinfo_t));
DTRACE_PROBE(_ii_ll_add_end_2);
return (ENOMEM);
}
(*head)->lst_hash = hash;
(void) strncpy((*head)->lst_name, name, DSW_NAMELEN);
(*head)->lst_start = node;
}
mutex_exit(mutex);
*key = (*head)->lst_name;
return (0);
}
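/*
 * A sketch of how the cluster/group lists fit together (descriptive
 * only): each _ii_lsthead_t names one cluster resource tag or group and
 * carries a pre-computed nsc_strhash() of that name, so lookups compare
 * the hash before falling back to strncmp(); each member set hangs off
 * lst_start as an _ii_lstinfo_t.  _ii_ll_add() hands back the head's
 * lst_name through 'key', and that same pointer is what _ii_ll_remove()
 * below expects, which is why the caller must keep it until the set
 * leaves the list.
 */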
int
_ii_ll_remove(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char **key)
{
_ii_lsthead_t **head, *oldhead = 0;
_ii_lstinfo_t **node, *oldnode = 0;
uint64_t hash;
int found;
ASSERT(key && *key);
ASSERT(ip && lst);
hash = nsc_strhash(*key);
mutex_enter(mutex);
for (head = lst; *head; head = &((*head)->lst_next)) {
if (((*head)->lst_hash == hash) &&
strncmp(*key, (*head)->lst_name, DSW_NAMELEN) == 0)
break;
}
if (!*head) {
/* no such link (!) */
mutex_exit(mutex);
return (0);
}
found = 0;
for (node = &(*head)->lst_start; *node; node = &((*node)->lst_next)) {
if (ip == (*node)->lst_ip) {
oldnode = *node;
*node = (*node)->lst_next;
kmem_free(oldnode, sizeof (_ii_lstinfo_t));
found = 1;
break;
}
}
ASSERT(found);
if (!found) {
mutex_exit(mutex);
return (0);
}
/* did we just delete the last set in this resource group? */
if (!(*head)->lst_start) {
oldhead = *head;
*head = (*head)->lst_next;
kmem_free(oldhead, sizeof (_ii_lsthead_t));
}
mutex_exit(mutex);
*key = NULL;
return (0);
}
static nsc_def_t _ii_fd_def[] = {
"Pinned", (uintptr_t)_ii_pinned, 0,
"Unpinned", (uintptr_t)_ii_unpinned, 0,
0, 0, 0
};
static nsc_def_t _ii_io_def[] = {
"Open", (uintptr_t)_ii_openc, 0,
"Close", (uintptr_t)_ii_close, 0,
"Attach", (uintptr_t)_ii_attach, 0,
"Detach", (uintptr_t)_ii_detach, 0,
"AllocHandle", (uintptr_t)_ii_alloc_handle, 0,
"FreeHandle", (uintptr_t)_ii_free_handle, 0,
"AllocBuf", (uintptr_t)_ii_alloc_buf, 0,
"FreeBuf", (uintptr_t)_ii_free_buf, 0,
"GetPinned", (uintptr_t)_ii_get_pinned, 0,
"Discard", (uintptr_t)_ii_discard_pinned, 0,
"PartSize", (uintptr_t)_ii_partsize, 0,
"MaxFbas", (uintptr_t)_ii_maxfbas, 0,
"Read", (uintptr_t)_ii_read, 0,
"Write", (uintptr_t)_ii_write, 0,
"Zero", (uintptr_t)_ii_zero, 0,
"Uncommit", (uintptr_t)_ii_uncommit, 0,
"TrackSize", (uintptr_t)_ii_trksize, 0,
"Provide", 0, 0,
0, 0, 0
};
static nsc_def_t _ii_ior_def[] = {
"Open", (uintptr_t)_ii_openr, 0,
"Close", (uintptr_t)_ii_close, 0,
"Attach", (uintptr_t)_ii_attach, 0,
"Detach", (uintptr_t)_ii_detach, 0,
"AllocHandle", (uintptr_t)_ii_alloc_handle, 0,
"FreeHandle", (uintptr_t)_ii_free_handle, 0,
"AllocBuf", (uintptr_t)_ii_alloc_buf, 0,
"FreeBuf", (uintptr_t)_ii_free_buf, 0,
"GetPinned", (uintptr_t)_ii_get_pinned, 0,
"Discard", (uintptr_t)_ii_discard_pinned, 0,
"PartSize", (uintptr_t)_ii_partsize, 0,
"MaxFbas", (uintptr_t)_ii_maxfbas, 0,
"Read", (uintptr_t)_ii_read, 0,
"Write", (uintptr_t)_ii_write, 0,
"Zero", (uintptr_t)_ii_zero, 0,
"Uncommit", (uintptr_t)_ii_uncommit, 0,
"TrackSize", (uintptr_t)_ii_trksize, 0,
"Provide", 0, 0,
0, 0, 0
};