zvol.c revision a2eea2e101e6a163a537dcc6d4e3c4da2a0ea5b2
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* ZFS volume emulation driver.
*
* Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
* Volumes are accessed through the symbolic links named:
*
* /dev/zvol/dsk/<pool_name>/<dataset_name>
* /dev/zvol/rdsk/<pool_name>/<dataset_name>
*
* These links are created by the ZFS-specific devfsadm link generator.
* Volumes are persistent through reboot. No user command needs to be
* run before opening and using a device.
*/
#include <sys/dsl_prop.h>
#include <sys/efi_partition.h>
#include <sys/byteorder.h>
#include <sys/pathname.h>
#include <sys/zfs_ioctl.h>
#include <sys/refcount.h>
#include "zfs_namecheck.h"
/*
* NOTE(review): presumably the DMU object numbers used for a volume's
* data object and its ZAP property object -- confirm against the users.
*/
#define ZVOL_OBJ 1ULL
#define ZVOL_ZAP_OBJ 2ULL
/*
* Opaque handle for the driver's per-volume state.
* NOTE(review): presumably the DDI soft-state anchor indexed by minor
* number -- confirm; its initialization is not visible here.
*/
static void *zvol_state;
/*
* This lock protects the zvol_state structure from being modified
* while it's being used, e.g. an open that comes in before a create
* finishes. It also protects temporary opens of the dataset so that,
* e.g., an open doesn't get a spurious EBUSY.
*/
static kmutex_t zvol_state_lock;
/*
* Count of volume minor nodes that currently exist: incremented when a
* minor is created, decremented when one is removed, and tested by
* zvol_busy().
*/
static uint32_t zvol_minors;
/*
* The in-core state of each volume.
*
* NOTE(review): no members are declared in this view, yet code elsewhere
* in the file reads and updates zv->zv_total_opens -- confirm the full
* member list before relying on this definition.
*/
typedef struct zvol_state {
} zvol_state_t;
static void
{
}
int
{
if (volsize == 0)
return (EINVAL);
return (EINVAL);
#ifdef _ILP32
return (EOVERFLOW);
#endif
return (0);
}
int
{
if (volblocksize < SPA_MINBLOCKSIZE ||
!ISP2(volblocksize))
return (EDOM);
return (0);
}
static void
{
}
int
{
int error;
if (error)
return (error);
if (error == 0) {
}
return (error);
}
/*
* Find a free minor number.
*
* Returns the selected minor, or 0 when the search falls through without
* finding one. Code elsewhere in this file treats minor 0 as the control
* device, so 0 is never a valid volume minor.
* NOTE(review): the search loop and the declaration of `minor` are not
* visible in this view -- confirm the range that is scanned.
*/
static minor_t
zvol_minor_alloc(void)
{
return (minor);
return (0);
}
/*
* Look up the in-core state for the volume called `name`.
*
* Returns whatever `zv` holds once the scan stops.
* NOTE(review): the loop header, the name comparison and the declaration
* of `zv` are not visible in this view; presumably `zv` is NULL when no
* volume matches -- confirm.
*/
static zvol_state_t *
zvol_minor_lookup(const char *name)
{
continue;
break;
}
return (zv);
}
void
{
int error;
/*
* These properties must be removed from the list so the generic
* property setting step won't apply to them.
*/
zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
DMU_OT_NONE, 0, tx);
DMU_OT_NONE, 0, tx);
}
/*
* Replay a TX_WRITE ZIL transaction that didn't get committed
* after a system failure
*/
static int
{
int error;
if (byteswap)
if (error) {
} else {
}
return (error);
}
/* ARGSUSED */
static int
{
return (ENOTSUP);
}
/*
* Callback vectors for replaying records.
* Only TX_WRITE is needed for zvol.
*/
zvol_replay_err, /* 0 no such transaction type */
zvol_replay_err, /* TX_CREATE */
zvol_replay_err, /* TX_MKDIR */
zvol_replay_err, /* TX_MKXATTR */
zvol_replay_err, /* TX_SYMLINK */
zvol_replay_err, /* TX_REMOVE */
zvol_replay_err, /* TX_RMDIR */
zvol_replay_err, /* TX_LINK */
zvol_replay_err, /* TX_RENAME */
zvol_replay_write, /* TX_WRITE */
zvol_replay_err, /* TX_TRUNCATE */
zvol_replay_err, /* TX_SETATTR */
zvol_replay_err, /* TX_ACL */
};
/*
* Create a minor node for the specified volume.
*/
int
{
int ds_mode = DS_MODE_PRIMARY;
char *devpath;
int error;
return (EEXIST);
}
if (error) {
return (error);
}
if (error) {
return (error);
}
/*
* Try to reuse the same minor number we used last time.
*/
if (error == 0) {
if (error == 0) {
}
}
}
/*
* If we found a minor but it's already in use, we must pick a new one.
*/
minor = 0;
if (minor == 0)
minor = zvol_minor_alloc();
if (minor == 0) {
return (ENXIO);
}
return (EAGAIN);
}
(char *)name);
return (EAGAIN);
}
return (EAGAIN);
}
/* XXX this should handle the possible i/o error */
zvol_minors++;
return (0);
}
/*
* Remove minor node for the specified volume.
*
* Returns ENXIO on the early failure path (presumably when no state is
* found for `name` -- confirm), EBUSY while the volume still has opens
* outstanding (zv_total_opens != 0), and 0 once zvol_minors has been
* decremented.
*/
int
zvol_remove_minor(const char *name)
{
char namebuf[30];
return (ENXIO);
}
if (zv->zv_total_opens != 0) {
return (EBUSY);
}
zvol_minors--;
return (0);
}
int
{
int error;
return (ENXIO);
}
doi.doi_data_block_size)) != 0) {
return (error);
}
return (EROFS);
}
if (error) {
return (error);
}
if (error == 0) {
DMU_OBJECT_END, tx);
}
if (error == 0) {
}
return (error);
}
int
{
int error;
return (ENXIO);
}
return (EROFS);
}
if (error) {
} else {
volblocksize, 0, tx);
}
return (error);
}
/*ARGSUSED*/
int
{
if (minor == 0) /* This is the control device */
return (0);
return (ENXIO);
}
return (EROFS);
}
zv->zv_total_opens++;
}
return (0);
}
/*ARGSUSED*/
int
{
if (minor == 0) /* This is the control device */
return (0);
return (ENXIO);
}
/*
* The next statement is a workaround for the following DDI bug:
* 6343604 specfs race: multiple "last-close" of the same device
*/
if (zv->zv_total_opens == 0) {
return (0);
}
/*
* If the open count is zero, this is a spurious close.
* That indicates a bug in the kernel / DDI framework.
*/
/*
* You may get multiple opens, but only one close.
*/
zv->zv_total_opens--;
return (0);
}
/*
* Create and return an immediate write ZIL transaction.
*/
itx_t *
{
lr_write_t *lr;
return (itx);
}
/*
* zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions.
*
* We store data in the log buffers if it's small enough.
* Otherwise we flush the data out via dmu_sync().
*/
int
char *addr)
{
lr_write_t *lr;
int error;
/* handle common case */
if (len <= zvol_immediate_write_sz) {
return (0);
}
/*
* We need to dmu_sync() each block in the range.
* For this we need the blocksize.
*/
if (error)
return (error);
/*
* We need to immediate write or dmu_sync() each block in the range.
*/
while (len) {
if (nbytes <= zvol_immediate_write_sz) {
} else {
/* XXX - we should do these IOs in parallel */
if (error) {
return (error);
}
}
}
return (0);
}
int
{
char *addr;
int error = 0;
int sync;
int reading;
int txg_sync_needed = B_FALSE;
return (0);
}
return (0);
}
return (0);
}
/*
* There must be no buffer changes when doing a dmu_sync() because
* we can't change the data whilst calculating the checksum.
* A better approach than a per zvol rwlock would be to lock ranges.
*/
else
if (reading) {
} else {
if (error) {
} else {
if (sync) {
/* use the ZIL to commit this write */
addr) != 0) {
}
}
}
}
if (error)
break;
}
if (sync) {
if (txg_sync_needed)
else
}
return (0);
}
/*ARGSUSED*/
int
{
}
/*ARGSUSED*/
int
{
}
/*ARGSUSED*/
int
{
}
/*ARGSUSED*/
int
{
}
/*
* Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I).
*/
/*ARGSUSED*/
int
{
int error = 0;
return (ENXIO);
}
switch (cmd) {
case DKIOCINFO:
return (error);
case DKIOCGMEDIAINFO:
return (error);
case DKIOCGETEFI:
return (EFAULT);
}
return (EINVAL);
}
return (error);
default:
break;
}
return (error);
}
/*
 * Report whether any zvol minor nodes currently exist.
 * Returns 1 while zvol_minors is non-zero and 0 otherwise.
 */
int
zvol_busy(void)
{
	if (zvol_minors == 0)
		return (0);

	return (1);
}
/*
* Driver-wide initialization entry point.
* NOTE(review): the body is empty in this view; presumably it should set
* up zvol_state and zvol_state_lock -- confirm.
*/
void
zvol_init(void)
{
}
/*
* Driver-wide teardown entry point, the counterpart of zvol_init().
* NOTE(review): the body is empty in this view; presumably it should
* release whatever zvol_init() sets up -- confirm.
*/
void
zvol_fini(void)
{
}