vds.c revision 1ae0874509b6811fdde1dfd46f0d93fd09867a3f
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Virtual disk server
*/
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/vio_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdsk_common.h>
/* Virtual disk server initialization flags */
#define VDS_LOCKING 0x01
#define VDS_LDI 0x02
#define VDS_MDEG 0x04
/* Virtual disk server tunable parameters */
#define VDS_LDC_RETRIES 3
#define VDS_NCHAINS 32
/* Identification parameters for MD, synthetic dkio(7i) structures, etc. */
#define VDS_NAME "virtual-disk-server"
#define VD_NAME "vd"
#define VD_VOLUME_NAME "vdisk"
#define VD_ASCIILABEL "Virtual Disk"
#define VD_CHANNEL_ENDPOINT "channel-endpoint"
#define VD_ID_PROP "id"
#define VD_BLOCK_DEVICE_PROP "vds-block-device"
/* Virtual disk initialization flags */
#define VD_LOCKING 0x01
#define VD_TASKQ 0x02
#define VD_LDC 0x04
#define VD_DRING 0x08
#define VD_SID 0x10
#define VD_SEQ_NUM 0x20
/*
* By convention, slice 2 represents the entire disk;
* unfortunately, this convention does not appear to be codified.
*/
#define VD_ENTIRE_DISK_SLICE 2
/* Return a cpp token as a string */
/*
* Print a message prefixed with the current function name to the message log
* (and optionally to the console for verbose boots); these macros use cpp's
* concatenation of string literals and C99 variable-length-argument-list
* macros
*/
/* Return a pointer to the "i"th vdisk dring element */
#define VD_DRING_ELEM(i) ((vd_dring_entry_t *)(void *) \
/* Return the virtual disk client's type as a string (for use in messages) */
"unsupported client")))
/* Debugging macros */
#ifdef DEBUG
#define VD_DUMP_DRING_ELEM(elem) \
PRN("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n", \
#else /* !DEBUG */
#define PR0(...)
#define PR1(...)
#define PR2(...)
#define VD_DUMP_DRING_ELEM(elem)
#endif /* DEBUG */
typedef struct vds {
} vds_t;
typedef struct vd {
} vd_t;
typedef struct vds_operation {
typedef struct ioctl {
const char *operation_name;
int cmd;
const char *cmd_name;
} ioctl_t;
/* LDC retry count; starts at the VDS_LDC_RETRIES tunable default */
static int vds_ldc_retries = VDS_LDC_RETRIES;
/* Opaque handle for the server's state (NOTE(review): presumably soft state; confirm) */
static void *vds_state;
/*
* Open flags, initialized from VD_OPEN_FLAGS (defined outside this view);
* NOTE(review): presumably used when opening backing-device slices; confirm
*/
static int vd_open_flags = VD_OPEN_FLAGS;
#ifdef DEBUG
/*
* Only exists in DEBUG builds; NOTE(review): presumably selects which of the
* PR0/PR1/PR2 debug messages are printed; confirm
*/
static int vd_msglevel;
#endif /* DEBUG */
static int
{
int status;
return (EINVAL); /* no service for trivial requests */
if ((status == 0) &&
LDC_COPY_OUT)) != 0)) {
PRN("ldc_mem_copy() returned errno %d copying to client",
status);
}
return (status);
}
static int
{
int status;
/* Get data from client */
PRN("ldc_mem_copy() returned errno %d copying from client",
status);
return (status);
}
return (status);
}
static int
{
int status;
return (EINVAL); /* no service for trivial requests */
return (status);
}
static int
{
switch (cmd) {
case DKIOCGGEOM:
return (0);
case DKIOCGVTOC:
return (0);
default:
return (ENOTSUP);
}
}
static int
{
/* Get data from client, if necessary */
PR1("Getting \"arg\" data from client");
LDC_COPY_IN)) != 0) {
PRN("ldc_mem_copy() returned errno %d "
"copying from client", status);
return (status);
}
}
/*
* Handle single-slice block devices internally; otherwise, have the
* real driver perform the ioctl()
*/
return (status);
return (status);
}
#ifdef DEBUG
if (rval != 0) {
PRN("%s set rval = %d, which is not being returned to client",
}
#endif /* DEBUG */
/* Send data to client, if necessary */
PR1("Sending \"arg\" data to client");
LDC_COPY_OUT)) != 0) {
PRN("ldc_mem_copy() returned errno %d "
"copying to client", status);
return (status);
}
}
return (status);
}
static int
{
/* Command (no-copy) operations */
STRINGIZE(DKIOCFLUSHWRITECACHE), 0, 0},
/* "Get" (copy-out) operations */
/* "Set" (copy-in) operations */
};
int i, status;
/*
* Determine ioctl corresponding to caller's "operation" and
* validate caller's "nbytes"
*/
for (i = 0; i < nioctls; i++) {
PRN("%s: Expected <= %lu \"nbytes\", "
return (EINVAL);
PRN("%s: nbytes = %lu not a multiple of %lu",
sizeof (uint64_t));
return (EINVAL);
}
break;
}
}
return (status);
}
/*
* Define the supported operations once the functions for performing them have
* been defined
*/
/*
* Each entry pairs an operation code with the function that performs it;
* the table is walked with an index running up to vds_noperations
*/
static const vds_operation_t vds_operation[] = {
{VD_OP_BREAD, vd_bread},
/* flush requests are routed to the ioctl function */
{VD_OP_FLUSH, vd_ioctl},
};
/* Number of entries in vds_operation[], computed at compile time */
static const size_t vds_noperations =
(sizeof (vds_operation))/(sizeof (vds_operation[0]));
/*
* Process a request using a defined operation
*/
static int
{
int i;
PR1("Entered");
/* Range-check slice */
PRN("Invalid \"slice\" %u (max %u) for virtual disk",
return (EINVAL);
}
/* Perform the requested operation */
for (i = 0; i < vds_noperations; i++)
/* No matching operation found */
return (ENOTSUP);
}
static int
{
retry++) {
}
if (status != 0) {
return (status);
PRN("ldc_write() performed only partial write");
return (EIO);
}
return (0);
}
/*
* Return 1 if the "type", "subtype", and "env" fields of the "tag" first
* argument match the corresponding remaining arguments; otherwise, return 0
*/
int
{
}
static int
{
VIO_VER_INFO)) {
return (ENOMSG); /* not a version message */
}
PRN("Expected %lu-byte version message; "
return (EBADMSG);
}
PRN("Expected device class %u (disk); received %u",
return (EBADMSG);
}
/* Unsupported version; send back supported version */
return (EBADMSG);
}
/* Valid message, version accepted */
return (0);
}
static int
{
PR0("Entered");
VIO_ATTR_INFO)) {
return (ENOMSG); /* not an attribute message */
}
PRN("Expected %lu-byte attribute message; "
return (EBADMSG);
}
if (attr_msg->max_xfer_sz == 0) {
PRN("Received maximum transfer size of 0 from client");
return (EBADMSG);
}
PRN("Client requested unsupported transfer mode");
return (EBADMSG);
}
/* Success: valid message and transfer mode */
/*
* The vd_dring_inband_msg_t contains one cookie; need room
* for up to n-1 more cookies, where "n" is the number of full
* pages plus possibly one partial page required to cover
* "max_xfer_sz". Add room for one more cookie if
* "max_xfer_sz" isn't an integral multiple of the page size.
* Must first get the maximum transfer size in bytes.
*/
#if 1 /* NEWOBP */
sizeof (vd_dring_inband_msg_t) +
(sizeof (ldc_mem_cookie_t)));
#else /* NEWOBP */
sizeof (vd_dring_inband_msg_t) +
(sizeof (ldc_mem_cookie_t)));
#endif /* NEWOBP */
/*
* Set the maximum expected message length to
* accommodate in-band-descriptor messages with all
* their cookies
*/
}
return (0);
}
static int
{
int status;
PR0("Entered");
VIO_DRING_REG)) {
return (ENOMSG); /* not a register-dring message */
}
PRN("Expected at least %lu-byte register-dring message; "
return (EBADMSG);
}
PRN("Expected %lu-byte register-dring message; "
return (EBADMSG);
}
PRN("A dring was previously registered; only support one");
return (EBADMSG);
}
/*
* In addition to fixing the assertion in the success case
* below, supporting drings which require more than one
* "cookie" requires increasing the value of vd->max_msglen
* somewhere in the code path prior to receiving the message
* which results in calling this function. Note that without
* making this change, the larger message size required to
* accommodate multiple cookies cannot be successfully
* received, so this function will not even get called.
* Gracefully accommodating more dring cookies might
* reasonably demand exchanging an additional attribute or
* making a minor protocol adjustment
*/
return (EBADMSG);
}
if (status != 0) {
return (status);
}
/*
* To remove the need for this assertion, must call
* ldc_mem_dring_nextcookie() successfully ncookies-1 times after a
* successful call to ldc_mem_dring_map()
*/
if ((status =
return (status);
}
PRN("Descriptor ring virtual address is NULL");
return (EBADMSG); /* FIXME appropriate status? */
}
/* Valid message and dring mapped */
PR1("descriptor size = %u, dring length = %u",
return (0);
}
static int
{
PR0("Entered");
VIO_DRING_UNREG)) {
return (ENOMSG); /* not an unregister-dring message */
}
PRN("Expected %lu-byte unregister-dring message; "
return (EBADMSG);
}
PRN("Expected dring ident %lu; received %lu",
return (EBADMSG);
}
/* FIXME set ack in unreg_msg? */
return (0);
}
static int
{
PR0("Entered");
return (ENOMSG); /* not an RDX message */
if (msglen != sizeof (vio_rdx_msg_t)) {
PRN("Expected %lu-byte RDX message; received %lu bytes",
sizeof (vio_rdx_msg_t), msglen);
return (EBADMSG);
}
return (0);
}
static void
{
int status = 0;
}
static int
{
PRN("Received seq_num %lu; expected %lu",
return (1);
}
return (0);
}
/*
* Return the expected size of an inband-descriptor message with all the
* cookies it claims to include
*/
static size_t
{
return ((sizeof (*msg)) +
}
/*
* Process an in-band descriptor message: used with clients like OBP, with
* which vds exchanges descriptors within VIO message payloads, rather than
* operating on them within a descriptor ring
*/
static int
{
PR1("Entered");
return (ENOMSG); /* not an in-band-descriptor message */
PRN("Expected at least %lu-byte descriptor message; "
return (EBADMSG);
}
PRN("Expected %lu-byte descriptor message; "
return (EBADMSG);
}
return (EBADMSG);
}
/* Valid message; process the request */
return (0);
}
static boolean_t
{
uint32_t i, n;
/* Check descriptor states */
PRN("descriptor %u not ready", i);
return (B_FALSE);
}
}
/* Descriptors are valid; accept them */
return (B_TRUE);
}
static int
{
int status;
/* Validate descriptor range */
PRN("\"start\" = %u, \"end\" = %u; both must be less than %u",
return (EINVAL);
}
/* Acquire updated dring elements */
return (status);
}
/* Accept updated dring elements */
/* Release dring elements */
return (status);
}
/* If a descriptor was in the wrong state, return an error */
if (!accepted)
return (EINVAL);
/* Process accepted dring elements */
PR1("Processing dring element %u", i);
/* Re-acquire client's dring element */
i, i)) != 0) {
PRN("ldc_mem_dring_acquire() returned errno %d",
status);
return (status);
}
/* Update processed element */
} else {
/* Perhaps client timed out waiting for I/O... */
PRN("element %u no longer \"accepted\"", i);
}
/* Release updated processed element */
i, i)) != 0) {
PRN("ldc_mem_dring_release() returned errno %d",
status);
return (status);
}
/* If the descriptor was in the wrong state, return an error */
if (!accepted)
return (EINVAL);
}
return (0);
}
static int
{
PR1("Entered");
VIO_DRING_DATA)) {
return (ENOMSG); /* not a dring-data message */
}
PRN("Expected %lu-byte dring message; received %lu bytes",
return (EBADMSG);
}
return (EBADMSG);
}
PRN("Expected dring ident %lu; received ident %lu",
return (EBADMSG);
}
/* Valid message; process dring */
}
static int
{
/* FIXME work around interrupt problem */
return (ENOMSG);
retry++) {
}
if (status != 0) {
return (status);
} else if (*nbytes == 0) {
PR1("ldc_read() returned 0 and no message read");
return (ENOMSG);
}
return (0);
}
static int
{
int status;
/*
* Validate session ID up front, since it applies to all messages
* once set
*/
return (EBADMSG);
}
/*
* Process the received message based on connection state
*/
case VD_STATE_INIT: /* expect version message */
return (status);
/* The first version message sets the SID */
/* Version negotiated, move to that state */
return (0);
case VD_STATE_VER: /* expect attribute message */
return (status);
/* Attributes exchanged, move to that state */
return (0);
case VD_STATE_ATTR:
case VIO_DESC_MODE: /* expect RDX message */
return (status);
/* Ready to receive in-band descriptors */
return (0);
case VIO_DRING_MODE: /* expect register-dring message */
if ((status =
return (status);
/* One dring negotiated, move to that state */
return (0);
default:
ASSERT("Unsupported transfer mode");
PRN("Unsupported transfer mode");
return (ENOTSUP);
}
case VD_STATE_DRING: /* expect RDX, register-dring, or unreg-dring */
/* Ready to receive data */
return (0);
return (status);
}
/*
* If another register-dring message is received, stay in
* dring state in case the client sends RDX; although the
* protocol allows multiple drings, this server does not
* support using more than one
*/
if ((status =
return (status);
/*
* Acknowledge an unregister-dring message, but reset the
* connection anyway: Although the protocol allows
* unregistering drings, this server cannot serve a vdisk
* without its only dring
*/
case VD_STATE_DATA:
case VIO_DESC_MODE: /* expect in-band-descriptor message */
case VIO_DRING_MODE: /* expect dring-data or unreg-dring */
/*
* Typically expect dring-data messages, so handle
* them first
*/
return (status);
/*
* Acknowledge an unregister-dring message, but reset
* the connection anyway: Although the protocol
* allows unregistering drings, this server cannot
* serve a vdisk without its only dring
*/
default:
ASSERT("Unsupported transfer mode");
PRN("Unsupported transfer mode");
return (ENOTSUP);
}
default:
ASSERT("Invalid client connection state");
PRN("Invalid client connection state");
return (ENOTSUP);
}
}
static void
{
int status;
/*
* Check that the message is at least big enough for a "tag", so that
* message processing can proceed based on tag-specified message type
*/
if (msglen < sizeof (vio_msg_tag_t)) {
/* Can't "nack" short message, so drop the big hammer */
return;
}
/*
* Process the message
*/
case 0:
/* "ack" valid, successfully-processed messages */
break;
case ENOMSG:
PRN("Received unexpected message");
case EBADMSG:
case ENOTSUP:
/* "nack" invalid messages */
break;
default:
/* "nack" failed messages */
/* An LDC error probably occurred, so try resetting it */
break;
}
/* "ack" or "nack" the message */
PR1("Sending %s",
/* Reset the connection for nack'ed or failed messages */
}
/*
* Task function for processing newly arrived data; the LDC event callback
* queues it when new packets are available.
*
* "arg" is the opaque task argument (NOTE(review): presumably the vdisk's
* vd_t; confirm against the dispatch site).
*/
static void
vd_process_queue(void *arg)
{
PR2("Entered");
PR2("Returning");
}
static uint_t
{
if (event & LDC_EVT_READ) {
PR1("New packet(s) available");
/* Queue a task to process the new data */
PRN("Unable to dispatch vd_process_queue()");
} else if (event & LDC_EVT_RESET) {
PR0("Attempting to bring up reset channel");
(status != ECONNREFUSED)) {
}
} else if (event & LDC_EVT_UP) {
/* Reset the connection state when channel comes (back) up */
}
return (LDC_SUCCESS);
}
static uint_t
{
return (MH_WALK_TERMINATE);
}
static int
{
uint_t vd_present = 0;
PR0("Entered");
switch (cmd) {
case DDI_DETACH:
/* the real work happens below */
break;
case DDI_SUSPEND:
/* nothing to do for this non-device */
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
/* Do not detach when serving any vdisks */
if (vd_present) {
PR0("Not detaching because serving vdisks");
return (DDI_FAILURE);
}
PR0("Detaching");
return (DDI_SUCCESS);
}
static boolean_t
{
return (B_TRUE);
}
return (B_FALSE);
}
static int
{
/* Get block device's device number, otyp, and size */
PRN("ldi_get_dev() returned errno %d for %s",
return (status);
}
PRN("ldi_get_otyp() returned errno %d for %s",
return (status);
}
return (ENOTBLK);
}
return (EIO);
}
/* Determine if backing block device is a pseudo device */
return (EIO);
}
return (0); /* ...and we're done */
}
/* Get dk_cinfo to determine slice of backing block device */
PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s",
return (status);
}
PRN("slice %u >= maximum slice %u for %s",
return (EIO);
}
/* If block device slice is entire disk, fill in all slice devices */
return (0); /* ...and we're done */
}
/* Otherwise, we have a (partial) slice of a block device */
/* Initialize dk_geom structure for single-slice block device */
PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s",
return (status);
}
return (EIO);
}
return (EIO);
}
/* Initialize vtoc structure for single-slice block device */
PRN("ldi_ioctl(DKIOCGVTOC) returned errno %d for %s",
return (status);
}
return (0);
}
static int
{
char tq_name[TASKQ_NAMELEN];
int param_status, status;
PRN("No memory for virtual disk");
return (EAGAIN);
}
/* Get device parameters */
return (status);
}
return (status);
}
if (param_status != 0)
return (param_status);
PR0("vdisk_type = %s, pseudo = %s, nslices = %u",
/* Initialize locking */
&iblock) != DDI_SUCCESS) {
PRN("Could not get iblock cookie.");
return (EIO);
}
/* Open the backing-device slices */
PR0("Opening device %u, minor %u = slice %u",
PRN("ldi_open_by_dev() returned errno %d for slice %u",
/* vds_destroy_vd() will close any open slices */
#if 0 /* FIXME */
return (status);
#endif
}
}
/* Create the task queue for the vdisk */
TASKQ_DEFAULTPRI, 0)) == NULL) {
PRN("Could not create task queue");
return (EIO);
}
/* Bring up LDC */
return (status);
}
return (status);
}
return (status);
}
(status != ECONNREFUSED)) {
return (status);
}
/* Add the successfully-initialized vdisk to the server's table */
return (EIO);
}
return (0);
}
/*
* Destroy the state associated with a virtual disk
*/
static void
vds_destroy_vd(void *arg)
{
PR0("Entered");
return;
/* Disable queuing requests for the vdisk */
}
/* Drain and destroy the task queue (*before* shutting down LDC) */
/* Shut down LDC */
}
/* Close any open backing-device slices */
}
}
/* Free lock */
/* Finally, free the vdisk structure itself */
}
static int
{
int status;
#ifdef lint
(void) vd;
#endif /* lint */
return (status);
}
static int
{
int num_channels;
/* Look for channel endpoint child(ren) of the vdisk MD node */
return (-1);
}
/* Get the "id" value for the first channel endpoint node */
PRN("No \"%s\" property found for \"%s\" of vdisk",
return (-1);
}
if (num_channels > 1) {
PRN("Using ID of first of multiple channels for this vdisk");
}
return (0);
}
static int
{
PRN("Invalid node count in Machine Description subtree");
return (-1);
}
return (status);
}
static void
{
char *block_device = NULL;
return;
}
&block_device) != 0) {
return;
}
return;
}
return;
}
}
static void
{
PRN("Unable to get \"%s\" property from vdisk's MD node",
return;
}
}
static void
{
/* Validate that vdisk ID has not changed */
PRN("Error getting previous vdisk \"%s\" property",
return;
}
return;
}
PRN("Not changing vdisk: ID changed from %lu to %lu",
return;
}
/* Validate that LDC ID has not changed */
return;
}
return;
}
if (curr_ldc_id != prev_ldc_id) {
PRN("Not changing vdisk: "
return;
}
/* Determine whether device path has changed */
&prev_dev) != 0) {
PRN("Error getting previous vdisk \"%s\"",
return;
}
&curr_dev) != 0) {
return;
}
return; /* no relevant (supported) change */
/* Remove old state, which will close vdisk and reset */
/* Re-initialize vdisk with new state */
return;
}
}
static int
{
int i;
return (MDEG_FAILURE);
return (MDEG_SUCCESS);
}
static int
{
/* MDEG specification for a (particular) vds node */
static mdeg_prop_spec_t vds_prop_spec[] = {
{MDET_PROP_VAL, "cfg-handle", {0}},
{MDET_LIST_END, NULL, {0}}};
/* MDEG specification for matching a vd node */
static md_prop_match_t vd_prop_spec[] = {
{MDET_LIST_END, NULL}};
int status;
/*
* The "cfg-handle" property of a vds node in an MD contains the MD's
* notion of "instance", or unique identifier, for that node; OBP
* stores the value of the "cfg-handle" MD property as the value of
* the "reg" property on the node in the device tree it builds from
* the MD and passes to Solaris. Thus, we look up the devinfo node's
* "reg" property value to uniquely identify this device instance when
* registering with the MD event-generation framework. If the "reg"
* property cannot be found, the device tree state is presumably so
* broken that there is no point in continuing.
*/
return (DDI_FAILURE);
}
/* Get the MD instance for later MDEG registration */
reg_prop, -1);
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
sizeof (void *));
return (DDI_FAILURE);
}
/* Register for MD updates */
PRN("Unable to register for MD updates");
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
static int
{
int status;
PR0("Entered");
switch (cmd) {
case DDI_ATTACH:
return (status);
case DDI_RESUME:
/* nothing to do for this non-device */
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
}
DEVO_REV, /* devo_rev */
0, /* devo_refcnt */
ddi_no_info, /* devo_getinfo */
nulldev, /* devo_identify */
nulldev, /* devo_probe */
vds_attach, /* devo_attach */
vds_detach, /* devo_detach */
nodev, /* devo_reset */
NULL, /* devo_cb_ops */
NULL, /* devo_bus_ops */
nulldev /* devo_power */
};
"virtual disk server v%I%",
&vds_ops,
};
static struct modlinkage modlinkage = {
&modldrv,
};
int
_init(void)
{
int i, status;
return (status);
return (status);
}
/* Fill in the bit-mask of server-supported operations */
for (i = 0; i < vds_noperations; i++)
return (0);
}
int
{
}
/*
* Loadable-module _fini entry point: returns 0 on success, otherwise the
* nonzero status of the failing step.
*/
int
_fini(void)
{
int status;
PR0("Entered");
/*
* NOTE(review): no assignment to "status" is visible before this return;
* confirm that the call which sets it (and the condition guarding this
* return) is present
*/
return (status);
return (0);
}