/* vds.c revision 4bac220845f606f60663ed6f3a2b88caa00ae87e */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Virtual disk server
*/
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/vio_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdsk_common.h>
/* Virtual disk server initialization flags */
#define VDS_LDI 0x01
#define VDS_MDEG 0x02
/* Virtual disk server tunable parameters */
#define VDS_LDC_RETRIES 3
#define VDS_NCHAINS 32
/* Identification parameters for MD, synthetic dkio(7i) structures, etc. */
#define VDS_NAME "virtual-disk-server"
#define VD_NAME "vd"
#define VD_VOLUME_NAME "vdisk"
#define VD_ASCIILABEL "Virtual Disk"
#define VD_CHANNEL_ENDPOINT "channel-endpoint"
#define VD_ID_PROP "id"
#define VD_BLOCK_DEVICE_PROP "vds-block-device"
/* Virtual disk initialization flags */
#define VD_LOCKING 0x01
#define VD_LDC 0x02
#define VD_DRING 0x04
#define VD_SID 0x08
#define VD_SEQ_NUM 0x10
/*
* unfortunately, this convention does not appear to be codified.
*/
#define VD_ENTIRE_DISK_SLICE 2
/* Return a cpp token as a string */
/*
* Print a message prefixed with the current function name to the message log
* (and optionally to the console for verbose boots); these macros use cpp's
* concatenation of string literals and C99 variable-length-argument-list
* macros
*/
/* Return a pointer to the "i"th vdisk dring element */
#define VD_DRING_ELEM(i) ((vd_dring_entry_t *)(void *) \
/* Return the virtual disk client's type as a string (for use in messages) */
"unsupported client")))
/* Debugging macros */
#ifdef DEBUG
#define VD_DUMP_DRING_ELEM(elem) \
PRN("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n", \
#else /* !DEBUG */
#define PR0(...)
#define PR1(...)
#define PR2(...)
#define VD_DUMP_DRING_ELEM(elem)
#endif /* DEBUG */
/*
* Soft state structure for a vds instance
*/
typedef struct vds {
/* NOTE(review): member declarations are missing from this stripped revision; confirm against the complete vds.c */
} vds_t;
/*
* Types of descriptor-processing tasks
*/
typedef enum vd_task_type {
VD_NONFINAL_RANGE_TASK, /* task for intermediate descriptor in range */
VD_FINAL_RANGE_TASK, /* task for last in a range of descriptors */
/*
* Structure describing the task for processing a descriptor
*/
typedef struct vd_task {
/* NOTE(review): other members of this struct appear stripped from this revision -- confirm against the complete source */
int index; /* dring elem index for task */
} vd_task_t;
/*
* Soft state structure for a virtual disk instance
*/
typedef struct vd {
/* NOTE(review): member declarations are missing from this stripped revision; confirm against the complete vds.c */
} vd_t;
typedef struct vds_operation {
typedef struct vd_ioctl {
const char *operation_name; /* vdisk operation name */
int cmd; /* corresponding ioctl cmd */
const char *cmd_name; /* ioctl cmd name */
void *arg; /* ioctl cmd argument */
/* convert input vd_buf to output ioctl_arg */
/* convert input ioctl_arg to output vd_buf */
} vd_ioctl_t;
#define VD_IDENTITY ((void (*)(void *, void *))-1)
static int vds_ldc_retries = VDS_LDC_RETRIES;
static void *vds_state;
static int vd_open_flags = VD_OPEN_FLAGS;
/*
* Supported protocol version pairs, from highest (newest) to lowest (oldest)
*
* Each supported major version should appear only once, paired with (and only
* with) its highest supported minor version number (as the protocol requires
* supporting all lower minor version numbers as well)
*/
static const size_t vds_num_versions =
sizeof (vds_version)/sizeof (vds_version[0]);
#ifdef DEBUG
static int vd_msglevel;
#endif /* DEBUG */
static int
{
return (EINVAL); /* no service for trivial requests */
PR1("%s %lu bytes at block %lu",
/* Map memory exported by client */
if (status != 0) {
return (status);
}
if (status != 0) {
return (status);
}
/* Start the block I/O */
return (EINPROGRESS); /* will complete on completionq */
/* Clean up after error */
if (rv) {
}
if (rv) {
}
return (status);
}
/*
 * NOTE(review): truncated fragment -- the function name/signature line and
 * most body statements are missing from this revision.  From the visible
 * PRN() text this is presumably the helper that writes a message over the
 * LDC channel with retries (the orphaned "retry++) {" below is the tail of
 * the retry-loop header).  Returns 0 on full write, EIO on a partial
 * ldc_write(), or the ldc_write() error status.  Confirm against the
 * complete source.
 */
static int
{
retry++) {
}
if (status != 0) {
return (status);
PRN("ldc_write() performed only partial write");
return (EIO);
}
return (0);
}
static void
{
}
/*
* Reset the state of the connection with a client, if needed; reset the LDC
* transport as well, if needed. This function should only be called from the
* "startq", as it waits for tasks on the "completionq" and will deadlock if
* called from that queue.
*/
static void
{
int status = 0;
if (!vd->reset_state) {
return;
}
/*
* Let any asynchronous I/O complete before possibly pulling the rug
* out from under it; defer checking vd->reset_ldc, as one of the
* asynchronous tasks might set it
*/
/* Free all dring_task memory handles */
}
}
static int
{
int status;
/* Acquire the element */
return (status);
}
/* Set the element's status and mark it done */
if (accepted) {
} else {
/* Perhaps client timed out waiting for I/O... */
}
/* Release the element */
return (status);
}
}
/*
 * Taskq callback that finishes a previously-started block-I/O request for a
 * client and sends the final "ack"/"nack" reply.
 *
 * NOTE(review): this revision is stripped -- the statements that wait for
 * the I/O, call ldc_mem_release()/ldc_mem_unmap() (whose error messages
 * remain below), update the dring element, and actually send/free the reply
 * message are missing, so "status" is never reassigned after its
 * initialization here.  The remaining comments describe the intended flow;
 * confirm details against the complete source.
 */
static void
vd_complete_bio(void *arg)
{
int status = 0;
/* Wait for the I/O to complete */
/* Release the buffer */
if (status) {
PRN("ldc_mem_release() returned errno %d copying to client",
status);
}
/* Unmap the memory */
if (status) {
PRN("ldc_mem_unmap() returned errno %d copying to client",
status);
}
/* Update the dring element for a dring client */
/*
* If a transport error occurred, arrange to "nack" the message when
* the final task in the descriptor element range completes
*/
if (status != 0)
/*
* Only the final task for a range of elements will respond to and
* free the message
*/
return;
/*
* Send the "ack" or "nack" back to the client; if sending the message
* via LDC fails, arrange to reset both the connection state and LDC
* itself
*/
PR1("Sending %s",
/* Free the message now that it has been used for the reply */
}
static void
{
}
static void
{
}
static void
{
}
static void
{
}
static void
{
}
static void
{
int len;
}
static void
{
}
static void
{
}
static int
{
if (status == 0) {
return (0);
return (status);
}
if (status) {
return (status);
}
return (0);
}
/*
 * NOTE(review): truncated fragment -- the function name/signature line is
 * missing from this revision.  The visible body dispatches dkio(7i) ioctl
 * commands for a single-slice device based on the disk's label type
 * (VTOC vs. EFI); the statements that populate the synthetic geometry,
 * VTOC, or EFI data before each "return (0)" appear stripped.  Unsupported
 * commands and label types yield ENOTSUP.  Confirm against the complete
 * source.
 */
static int
{
switch (vd->vdisk_label) {
case VD_DISK_LABEL_VTOC:
switch (cmd) {
case DKIOCGGEOM:
return (0);
case DKIOCGVTOC:
return (0);
default:
return (ENOTSUP);
}
case VD_DISK_LABEL_EFI:
switch (cmd) {
case DKIOCGETEFI:
/* NOTE(review): the condition guarding this EINVAL appears stripped */
return (EINVAL);
return (0);
default:
return (ENOTSUP);
}
default:
return (ENOTSUP);
}
}
static int
{
/* Get data from client and convert, if necessary */
PR1("Getting \"arg\" data from client");
LDC_COPY_IN)) != 0) {
PRN("ldc_mem_copy() returned errno %d "
"copying from client", status);
return (status);
}
/* Convert client's data, if necessary */
else /* convert client vdisk operation data to ioctl data */
}
/*
* Handle single-slice block devices internally; otherwise, have the
* real driver perform the ioctl()
*/
return (status);
return (status);
}
#ifdef DEBUG
if (rval != 0) {
PRN("%s set rval = %d, which is not being returned to client",
}
#endif /* DEBUG */
/* Convert data and send to client, if necessary */
PR1("Sending \"arg\" data to client");
/* Convert ioctl data to vdisk operation data, if necessary */
LDC_COPY_OUT)) != 0) {
PRN("ldc_mem_copy() returned errno %d "
"copying to client", status);
return (status);
}
}
return (status);
}
/*
* Open any slices which have become non-empty as a result of performing a
* set-VTOC operation for the client.
*
* When serving a full disk, vds attempts to exclusively open all of the
* disk's slices to prevent another thread or process in the service domain
* from "stealing" a slice or from performing I/O to a slice while a vds
* client is accessing it. Unfortunately, underlying drivers, such as sd(7d)
* and cmdk(7d), return an error when attempting to open the device file for a
* slice which is currently empty according to the VTOC. This driver behavior
* means that vds must skip opening empty slices when initializing a vdisk for
* full-disk service and try to open slices that become non-empty (via a
* set-VTOC operation) during use of the full disk in order to begin serving
* such slices to the client. This approach has an inherent (and therefore
* unavoidable) race condition; it also means that failure to open a
* newly-non-empty slice has different semantics than failure to open an
* initially-non-empty slice: Due to driver bahavior, opening a
* newly-non-empty slice is a necessary side effect of vds performing a
* (successful) set-VTOC operation for a client on an in-service (and in-use)
* disk in order to begin serving the slice; failure of this side-effect
* operation does not mean that the client's set-VTOC operation failed or that
* operations on other slices must fail. Therefore, this function prints an
* error message on failure to open a slice, but does not return an error to
* its caller--unlike failure to open a slice initially, which results in an
* error that prevents serving the vdisk (and thereby requires an
* administrator to resolve the problem). Note that, apart from another
* thread or process opening a new slice during the race-condition window,
* failure to open a slice in this function will likely indicate an underlying
* drive problem, which will also likely become evident in errors returned by
* operations on other slices, and which will require administrative
* intervention and possibly servicing the drive.
*/
static void
{
int status;
/* Get the (new) partitions for updated slice sizes */
&vd->vdisk_label)) != 0) {
return;
}
/* Open any newly-non-empty slices */
/* Skip zero-length slices */
continue;
}
/* Skip already-open slices */
continue;
PRN("ldi_open_by_dev() returned errno %d "
}
}
}
static int
{
int i, status;
vd_ioctl_t ioctl[] = {
/* Command (no-copy) operations */
/* "Get" (copy-out) operations */
/* "Set" (copy-in) operations */
};
/*
* Determine ioctl corresponding to caller's "operation" and
* validate caller's "nbytes"
*/
for (i = 0; i < nioctls; i++) {
/* LDC memory operations require 8-byte multiples */
break;
PRN("%s: Expected at least nbytes = %lu, "
return (EINVAL);
}
PRN("%s: Expected nbytes = %lu, got %lu",
return (EINVAL);
}
break;
}
}
return (status);
}
static int
{
PR1("Get Device ID");
/* the most common failure is that no devid is available */
return (ENOENT);
}
/* LDC memory operations require 8-byte multiples */
LDC_COPY_OUT)) != 0) {
PRN("ldc_mem_copy() returned errno %d copying to client",
status);
}
return (status);
}
/*
* Define the supported operations once the functions for performing them have
* been defined
*/
static const vds_operation_t vds_operation[] = {
};
static const size_t vds_noperations =
(sizeof (vds_operation))/(sizeof (vds_operation[0]));
/*
* Process a task specifying a client I/O request
*/
static int
{
int i, status;
/* Range-check slice */
PRN("Invalid \"slice\" %u (max %u) for virtual disk",
return (EINVAL);
}
/* Find the requested operation */
for (i = 0; i < vds_noperations; i++)
break;
if (i == vds_noperations) {
return (ENOTSUP);
}
/* Start the operation */
return (0); /* but request completed */
}
PRN("Unexpected return of EINPROGRESS "
"with no I/O completion handler");
return (0); /* but request completed */
}
/* Queue a task to complete the operation */
/* ddi_taskq_dispatch(9f) guarantees success with DDI_SLEEP */
PR1("Operation in progress");
return (EINPROGRESS); /* completion handler will finish request */
}
/*
* Return true if the "type", "subtype", and "env" fields of the "tag" first
* argument match the corresponding remaining arguments; otherwise, return false
*/
{
}
/*
* by this server.
*/
static boolean_t
{
for (int i = 0; i < vds_num_versions; i++) {
ASSERT((i == 0) ||
/*
* If the major versions match, adjust the minor version, if
* necessary, down to the highest value supported by this
* server and return true so this message will get "ack"ed;
* the client should also support all minor versions lower
* than the value it sent
*/
PR0("Adjusting minor version from %u to %u",
}
return (B_TRUE);
}
/*
* If the message contains a higher major version number, set
* and return false, so this message will get "nack"ed with
* these values, and the client will potentially try again
* with the same or a lower version
*/
return (B_FALSE);
}
/*
* Otherwise, the message's major version is less than the
* current major version, so continue the loop to the next
* (lower) supported version
*/
}
/*
* No common version was found; "ground" the version pair in the
* message to terminate negotiation
*/
return (B_FALSE);
}
/*
* Process a version message from a client. vds expects to receive version
* messages from clients seeking service, but never issues version messages
* itself; therefore, vds can ACK or NACK client version messages, but does
* not expect to receive version-message ACKs or NACKs (and will treat such
* messages as invalid).
*/
static int
{
VIO_VER_INFO)) {
return (ENOMSG); /* not a version message */
}
PRN("Expected %lu-byte version message; "
return (EBADMSG);
}
PRN("Expected device class %u (disk); received %u",
return (EBADMSG);
}
/*
* We're talking to the expected kind of client; set our device class
*/
/*
* Check whether the (valid) version message specifies a version
* supported by this server. If the version is not supported, return
* EBADMSG so the message will get "nack"ed; vds_supported_version()
* will have updated the message with a supported version for the
* client to consider
*/
if (!vds_supported_version(ver_msg))
return (EBADMSG);
/*
* A version has been agreed upon; use the client's SID for
* communication on this channel now
*/
/*
* When multiple versions are supported, this function should store
* the negotiated major and minor version values in the "vd" data
* structure to govern further communication; in particular, note that
* the client might have specified a lower minor version for the
* agreed major version than specifed in the vds_version[] array. The
* following assertions should help remind future maintainers to make
* the appropriate changes to support multiple versions.
*/
PR0("Using major version %u, minor version %u",
return (0);
}
static int
{
VIO_ATTR_INFO)) {
PR0("Message is not an attribute message");
return (ENOMSG);
}
PRN("Expected %lu-byte attribute message; "
return (EBADMSG);
}
if (attr_msg->max_xfer_sz == 0) {
PRN("Received maximum transfer size of 0 from client");
return (EBADMSG);
}
PRN("Client requested unsupported transfer mode");
return (EBADMSG);
}
/* Success: valid message and transfer mode */
/*
* The vd_dring_inband_msg_t contains one cookie; need room
* for up to n-1 more cookies, where "n" is the number of full
* pages plus possibly one partial page required to cover
* "max_xfer_sz". Add room for one more cookie if
* "max_xfer_sz" isn't an integral multiple of the page size.
* Must first get the maximum transfer size in bytes.
*/
sizeof (vd_dring_inband_msg_t) +
(sizeof (ldc_mem_cookie_t)));
/*
* Set the maximum expected message length to
* accommodate in-band-descriptor messages with all
* their cookies
*/
/*
* Initialize the data structure for processing in-band I/O
* request descriptors
*/
}
/* Return the device's block size and max transfer size to the client */
return (0);
}
static int
{
int status;
VIO_DRING_REG)) {
PR0("Message is not a register-dring message");
return (ENOMSG);
}
PRN("Expected at least %lu-byte register-dring message; "
return (EBADMSG);
}
PRN("Expected %lu-byte register-dring message; "
return (EBADMSG);
}
PRN("A dring was previously registered; only support one");
return (EBADMSG);
}
PRN("reg_msg->num_descriptors = %u; must be <= %u (%s)",
return (EBADMSG);
}
/*
* In addition to fixing the assertion in the success case
* below, supporting drings which require more than one
* "cookie" requires increasing the value of vd->max_msglen
* somewhere in the code path prior to receiving the message
* which results in calling this function. Note that without
* making this change, the larger message size required to
* accommodate multiple cookies cannot be successfully
* received, so this function will not even get called.
* Gracefully accommodating more dring cookies might
* reasonably demand exchanging an additional attribute or
* making a minor protocol adjustment
*/
return (EBADMSG);
}
if (status != 0) {
return (status);
}
/*
* To remove the need for this assertion, must call
* ldc_mem_dring_nextcookie() successfully ncookies-1 times after a
* successful call to ldc_mem_dring_map()
*/
if ((status =
return (status);
}
PRN("Descriptor ring virtual address is NULL");
return (ENXIO);
}
/* Initialize for valid message and mapped dring */
PR1("descriptor size = %u, dring length = %u",
/*
* Allocate and initialize a "shadow" array of data structures for
* tasks to process I/O requests in dring elements
*/
vd->dring_task =
if (status) {
return (ENXIO);
}
}
return (0);
}
static int
{
VIO_DRING_UNREG)) {
PR0("Message is not an unregister-dring message");
return (ENOMSG);
}
PRN("Expected %lu-byte unregister-dring message; "
return (EBADMSG);
}
PRN("Expected dring ident %lu; received %lu",
return (EBADMSG);
}
return (0);
}
/*
 * NOTE(review): truncated fragment -- the function name/signature line and
 * the message-tag check guarding the first return are missing from this
 * revision (presumably this is the RDX-message processing function).  The
 * visible logic rejects non-RDX messages with ENOMSG, validates the
 * message length against sizeof (vio_rdx_msg_t) (EBADMSG on mismatch),
 * and returns 0 for a valid RDX message.  Confirm against the complete
 * source.
 */
static int
{
PR0("Message is not an RDX message");
return (ENOMSG);
}
if (msglen != sizeof (vio_rdx_msg_t)) {
PRN("Expected %lu-byte RDX message; received %lu bytes",
sizeof (vio_rdx_msg_t), msglen);
return (EBADMSG);
}
PR0("Valid RDX message");
return (0);
}
static int
{
PRN("Received seq_num %lu; expected %lu",
return (1);
}
return (0);
}
/*
* Return the expected size of an inband-descriptor message with all the
* cookies it claims to include
*/
static size_t
{
return ((sizeof (*msg)) +
}
/*
* Process an in-band descriptor message: used with clients like OBP, with
* which vds exchanges descriptors within VIO message payloads, rather than
* operating on them within a descriptor ring
*/
static int
{
VIO_DESC_DATA)) {
PR1("Message is not an in-band-descriptor message");
return (ENOMSG);
}
PRN("Expected at least %lu-byte descriptor message; "
return (EBADMSG);
}
PRN("Expected %lu-byte descriptor message; "
return (EBADMSG);
}
return (EBADMSG);
/*
* Valid message: Set up the in-band descriptor task and process the
* request. Arrange to acknowledge the client's message, unless an
* error processing the descriptor task results in setting
* VIO_SUBTYPE_NACK
*/
PR1("Valid in-band-descriptor message");
}
static int
{
int status;
/* Accept the updated dring element */
return (status);
}
if (ready) {
} else {
}
return (status);
}
if (!ready)
return (EBUSY);
/* Initialize a task and process the accepted element */
return (status);
}
static int
{
/*
* Arrange to acknowledge the client's message, unless an error
* processing one of the dring elements results in setting
* VIO_SUBTYPE_NACK
*/
/*
* Process the dring elements in the range
*/
if (status == EINPROGRESS)
inprogress = B_TRUE;
else if (status != 0)
break;
}
/*
* If some, but not all, operations of a multi-element range are in
* progress, wait for other operations to complete before returning
* (which will result in "ack" or "nack" of the message). Note that
* all outstanding operations will need to complete, not just the ones
* corresponding to the current range of dring elements; howevever, as
* this situation is an error case, performance is less critical.
*/
return (status);
}
static int
{
VIO_DRING_DATA)) {
PR1("Message is not a dring-data message");
return (ENOMSG);
}
PRN("Expected %lu-byte dring message; received %lu bytes",
return (EBADMSG);
}
return (EBADMSG);
PRN("Expected dring ident %lu; received ident %lu",
return (EBADMSG);
}
PRN("\"start_idx\" = %u; must be less than %u",
return (EBADMSG);
}
PRN("\"end_idx\" = %u; must be >= 0 and less than %u",
return (EBADMSG);
}
/* Valid message; process range of updated dring elements */
PR1("Processing descriptor range, start = %u, end = %u",
}
static int
{
retry++) {
}
if (status != 0) {
return (status);
} else if (*nbytes == 0) {
PR1("ldc_read() returned 0 and no message read");
return (ENOMSG);
}
return (0);
}
static int
{
int status;
/*
* Validate session ID up front, since it applies to all messages
* once set
*/
return (EBADMSG);
}
/*
* Process the received message based on connection state
*/
case VD_STATE_INIT: /* expect version message */
return (status);
/* Version negotiated, move to that state */
return (0);
case VD_STATE_VER: /* expect attribute message */
return (status);
/* Attributes exchanged, move to that state */
return (0);
case VD_STATE_ATTR:
case VIO_DESC_MODE: /* expect RDX message */
return (status);
/* Ready to receive in-band descriptors */
return (0);
case VIO_DRING_MODE: /* expect register-dring message */
if ((status =
return (status);
/* One dring negotiated, move to that state */
return (0);
default:
ASSERT("Unsupported transfer mode");
PRN("Unsupported transfer mode");
return (ENOTSUP);
}
case VD_STATE_DRING: /* expect RDX, register-dring, or unreg-dring */
/* Ready to receive data */
return (0);
return (status);
}
/*
* If another register-dring message is received, stay in
* dring state in case the client sends RDX; although the
* protocol allows multiple drings, this server does not
* support using more than one
*/
if ((status =
return (status);
/*
* Acknowledge an unregister-dring message, but reset the
* connection anyway: Although the protocol allows
* unregistering drings, this server cannot serve a vdisk
* without its only dring
*/
case VD_STATE_DATA:
case VIO_DESC_MODE: /* expect in-band-descriptor message */
case VIO_DRING_MODE: /* expect dring-data or unreg-dring */
/*
* Typically expect dring-data messages, so handle
* them first
*/
return (status);
/*
* Acknowledge an unregister-dring message, but reset
* the connection anyway: Although the protocol
* allows unregistering drings, this server cannot
* serve a vdisk without its only dring
*/
default:
ASSERT("Unsupported transfer mode");
PRN("Unsupported transfer mode");
return (ENOTSUP);
}
default:
ASSERT("Invalid client connection state");
PRN("Invalid client connection state");
return (ENOTSUP);
}
}
static int
{
int status;
/*
* Check that the message is at least big enough for a "tag", so that
* message processing can proceed based on tag-specified message type
*/
if (msglen < sizeof (vio_msg_tag_t)) {
/* Can't "nack" short message, so drop the big hammer */
return (EBADMSG);
}
/*
* Process the message
*/
case 0:
/* "ack" valid, successfully-processed messages */
break;
case EINPROGRESS:
/* The completion handler will "ack" or "nack" the message */
return (EINPROGRESS);
case ENOMSG:
PRN("Received unexpected message");
case EBADMSG:
case ENOTSUP:
/* "nack" invalid messages */
break;
default:
/* "nack" failed messages */
/* An LDC error probably occurred, so try resetting it */
break;
}
/* Send the "ack" or "nack" to the client */
PR1("Sending %s",
/* Arrange to reset the connection for nack'ed or failed messages */
return (status);
}
static boolean_t
{
return (enabled);
}
/*
 * Taskq callback that receives and processes incoming client message(s)
 * from the vdisk's LDC channel.
 *
 * NOTE(review): this revision is stripped -- the receive/process loop
 * statements are missing (the orphaned "0) {" below is the tail of a loop
 * or call condition), so only the trace messages and loop skeleton remain.
 * Confirm the full logic against the complete source.
 */
static void
vd_recv_msg(void *arg)
{
int status = 0;
PR2("New task to receive incoming message(s)");
/*
* Receive and process a message
*/
0) {
continue; /* handler will free msg */
/* Probably an LDC failure; arrange to reset it */
}
}
PR2("Task finished");
}
/*
 * NOTE(review): truncated fragment -- the function name/signature line is
 * missing (presumably the LDC event callback for a vdisk).  Visible
 * behavior: ignore events for a disabled vdisk; on LDC_EVT_RESET return
 * immediately; on LDC_EVT_UP reset the client connection state; on
 * LDC_EVT_READ dispatch a task to receive the new data (the dispatch call
 * itself is stripped).  Always returns LDC_SUCCESS.  Confirm against the
 * complete source.
 */
static uint_t
{
if (!vd_enabled(vd))
return (LDC_SUCCESS);
if (event & LDC_EVT_RESET) {
PR0("LDC channel was reset");
return (LDC_SUCCESS);
}
if (event & LDC_EVT_UP) {
PR0("LDC channel came up: Resetting client connection state");
}
if (event & LDC_EVT_READ) {
int status;
PR1("New data available");
/* Queue a task to receive the new data */
/* ddi_taskq_dispatch(9f) guarantees success with DDI_SLEEP */
}
return (LDC_SUCCESS);
}
static uint_t
{
return (MH_WALK_TERMINATE);
}
/*
 * NOTE(review): truncated fragment -- the function name/signature line is
 * missing (presumably vds_detach(9e)).  Visible behavior: handle
 * DDI_DETACH (real work below), DDI_SUSPEND (no-op success), and unknown
 * commands (failure); refuse to detach while any vdisk is being served.
 * The statements that fetch the soft state and walk the vdisk table (which
 * would set vd_present) are stripped, as is whatever the early
 * "return (DDI_FAILURE);" before the orphaned brace guarded.  Confirm
 * against the complete source.
 */
static int
{
uint_t vd_present = 0;
switch (cmd) {
case DDI_DETACH:
/* the real work happens below */
break;
case DDI_SUSPEND:
PR0("No action required for DDI_SUSPEND");
return (DDI_SUCCESS);
default:
PRN("Unrecognized \"cmd\"");
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
/* Do no detach when serving any vdisks */
if (vd_present) {
PR0("Not detaching because serving vdisks");
return (DDI_FAILURE);
}
PR0("Detaching");
return (DDI_SUCCESS);
}
static boolean_t
{
return (B_TRUE);
}
return (B_FALSE);
}
static int
{
/*
* At this point, vdisk_size is set to the size of partition 2 but
* this does not represent the size of the disk because partition 2
* may not cover the entire disk and its size does not include reserved
* blocks. So we update vdisk_size to be the size of the entire disk.
*/
PRN("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d",
status);
return (status);
}
/* Set full-disk parameters */
/* Move dev number and LDI handle to entire-disk-slice array elements */
/* Initialize device numbers for remaining slices and open them */
/*
* Skip the entire-disk slice, as it's already open and its
* device known
*/
if (slice == VD_ENTIRE_DISK_SLICE)
continue;
/*
* Construct the device number for the current slice
*/
/*
* At least some underlying drivers refuse to open
* devices for (currently) zero-length slices, so skip
* them for now
*/
continue;
}
/*
* Open all non-empty slices of the disk to serve them to the
* client. Slices are opened exclusively to prevent other
* threads or processes in the service domain from performing
* I/O to slices being accessed by a client. Failure to open
* a slice results in vds not serving this disk, as the client
* could attempt (and should be able) to access any non-empty
* slice immediately. Any slices successfully opened before a
* failure will get closed by vds_destroy_vd() as a result of
* the error returned by this function.
*/
PR0("Opening device major %u, minor %u = slice %u",
PRN("ldi_open_by_dev() returned errno %d "
/* vds_destroy_vd() will close any open slices */
return (status);
}
}
return (0);
}
static int
{
int length;
return (0);
}
static int
{
/*
* We need to open with FNDELAY so that opening an empty partition
* does not fail.
*/
return (status);
}
/*
* nslices must be updated now so that vds_destroy_vd() will close
* the slice we have just opened in case of an error.
*/
/* Get device number and size of backing device */
PRN("ldi_get_dev() returned errno %d for %s",
return (status);
}
return (EIO);
}
/* Verify backing device supports dk_cinfo, dk_geom, and vtoc */
&rval)) != 0) {
PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s",
return (status);
}
PRN("slice %u >= maximum slice %u for %s",
return (EIO);
}
if (status != 0) {
PRN("vd_read_vtoc returned errno %d for %s",
return (status);
}
PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s",
return (status);
}
/* Store the device's max transfer size for return to the client */
/* Determine if backing device is a pseudo device */
return (EIO);
}
return (0); /* ...and we're done */
}
/* If slice is entire-disk slice, initialize for full disk */
return (vd_setup_full_disk(vd));
/* Otherwise, we have a non-entire slice of a device */
return (status);
}
/* Initialize dk_geom structure for single-slice device */
return (EIO);
}
return (EIO);
}
/* Initialize vtoc structure for single-slice device */
return (0);
}
static int
{
char tq_name[TASKQ_NAMELEN];
int status;
PRN("No memory for virtual disk");
return (EAGAIN);
}
/* Open vdisk and initialize parameters */
return (status);
PR0("vdisk_type = %s, pseudo = %s, nslices = %u",
/* Initialize locking */
&iblock) != DDI_SUCCESS) {
PRN("Could not get iblock cookie.");
return (EIO);
}
/* Create start and completion task queues for the vdisk */
TASKQ_DEFAULTPRI, 0)) == NULL) {
PRN("Could not create task queue");
return (EIO);
}
TASKQ_DEFAULTPRI, 0)) == NULL) {
PRN("Could not create task queue");
return (EIO);
}
/* Bring up LDC */
return (status);
}
return (status);
}
return (status);
}
/* Allocate the inband task memory handle */
if (status) {
return (ENXIO);
}
/* Add the successfully-initialized vdisk to the server's table */
return (EIO);
}
return (0);
}
/*
* Destroy the state associated with a virtual disk
*/
/*
 * Tear down all state associated with a virtual disk: task queues, dring
 * task handles, LDC channel, open backing-device slices, and the vdisk
 * structure itself.
 *
 * NOTE(review): this revision is stripped -- the guard condition before
 * the bare "return;", the cleanup statements themselves, and several
 * enclosing if/for headers (hence the orphaned closing braces) are
 * missing.  Only the ordering comments survive.  Confirm against the
 * complete source.
 */
static void
vds_destroy_vd(void *arg)
{
return;
PR0("Destroying vdisk state");
/* Disable queuing requests for the vdisk */
}
/* Drain and destroy start queue (*before* destroying completionq) */
/* Drain and destroy completion queue (*before* shutting down LDC) */
/* Free all dring_task memory handles */
}
/* Free the inband task memory handle */
/* Shut down LDC */
}
/* Close any open backing-device slices */
}
}
/* Free lock */
/* Finally, free the vdisk structure itself */
}
static int
{
int status;
#ifdef lint
(void) vd;
#endif /* lint */
return (status);
}
static int
{
int num_channels;
/* Look for channel endpoint child(ren) of the vdisk MD node */
return (-1);
}
/* Get the "id" value for the first channel endpoint node */
PRN("No \"%s\" property found for \"%s\" of vdisk",
return (-1);
}
if (num_channels > 1) {
PRN("Using ID of first of multiple channels for this vdisk");
}
return (0);
}
static int
{
PRN("Invalid node count in Machine Description subtree");
return (-1);
}
return (status);
}
static void
{
char *device_path = NULL;
return;
}
&device_path) != 0) {
return;
}
return;
}
return;
}
}
static void
{
PRN("Unable to get \"%s\" property from vdisk's MD node",
return;
}
}
static void
{
/* Validate that vdisk ID has not changed */
PRN("Error getting previous vdisk \"%s\" property",
return;
}
return;
}
PRN("Not changing vdisk: ID changed from %lu to %lu",
return;
}
/* Validate that LDC ID has not changed */
return;
}
return;
}
if (curr_ldc_id != prev_ldc_id) {
PRN("Not changing vdisk: "
return;
}
/* Determine whether device path has changed */
&prev_dev) != 0) {
PRN("Error getting previous vdisk \"%s\"",
return;
}
&curr_dev) != 0) {
return;
}
return; /* no relevant (supported) change */
/* Remove old state, which will close vdisk and reset */
/* Re-initialize vdisk with new state */
return;
}
}
static int
{
int i;
return (MDEG_FAILURE);
return (MDEG_SUCCESS);
}
static int
{
/* MDEG specification for a (particular) vds node */
static mdeg_prop_spec_t vds_prop_spec[] = {
{MDET_PROP_VAL, "cfg-handle", {0}},
{MDET_LIST_END, NULL, {0}}};
/* MDEG specification for matching a vd node */
static md_prop_match_t vd_prop_spec[] = {
{MDET_LIST_END, NULL}};
int status;
/*
* The "cfg-handle" property of a vds node in an MD contains the MD's
* notion of "instance", or unique identifier, for that node; OBP
* stores the value of the "cfg-handle" MD property as the value of
* the "reg" property on the node in the device tree it builds from
* the MD and passes to Solaris. Thus, we look up the devinfo node's
* "reg" property value to uniquely identify this device instance when
* registering with the MD event-generation framework. If the "reg"
* property cannot be found, the device tree state is presumably so
* broken that there is no point in continuing.
*/
return (DDI_FAILURE);
}
/* Get the MD instance for later MDEG registration */
reg_prop, -1);
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
sizeof (void *));
return (DDI_FAILURE);
}
/* Register for MD updates */
PRN("Unable to register for MD updates");
return (DDI_FAILURE);
}
/* Prevent auto-detaching so driver is available whenever MD changes */
PRN("failed to set \"%s\" property for instance %u",
}
return (DDI_SUCCESS);
}
static int
{
int status;
switch (cmd) {
case DDI_ATTACH:
PR0("Attaching");
return (status);
case DDI_RESUME:
PR0("No action required for DDI_RESUME");
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
}
DEVO_REV, /* devo_rev */
0, /* devo_refcnt */
ddi_no_info, /* devo_getinfo */
nulldev, /* devo_identify */
nulldev, /* devo_probe */
vds_attach, /* devo_attach */
vds_detach, /* devo_detach */
nodev, /* devo_reset */
NULL, /* devo_cb_ops */
NULL, /* devo_bus_ops */
nulldev /* devo_power */
};
"virtual disk server v%I%",
&vds_ops,
};
static struct modlinkage modlinkage = {
&modldrv,
};
/*
 * Module load entry point.
 *
 * NOTE(review): this revision is stripped -- the calls whose failures
 * returned "status" (presumably ddi_soft_state_init() and mod_install())
 * are missing, leaving "status" uninitialized at both visible returns, a
 * stray closing brace, and a for-loop with no body statement.  Do not take
 * the control flow shown here at face value; confirm against the complete
 * source.
 */
int
_init(void)
{
int i, status;
return (status);
return (status);
}
/* Fill in the bit-mask of server-supported operations */
for (i = 0; i < vds_noperations; i++)
return (0);
}
int
{
}
int
_fini(void)
{
int status;
return (status);
return (0);
}