vdc.c revision 1ae0874509b6811fdde1dfd46f0d93fd09867a3f
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* LDoms virtual disk client (vdc) device driver
*
* This driver runs on a guest logical domain and communicates with the virtual
* disk server (vds) driver running on the service domain which is exporting
* virtualized "disks" to the guest logical domain.
*
* The driver can be divided into four sections:
*
* 1) generic device driver housekeeping
* _init, _fini, attach, detach, ops structures, etc.
*
* 2) communication channel setup
* Setup the communications link over the LDC channel that vdc uses to
* talk to the vDisk server. Initialise the descriptor ring which
* allows the LDC clients to transfer data via memory mappings.
*
* 3) Support exported to upper layers (filesystems, etc)
* The upper layers call into vdc via strategy(9E) and DKIO(7I)
* ioctl calls. vdc will copy the data to be written to the descriptor
* ring or map the buffer to store the data read by the vDisk
* server into the descriptor ring. It then sends a message to the
* vDisk server requesting it to complete the operation.
*
* 4) Handling responses from vDisk server.
* The vDisk server will ACK some or all of the messages vdc sends to it
* (this is configured during the handshake). Upon receipt of an ACK
* vdc will check the descriptor ring and signal to the upper layer
* code waiting on the IO.
*/
#include <sys/efi_partition.h>
#include <sys/mach_descrip.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>
#include <sys/vio_common.h>
#include <sys/vio_mailbox.h>
#include <sys/vdsk_common.h>
#include <sys/vdsk_mailbox.h>
/*
* function prototypes
*/
/* standard driver functions */
/* setup */
/* handshake with vds */
static void vdc_init_handshake_negotiation(void *arg);
static int vdc_handle_ver_negotiate();
static int vdc_handle_attr_negotiate();
/* processing */
static void vdc_process_msg(void *arg);
num_msgs);
/* dkio */
/*
* Module variables
*/
/*
 * Upper bound on retry attempts: local retry counters are compared against
 * this value before a wait is abandoned. Initialised from VDC_RETRIES.
 */
static int vdc_retries = VDC_RETRIES;
/* Soft state pointer */
static void *vdc_state;
/*
 * Message level, initialised to 0.
 * NOTE(review): presumably selects how much PR0/PR1/PR2 debug output is
 * emitted - confirm against the macro definitions, which are not visible
 * in this part of the file.
 */
int vdc_msglevel = 0;
static void
{
}
/*
 * Character/block entry point table for vdc.
 *
 * The open, close, strategy, print, dump, read, write, ioctl, aread and
 * awrite slots point at the driver's own vdc_*() routines. The devmap,
 * mmap and segmap slots are filled with nodev, polling with nochpoll, and
 * property operations with ddi_prop_op. The table is not a STREAMS
 * driver table (cb_str is NULL).
 *
 * The initializer is positional, so the order of the entries must match
 * the field order of struct cb_ops.
 */
static struct cb_ops vdc_cb_ops = {
vdc_open, /* cb_open */
vdc_close, /* cb_close */
vdc_strategy, /* cb_strategy */
vdc_print, /* cb_print */
vdc_dump, /* cb_dump */
vdc_read, /* cb_read */
vdc_write, /* cb_write */
vdc_ioctl, /* cb_ioctl */
nodev, /* cb_devmap */
nodev, /* cb_mmap */
nodev, /* cb_segmap */
nochpoll, /* cb_chpoll */
ddi_prop_op, /* cb_prop_op */
NULL, /* cb_str */
CB_REV, /* cb_rev */
vdc_aread, /* cb_aread */
vdc_awrite /* cb_awrite */
};
DEVO_REV, /* devo_rev */
0, /* devo_refcnt */
vdc_getinfo, /* devo_getinfo */
nulldev, /* devo_identify */
nulldev, /* devo_probe */
vdc_attach, /* devo_attach */
vdc_detach, /* devo_detach */
nodev, /* devo_reset */
&vdc_cb_ops, /* devo_cb_ops */
NULL, /* devo_bus_ops */
nulldev /* devo_power */
};
"virtual disk client %I%",
&vdc_ops,
};
/*
 * Module linkage structure for this driver; its initializer references the
 * modldrv driver-linkage entry.
 *
 * NOTE(review): only one initializer is visible here. Confirm against the
 * complete source that the revision and terminating entries expected by
 * struct modlinkage are present.
 */
static struct modlinkage modlinkage = {
&modldrv,
};
/* -------------------------------------------------------------------------- */
/*
* Device Driver housekeeping and setup
*/
/*
 * _init()
 *
 * One of the generic driver housekeeping entry points. Returns the value
 * held in 'status'.
 *
 * NOTE(review): in the code visible here 'status' is never assigned before
 * it is returned, and the second return statement is unreachable. The
 * calls that set 'status' appear to be missing from this view - confirm
 * against the complete source before relying on this routine.
 */
int
_init(void)
{
int status;
return (status);
return (status);
}
int
{
}
/*
 * _fini()
 *
 * One of the generic driver housekeeping entry points. Returns the value
 * held in 'status'.
 *
 * NOTE(review): in the code visible here 'status' is never assigned before
 * it is returned, and the trailing 'return (0)' is unreachable. The call
 * that sets 'status' (and the condition guarding the first return) appear
 * to be missing from this view - confirm against the complete source.
 */
int
_fini(void)
{
int status;
return (status);
return (0);
}
static int
{
switch (cmd) {
case DDI_INFO_DEVT2DEVINFO:
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
case DDI_INFO_DEVT2INSTANCE:
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
}
static int
{
int instance;
int rv;
switch (cmd) {
case DDI_DETACH:
/* the real work happens below */
break;
case DDI_SUSPEND:
/* nothing to do for this non-device */
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
vdc_msg("%s[%d]: Could not get state structure.",
return (DDI_FAILURE);
}
PR0("%s[%d]: Cannot detach: device is open",
return (DDI_FAILURE);
}
/*
* try and disable callbacks to prevent another handshake
*/
/*
* Prevent any more attempts to start a handshake with the vdisk
* server and tear down the existing connection.
*/
PR0("%s[%d]: Waiting for thread to exit\n",
break;
}
}
}
}
return (DDI_SUCCESS);
}
static int
{
int instance;
int status;
vdc_msg("%s:(%d): Couldn't alloc state structure",
return (DDI_FAILURE);
}
vdc_msg("%s:(%d): Could not get state structure.",
return (DDI_FAILURE);
}
/*
* We assign the value to initialized in this case to zero out the
* variable and then set bits in it to indicate what has been done
*/
vdc->session_id = 0;
instance);
return (DDI_FAILURE);
}
/* initialise LDC channel which will be used to communicate with vds */
if (vdc_do_ldc_init(vdc) != 0) {
return (DDI_FAILURE);
}
/* Bring up connection with vds via LDC */
if (status != 0) {
return (DDI_FAILURE);
}
/*
* We need to wait until the handshake has completed before leaving
* the attach(). This is to allow the device node(s) to be created
* and the first usage of the filesystem to succeed.
*/
PR0("%s[%d] handshake in progress [VD %d (LDC %d)]\n",
VD_GET_TIMEOUT_HZ(1));
if (status == -1) {
if (retries >= vdc_retries) {
PR0("%s[%d] Give up handshake wait.\n",
return (DDI_FAILURE);
} else {
PR0("%s[%d] Retry #%d for handshake.\n",
retries++;
}
}
}
VD_OP_GET_VTOC, FKIOCTL, 0);
if (status) {
return (status);
}
/*
* Now that we have the device info we can create the
* device nodes and properties
*/
if (status) {
instance);
return (status);
}
if (status) {
" properties", instance);
return (status);
}
return (status);
}
static int
{
int status;
switch (cmd) {
case DDI_ATTACH:
return (status);
case DDI_RESUME:
/* nothing to do for this non-device */
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
}
static int
{
int status = 0;
return (EIO);
}
if (status != 0) {
return (status);
}
}
if (status != 0) {
return (status);
}
if (status != 0) {
return (status);
}
}
/*
* At this stage we have initialised LDC, we will now try and open
* the connection.
*/
if (status != 0) {
return (status);
}
}
return (status);
}
static int
{
int status = 0;
return (0);
}
switch (status) {
case ECONNREFUSED: /* listener not ready at other end */
PR0("%s: ldc_up(%d,...) return %d\n",
status = 0;
break;
default:
"channel=%ld, err=%d",
}
}
return (status);
}
/*
* Function:
* vdc_create_device_nodes
*
* Description:
* This function creates the block and character device nodes under
* /devices along with the node properties. It is called as part of
* the attach(9E) of the instance during the handshake with vds after
* vds has sent the attributes to vdc.
*
* If the device is of type VD_DISK_TYPE_SLICE then the minor node
* of 2 is used in keeping with the Solaris convention that slice 2
* refers to a whole disk. Slices start at 'a'
*
* Parameters:
* vdc - soft state pointer
*
* Return Values
* 0 - Success
* EIO - Failed to create node
* EINVAL - Unknown type of disk exported
*/
static int
{
/* uses NNNN which is OK as long as # of disks <= 10000 */
char name[sizeof ("disk@NNNN:s,raw")];
int instance;
int num_slices = 1;
int i;
switch (vdc->vdisk_type) {
case VD_DISK_TYPE_DISK:
break;
case VD_DISK_TYPE_SLICE:
num_slices = 1;
break;
case VD_DISK_TYPE_UNK:
default:
return (EINVAL);
}
for (i = 0; i < num_slices; i++) {
vdc_msg("%s[%d]: Couldn't add block node %s.",
return (EIO);
}
/* if any device node is created we set this flag */
'a' + i, ",raw");
vdc_msg("%s[%d]: Could not add raw node %s.",
return (EIO);
}
}
return (0);
}
/*
* Function:
* vdc_create_device_nodes_props
*
* Description:
* This function creates the properties for the block and character
* device nodes under /devices. It is called as part of the attach(9E)
* of the instance during the handshake with vds after vds has sent
* the attributes to vdc.
*
* Parameters:
* vdc - soft state pointer
*
* Return Values
* 0 - Success
* EIO - Failed to create device node property
* EINVAL - Unknown type of disk exported
* ENXIO - No VTOC available
*/
static int
{
int instance;
int num_slices = 1;
int rv;
int i;
" No VTOC available", instance);
return (ENXIO);
}
switch (vdc->vdisk_type) {
case VD_DISK_TYPE_DISK:
break;
case VD_DISK_TYPE_SLICE:
num_slices = 1;
break;
case VD_DISK_TYPE_UNK:
default:
return (EINVAL);
}
for (i = 0; i < num_slices; i++) {
VD_MAKE_DEV(instance, i));
PR0("%s[%d] sz %ld (%ld Mb) p_size %lx\n",
if (rv != DDI_PROP_SUCCESS) {
vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n",
return (EIO);
}
if (rv != DDI_PROP_SUCCESS) {
return (EIO);
}
}
return (0);
}
static int
{
int instance;
int status = 0;
return (EINVAL);
return (ENXIO);
}
/*
* Check to see if we can communicate with vds
*/
return (ENOLINK);
}
return (0);
}
static int
{
int instance;
return (EINVAL);
return (ENXIO);
}
/*
* Check to see if we can communicate with vds
*/
return (ETIMEDOUT);
}
if (vdc->dkio_flush_pending) {
PR0("%s[%d]: Cannot detach: %d outstanding DKIO flushes",
return (EBUSY);
}
/*
* Should not need the mutex here, since the framework should protect
* against more opens on this device, but just in case.
*/
return (0);
}
static int
{
}
static int
{
return (0);
}
static int
{
int rv = 0;
return (ENXIO);
}
return (rv);
}
/* -------------------------------------------------------------------------- */
/*
* Disk access routines
*
*/
/*
* vdc_strategy()
*
* Return Value:
* 0: As per strategy(9E), the strategy() function must return 0
* [ bioerror(9f) sets b_flags to the proper error code ]
*/
static int
{
int rv = -1;
PR1("%s: %s %ld bytes at block %ld : b_addr=0x%p",
return (0);
}
return (0);
}
return (0);
}
static int
{
PR1("vdc_read(): Entered");
}
static int
{
PR1("vdc_write(): Entered");
}
static int
{
PR1("vdc_aread(): Entered");
}
static int
{
PR1("vdc_awrite(): Entered");
}
/* -------------------------------------------------------------------------- */
/*
* Handshake support
*/
/*
* vdc_init_handshake_negotiation
*
* Description:
* This function is called to trigger the handshake negotiations between
* the client (vdc) and the server (vds). It may be called multiple times.
*
* Parameters:
* vdc - soft state pointer
*/
static void
{
/*
* Do not continue if another thread has triggered a handshake which
* is in progress or detach() has stopped further handshakes.
*/
PR0("%s[%d] Negotiation not triggered. [init=%x]\n",
return;
}
PR0("Initializing vdc<->vds handshake\n");
if (state == VD_STATE_INIT) {
(void) vdc_init_ver_negotiation(vdc);
} else if (state == VD_STATE_VER) {
(void) vdc_init_attr_negotiation(vdc);
} else if (state == VD_STATE_ATTR) {
(void) vdc_init_dring_negotiate(vdc);
} else if (state == VD_STATE_DATA) {
/*
* nothing to do - we have already completed the negotiation
* and we can transmit data when ready.
*/
PR0("%s[%d] Negotiation triggered after handshake completed",
}
}
static int
{
int status = -1;
/*
* set the Session ID to a unique value
* (the lower 32 bits of the clock tick)
*/
PR0("%s[%d] vdc_send failed: id(%lx) rv(%d) size(%d)\n",
if (msglen != sizeof (vio_ver_msg_t))
}
return (status);
}
static int
{
int status;
/* fill in tag */
/* fill in payload */
PR0("%s[%d] ldc_write failed: id(%lx) rv(%d) size (%d)\n",
if (msglen != sizeof (vio_ver_msg_t))
}
return (status);
}
static int
{
int status = -1;
PR0("%s[%d] Init of descriptor ring completed (status = %d)\n",
if (status != 0) {
return (status);
}
/* fill in tag */
/* fill in payload */
pkt.dring_ident = 0;
if (status != 0) {
PR0("%s[%d] Failed to register DRing (status = %d)\n",
}
return (status);
}
/* -------------------------------------------------------------------------- */
/*
* LDC helper routines
*/
/*
* Function:
* vdc_send()
*
* Description:
* The function encapsulates the call to write a message using LDC.
* If LDC indicates that the call failed due to the queue being full,
* we retry the ldc_write() [ up to 'vdc_retries' times ], otherwise
* we return the error returned by LDC.
*
* Arguments:
* ldc_handle - LDC handle for the channel this instance of vdc uses
* pkt - address of LDC message to be sent
* msglen - the size of the message being sent. When the function
* returns, this contains the number of bytes written.
*
* Return Code:
* 0 - Success.
* EINVAL - pkt or msglen were NULL
* ECONNRESET - The connection was not up.
* EWOULDBLOCK - LDC queue is full
* xxx - other error codes returned by ldc_write
*/
static int
{
int retries = 0;
int status = 0;
do {
/* return the last size written */
return (status);
}
/*
* Function:
* vdc_get_ldc_id()
*
* Description:
* This function gets the 'ldc-id' for this particular instance of vdc.
* The id returned is the guest domain channel endpoint LDC uses for
* communication with vds.
*
* Arguments:
* dip - dev info pointer for this instance of the device driver.
* ldc_id - pointer to variable used to return the 'ldc-id' found.
*
* Return Code:
* 0 - Success.
* ENOENT - Expected node or property did not exist.
* ENXIO - Unexpected error communicating with MD framework
*/
static int
{
int num_nodes;
int num_vdevs;
int num_chans;
int listsz;
int idx;
int obp_inst;
*ldc_id = 0;
/*
* Get the OBP instance number for comparison with the MD instance
*
* The "cfg-handle" property of a vdc node in an MD contains the MD's
* notion of "instance", or unique identifier, for that node; OBP
* stores the value of the "cfg-handle" MD property as the value of
* the "reg" property on the node in the device tree it builds from
* the MD and passes to Solaris. Thus, we look up the devinfo node's
* "reg" property value to uniquely identify this device instance.
* If the "reg" property cannot be found, the device tree state is
* presumably so broken that there is no point in continuing.
*/
return (ENOENT);
}
OBP_REG, -1);
/*
* We now walk the MD nodes and if an instance of a vdc node matches
* the instance got from OBP we get the ldc-id property.
*/
return (ENXIO);
}
/* allocate memory for nodes */
/*
* Search for all the virtual devices, we will then check to see which
* ones are disk nodes.
*/
if (num_vdevs <= 0) {
goto done;
}
continue;
}
PR1("%s[%d] vdc inst# in MD=%d\n",
found_inst = B_TRUE;
break;
}
}
}
if (found_inst == B_FALSE) {
goto done;
}
/* get the channels for this node */
/* expecting at least one channel */
if (num_chans <= 0) {
goto done;
} else if (num_chans != 1) {
PR0("%s[%d] Expected 1 '%s' node for '%s' port, found %d\n",
}
/*
* We use the first channel found (index 0), irrespective of how
* many are there in total.
*/
}
done:
if (chanp)
if (listp)
(void) md_fini_handle(mdp);
return (status);
}
/*
* vdc_is_able_to_tx_data()
*
* Description:
* This function checks if we are able to send data to the
* vDisk server (vds). The LDC connection needs to be up and
* vdc & vds need to have completed the handshake negotiation.
*
* Parameters:
* vdc - soft state pointer
* flag - flag to indicate if we can block or not
* [ If O_NONBLOCK or O_NDELAY (which are defined in
* open(2)) are set then do not block ]
*
* Return Values
* B_TRUE - can talk to vds
* B_FALSE - unable to talk to vds
*/
static boolean_t
{
int rv = -1;
return (B_TRUE);
PR0("%s[%d] Not ready to tx - state %d LDC state %d\n",
return (B_FALSE);
}
/*
* We want to check and see if any negotiations triggered earlier
* have succeeded. We are prepared to wait a little while in case
* they are still in progress.
*/
PR0("%s: Waiting for connection at state %d (LDC state %d)\n",
/*
* An rv of -1 indicates that we timed out without the LDC
* state changing so it looks like the other side (vds) is
* not yet ready/responding.
*
* Any other value of rv indicates that the LDC triggered an
* interrupt so we just loop again, check the handshake state
* and keep waiting if necessary.
*/
if (rv == -1) {
if (retries >= vdc_retries) {
PR0("%s[%d] handshake wait timed out.\n",
return (B_FALSE);
} else {
PR1("%s[%d] Retry #%d for handshake timedout\n",
retries++;
}
}
}
return (B_TRUE);
}
static void
{
}
}
}
}
static void
{
int status;
PR0("%s[%d] ldc_reset() = %d\n",
}
}
/* -------------------------------------------------------------------------- */
/*
* Descriptor Ring helper routines
*/
static int
{
int status = -1;
int i;
&vdc->ldc_dring_hdl);
return (status);
}
if (status != 0) {
PR0("%s: Failed to bind descriptor ring (%p) to channel (%p)\n",
return (status);
}
if (status != 0) {
PR0("%s: Failed to get info for descriptor ring (%p)\n",
return (status);
}
/* Allocate the local copy of this dring */
KM_SLEEP);
/*
* Mark all DRing entries as free and init priv desc memory handles
* If any entry is initialized, we need to free it later so we set
* the bit in 'initialized' at the start.
*/
for (i = 0; i < VD_DRING_LEN; i++) {
if (status != 0) {
return (status);
}
}
/*
* We init the index of the last DRing entry used. Since the code to
* get the next available entry increments it before selecting one,
* we set it to the last DRing entry so that it wraps around to zero
* for the 1st entry to be used.
*/
return (status);
}
static void
{
int status = -1;
int i; /* loop */
for (i = 0; i < VD_DRING_LEN; i++) {
(void) ldc_mem_free_handle(mhdl);
sizeof (ldc_mem_handle_t));
}
}
VD_DRING_LEN * sizeof (vdc_local_desc_t));
}
if (status == 0) {
} else {
vdc_msg("%s: Failed to unbind Descriptor Ring (%lx)\n",
vdc->ldc_dring_hdl);
}
}
if (status == 0) {
} else {
vdc_msg("%s: Failed to destroy Descriptor Ring (%lx)\n",
vdc->ldc_dring_hdl);
}
}
}
/*
* vdc_get_next_dring_entry_idx()
*
* Description:
* This function gets the index of the next Descriptor Ring entry available
*
* Return Value:
* 0 <= rv < VD_DRING_LEN Next available slot
* -1 DRing is full
*/
static int
{
int idx = -1;
int start_idx = 0;
/* Start at the last entry used */
/*
* Loop through Descriptor Ring checking for a free entry until we reach
* the entry we started at. We should never come close to filling the
* Ring at any stage, instead this is just to prevent an entry which
* gets into an inconsistent state (e.g. due to a request timing out)
* from blocking progress.
*/
do {
/* Get the next entry after the last known index tried */
return (idx);
PR0("%s: Entry %d waiting to be accepted\n",
continue;
PR0("%s: Entry %d waiting to be processed\n",
continue;
PR0("%s: Entry %d done but not marked free\n",
/*
* If we are currently panicking, interrupts are
* disabled and we will not be getting ACKs from the
* vDisk server so we mark the descriptor ring entries
* as FREE here instead of in the ACK handler.
*/
if (panicstr) {
}
continue;
} else {
vdc_msg("Public Descriptor Ring entry corrupted");
return (-1);
}
return (-1);
}
/*
* Function:
* vdc_populate_descriptor
*
* Description:
* This routine writes the data to be transmitted to vds into the
* descriptor, notifies vds that the ring has been updated and
* then waits for the request to be processed.
*
* Arguments:
* vdc - the soft state pointer
* addr - start address of memory region.
* operation - operation we want vds to perform (VD_OP_XXX)
* arg - parameter to be sent to server (depends on VD_OP_XXX type)
* . mode for ioctl(9e)
* . LP64 diskaddr_t (block I/O)
* slice - the disk slice this request is for
*
* Return Codes:
* 0
* EAGAIN
* EFAULT
* ENXIO
* EIO
*/
static int
{
int idx = 0; /* Index of DRing entry used */
int status = 0;
int rv;
int retries = 0;
/*
* Get next available DRing entry.
*/
if (idx == -1) {
vdc_msg("%s[%d]: no descriptor ring entry avail, seq=%d\n",
/*
* Since strategy should not block we don't wait for the DRing
* to empty and instead return
*/
return (EAGAIN);
}
/*
* Wait for anybody still using the DRing entry to finish.
* (e.g. still waiting for vds to respond to a request)
*/
switch (operation) {
case VD_OP_BREAD:
case VD_OP_BWRITE:
break;
case VD_OP_FLUSH:
case VD_OP_GET_VTOC:
case VD_OP_SET_VTOC:
case VD_OP_GET_DISKGEOM:
case VD_OP_SET_DISKGEOM:
case VD_OP_SCSICMD:
if (nbytes > 0) {
}
break;
default:
}
if (rv != 0) {
return (rv);
}
/*
* fill in the data details into the DRing
*/
/*
* Send a msg with the DRing details to vds
*/
PR1("ident=0x%llx, st=%d, end=%d, seq=%d req=%d dep=%p\n",
if (status != 0) {
return (EAGAIN);
}
/*
* XXX - potential performance enhancement (Investigate at a later date)
*
* for calls from strategy(9E), instead of waiting for a response from
* vds, we could return at this stage and let the ACK handling code
* trigger the biodone(9F)
*/
/*
* When a guest is panicking, the completion of requests needs to be
* handled differently because interrupts are disabled and vdc
* will not get messages. We have to poll for the messages instead.
*/
if (ddi_in_panic()) {
int start = 0;
retries = 0;
for (;;) {
&msglen);
if (status) {
break;
}
/*
* if there are no packets wait and check again
*/
if (retries++ > vdc_dump_retries) {
PR0("[%d] Giving up waiting, idx %d\n",
break;
}
continue;
}
/*
* DRing requests.
*/
PR0("discarding pkt: type=%d sub=%d env=%d\n",
continue;
}
/*
* set the appropriate return value for the
* current request.
*/
case VIO_SUBTYPE_ACK:
status = 0;
break;
case VIO_SUBTYPE_NACK:
break;
default:
continue;
}
if (start >= VD_DRING_LEN) {
PR0("[%d] Bogus ack data : start %d\n",
continue;
}
PR1("[%d] Dumping start=%d idx=%d state=%d\n",
PR0("[%d] Entry @ %d - state !DONE %d\n",
continue;
}
/*
* We want to process all Dring entries up to
* the current one so that we can return an
* error with the correct request.
*/
PR0("[%d] Looping: start %d, idx %d\n",
continue;
}
/* exit - all outstanding requests are completed */
break;
}
return (status);
}
/*
* Now watch the DRing entries we modified to get the response
* from vds.
*/
/* debug info when dumping state on vds side */
}
return (status);
}
static int
{
int retries = 0;
int rv = 0;
if (rv == -1) {
/*
* If they persist in ignoring us we'll storm off in a
* huff and return ETIMEDOUT to the upper layers.
*/
if (retries >= vdc_retries) {
PR0("%s: Finished waiting on entry %d\n",
break;
} else {
retries++;
PR0("%s[%d]: Timeout #%d on entry %d "
"[seq %d][req %d]\n", __func__,
}
PR0("%s[%d]: vds has accessed entry %d [seq %d]"
"[req %d] but not ack'ed it yet\n",
continue;
}
/*
* we resend the message as it may have been dropped
* and have never made it to the other side (vds).
* (We reuse the original message but update seq ID)
*/
retries = 0;
&msglen);
if (status != 0) {
vdc_msg("%s: Error (%d) while resending after "
break;
}
}
}
return (status);
}
static int
{
int idx = -1;
return (0);
case VD_OP_BREAD:
case VD_OP_BWRITE:
/* call bioxxx */
break;
default:
/* signal waiter */
break;
}
/* Clear the DRing entry */
/* loop accounting to get next DRing entry */
idx++;
}
return (status);
}
static int
{
/*
* If the upper layer passed in a misaligned address we copied the
* data into an aligned buffer before sending it to LDC - we now
* copy it back to the original buffer.
*/
if (ldep->align_addr) {
}
if (status != 0) {
}
return (status);
}
static int
int operation)
{
int perm = LDC_MEM_RW;
int rv = 0;
int i;
switch (operation) {
case VD_OP_BREAD:
break;
case VD_OP_BWRITE:
break;
case VD_OP_FLUSH:
case VD_OP_GET_VTOC:
case VD_OP_SET_VTOC:
case VD_OP_GET_DISKGEOM:
case VD_OP_SET_DISKGEOM:
case VD_OP_SCSICMD:
perm = LDC_MEM_RW;
break;
default:
ASSERT(0); /* catch bad programming in vdc */
}
/*
* LDC expects any addresses passed in to be 8-byte aligned. We need
* to copy the contents of any misaligned buffers to a newly allocated
* buffer and bind it instead (and copy the contents back to the
* original buffer passed in when depopulating the descriptor)
*/
ldep->align_addr =
PR0("%s[%d] Misaligned address %lx reallocating "
"(buf=%lx entry=%d)\n",
}
PR1("%s[%d] bound mem handle; ncookies=%d\n",
if (rv != 0) {
vdc_msg("%s[%d] failed to ldc_mem_bind_handle "
"(mhdl=%lx, buf=%lx entry=%d err=%d)\n",
if (ldep->align_addr) {
}
return (EAGAIN);
}
/*
* Get the other cookies (if any).
*/
if (rv != 0) {
(void) ldc_mem_unbind_handle(mhdl);
vdc_msg("%s: failed to get next cookie(mhdl=%lx "
if (ldep->align_addr) {
}
return (EAGAIN);
}
}
return (rv);
}
/*
* Interrupt handlers for messages from LDC
*/
static uint_t
{
int rv = 0;
PR1("%s[%d] event=%x seqID=%d\n",
/*
* Depending on the type of event that triggered this callback,
* we modify the handshake state or read the data.
*
* NOTE: not done as a switch() as event could be triggered by
* a state change and a read request. Also the ordering of the
* check for the event types is deliberate.
*/
if (event & LDC_EVT_UP) {
/* get LDC state */
if (rv != 0) {
return (LDC_SUCCESS);
}
/*
* Reset the transaction sequence numbers when LDC comes up.
* We then kick off the handshake negotiation with the vDisk
* server.
*/
vdc->seq_num_reply = 0;
}
if (event & LDC_EVT_READ) {
/*
* Wake up the worker thread to process the message
*/
return (LDC_SUCCESS);
}
if (event & LDC_EVT_RESET) {
}
if (event & LDC_EVT_DOWN) {
/* get LDC state */
if (rv != 0) {
}
}
return (LDC_SUCCESS);
}
/* -------------------------------------------------------------------------- */
/*
* The following functions process the incoming messages from vds
*/
static void
{
int status = 0;
/* check if there is data */
if ((status != 0) &&
" server. Cannot check LDC queue: %d",
continue;
}
if (q_is_empty == B_FALSE) {
}
}
PR0("Message processing thread stopped\n");
thread_exit();
}
/*
* Function:
* vdc_process_msg()
*
* Description:
* This function is called by the message processing thread each time it
* is triggered when LDC sends an interrupt to indicate that there are
* more packets on the queue. When it is called it will continue to loop
* and read the messages until there are no more left on the queue. If it
* encounters an invalid sized message it will drop it and check the next
* message.
*
* Arguments:
* arg - soft state pointer for this instance of the device driver.
*
* Return Code:
* None.
*/
static void
vdc_process_msg(void *arg)
{
int status;
for (;;) {
/* read all messages - until no more left */
if (status) {
/* if status is ECONNRESET --- reset vdc state */
}
return;
}
return;
}
if (nbytes == 0) {
PR2("%s[%d]: ldc_read() done..\n",
return;
}
/*
* Verify the Session ID of the message
*
* Every message after the Version has been negotiated should
* have the correct session ID set.
*/
PR0("%s: Incorrect SID 0x%x msg 0x%lx, expected 0x%x\n",
vdc->session_id);
return;
}
case VIO_TYPE_CTRL:
break;
case VIO_TYPE_DATA:
break;
case VIO_TYPE_ERR:
break;
default:
break;
}
if (status != 0) {
PR0("%s[%d] Error (%d) occcurred processing msg\n",
}
}
}
/*
* Function:
* vdc_process_ctrl_msg()
*
* Description:
* This function is called by the message processing thread each time
* an LDC message with a msgtype of VIO_TYPE_CTRL is received.
*
* Arguments:
* vdc - soft state pointer for this instance of the device driver.
* msg - the LDC message sent by vds
*
* Return Codes:
* 0 - Success.
* EPROTO - A message was received which shouldn't have happened according
* to the protocol
* ENOTSUP - An action which is allowed according to the protocol but which
* isn't (or doesn't need to be) implemented yet.
* EINVAL - An invalid value was returned as part of a message.
*/
static int
{
int status = -1;
/* Depending on which state we are in; process the message */
case VD_STATE_INIT:
break;
}
case VIO_SUBTYPE_ACK:
break;
case VIO_SUBTYPE_NACK:
/*
* For now there is only one version number so we
* cannot step back to an earlier version but in the
* future we may need to add further logic here
* to try negotiating an earlier version as the VIO
* design allows for it.
*/
/*
* vds could not handle the version we sent so we just
* stop negotiating.
*/
break;
case VIO_SUBTYPE_INFO:
/*
* Handle the case where vds starts handshake
* (for now only vdc is the instigator)
*/
break;
default:
break;
}
break;
case VD_STATE_VER:
break;
}
case VIO_SUBTYPE_ACK:
/*
* We now verify the attributes sent by vds.
*/
/*
* Future support: step down to the block size
* and max transfer size suggested by the
* server. (If this value is less than 128K
* then multiple Dring entries per request
* would need to be implemented)
*/
break;
}
vdc_msg("%s[%d] Couldn't process attrs "
break;
}
break;
case VIO_SUBTYPE_NACK:
/*
* vds could not handle the attributes we sent so we
* stop negotiating.
*/
break;
case VIO_SUBTYPE_INFO:
/*
* Handle the case where vds starts the handshake
* (for now; vdc is the only supported instigator)
*/
break;
default:
break;
}
break;
case VD_STATE_ATTR:
break;
}
case VIO_SUBTYPE_ACK:
/* Verify that we have sent all the descr. ring info */
/* nop for now as we have just 1 dring */
/* save the received dring_ident */
PR0("%s[%d] Received dring ident=0x%lx\n",
/*
* Send an RDX message to vds to indicate we are ready
* to send data
*/
&msglen);
if (status != 0) {
break;
}
if (status != 0) {
break;
}
break;
case VIO_SUBTYPE_NACK:
/*
* vds could not handle the DRing info we sent so we
* stop negotiating.
*/
break;
case VIO_SUBTYPE_INFO:
/*
* Handle the case where vds starts handshake
* (for now only vdc is the instigator)
*/
break;
default:
}
break;
case VD_STATE_RDX:
break;
}
status = 0;
break;
default:
break;
}
return (status);
}
/*
* Function:
* vdc_process_data_msg()
*
* Description:
* This function is called by the message processing thread each time
* a message with a msgtype of VIO_TYPE_DATA is received. It will either
* be an ACK or NACK from vds[1] which vdc handles as follows.
* ACK - wake up the waiting thread
* NACK - resend any messages necessary
*
* [1] Although the message format allows it, vds should not send a
* VIO_SUBTYPE_INFO message to vdc asking it to read data; if for
* some bizarre reason it does, vdc will reset the connection.
*
* Arguments:
* vdc - soft state pointer for this instance of the device driver.
* msg - the LDC message sent by vds
*
* Return Code:
* 0 - Success.
* > 0 - error value returned by LDC
*/
static int
{
int status = 0;
uint_t i;
/*
* Check to see if the message has bogus data
*/
vdc_msg("%s: Bogus ACK data : start %d, end %d\n",
return (EPROTO);
}
/*
* calculate the number of messages that vds ACK'ed
*
* Assumes, (like the rest of vdc) that there is a 1:1 mapping
* between requests and Dring entries.
*/
/*
* Verify that the sequence number is what vdc expects.
*/
return (ENXIO);
}
case VIO_SUBTYPE_ACK:
/*
* Wake the thread waiting for each DRing entry ACK'ed
*/
for (i = 0; i < num_msgs; i++) {
}
break;
case VIO_SUBTYPE_NACK:
/* Resend necessary messages */
for (i = 0; i < num_msgs; i++) {
PR0("%s[%d]: Won't resend entry %d [flag=%d]\n",
break;
}
/* we'll reuse the message passed in */
&msglen);
if (status != 0) {
break;
}
}
break;
case VIO_SUBTYPE_INFO:
default:
break;
}
return (status);
}
/*
* Function:
* vdc_process_err_msg()
*
* NOTE: No error messages are used as part of the vDisk protocol
*/
static int
{
return (status);
}
/*
* Function:
* vdc_verify_seq_num()
*
* Description:
* This function verifies that the sequence number sent back by vds with
* the latest message correctly follows the last request processed.
*
* Arguments:
* vdc - soft state pointer for this instance of the driver.
* dring_msg - pointer to the LDC message sent by vds
* num_msgs - the number of requests being acknowledged
*
* Return Code:
* B_TRUE - Success.
* B_FALSE - The seq numbers are so out of sync, vdc cannot deal with them
*/
static boolean_t
{
/*
* Check to see if the messages were responded to in the correct
* order by vds. There are 3 possible scenarios:
* - the seq_num we expected is returned (everything is OK)
* - a seq_num earlier than the last one acknowledged is returned,
* if so something is seriously wrong so we reset the connection
* - a seq_num greater than what we expected is returned.
*/
vdc_msg("%s[%d]: Bogus seq_num %d, expected %d\n",
/*
* NOTE(review): presumably the "earlier than expected" scenario,
* since the else branch below covers the "greater" one - confirm.
*/
return (B_FALSE);
} else {
/*
* vds has responded with a seq_num greater than what we
* expected
*/
/* this scenario is also reported as a failure */
return (B_FALSE);
}
}
/* the seq_num is the one vdc expected */
return (B_TRUE);
}
/* -------------------------------------------------------------------------- */
/*
* DKIO(7) support
*
* XXX FIXME - needs to be converted to use the structures defined in the
* latest VIO spec to communicate with the vDisk server.
*/
/*
* Argument block for vdc_dkio_flush_cb(), whose arg parameter is
* documented as a pointer to one of these structures.
*/
typedef struct vdc_dk_arg {
/* callback details; see the callback step in vdc_dkio_flush_cb() */
struct dk_callback dkc;
/* NOTE(review): presumably the ioctl mode flags - confirm */
int mode;
} vdc_dk_arg_t;
/*
* Function:
* vdc_dkio_flush_cb()
*
* Description:
* This routine is a callback for DKIOCFLUSHWRITECACHE which can be called
* by kernel code.
*
* Arguments:
* arg - a pointer to a vdc_dk_arg_t structure.
*/
void
vdc_dkio_flush_cb(void *arg)
{
/* status of the flush request; checked below */
int rv;
/*
* NOTE(review): early exit whose guarding condition is not visible
* here; presumably taken when arg is unusable - confirm.
*/
return;
}
/*
* A failed flush is logged and the function returns before reaching
* the callback step below.
*/
if (rv != 0) {
PR0("%s[%d] DKIOCFLUSHWRITECACHE failed : model %x\n",
return;
}
/*
* Trigger the callback to notify the caller that the ioctl call has
* been completed.
*/
}
/* Indicate that one less DKIO write flush is outstanding */
}
/*
* This structure is used in the DKIO(7I) array below.
*/
/*
* One row of the dk_ioctl[] table: describes a single DKIO(7I) operation
* that vdc supports.
*/
typedef struct vdc_dk_ioctl {
int cmd; /* Solaris ioctl operation number */
/*
* NOTE(review): the comment below is the tail of a field description;
* it presumably belongs to the 32 bit structure size that is converted
* to the 64 bit structure - confirm.
*/
/* to 64bit struct (zero otherwise) */
/*
* Subset of DKIO(7I) operations currently supported
*/
/*
* Table of supported ioctls, searched by vd_process_ioctl().
*
* Judging by the complete rows, each entry holds: the VD_OP_XXX operation
* sent to vds, the Solaris ioctl number, copy flags (e.g. VD_COPYOUT), the
* size of the native structure and the size of the 32 bit structure (zero
* when there is no separate 32 bit form) - NOTE(review): confirm against
* the vdc_dk_ioctl_t definition.
*/
static vdc_dk_ioctl_t dk_ioctl[] = {
0, 0},
/* write cache enable get/set: no structure sizes are given */
{VD_OP_GET_WCE, DKIOCGETWCE, 0,
0, 0},
{VD_OP_SET_WCE, DKIOCSETWCE, 0,
0, 0},
sizeof (struct dk_geom), 0},
sizeof (struct dk_geom), 0},
sizeof (struct dk_geom), 0},
sizeof (struct dk_geom), 0},
sizeof (struct dk_geom), 0},
/* the only visible entry with a distinct 32 bit structure size */
sizeof (struct uscsi_cmd), sizeof (struct uscsi_cmd32)},
/*
* The remaining entries have 0 in the first (vds operation) field;
* these are the ioctls vd_process_ioctl() deals with itself because
* the server does not provide them.
*/
{0, DKIOCINFO, VD_COPYOUT,
sizeof (struct dk_cinfo), 0},
{0, DKIOCGMEDIAINFO, VD_COPYOUT,
sizeof (struct dk_minfo), 0},
{0, DKIOCREMOVABLE, 0,
0, 0},
{0, CDROMREADOFFSET, 0,
0, 0}
};
/*
* Function:
* vd_process_ioctl()
*
* Description:
* This routine is the driver entry point for handling user
* requests to get the device geometry.
*
* Arguments:
* dev - the device number
* cmd - the operation [dkio(7I)] to be processed
* arg - pointer to user provided structure
* (contains data to be set or reference parameter for get)
* mode - bit flag, indicating open settings, 32/64 bit type, etc
* rvalp - calling process return value, used in some ioctl calls
* (passed through to vds, which fills in the value)
*
* Assumptions:
* vds will make the ioctl calls in the 64 bit address space, so vdc
* converts the data to/from 32 bit as necessary around the copyin or
* copyout.
*
* Return Code:
* 0
* EFAULT
* ENXIO
* EIO
* ENOTSUP
* ENOLINK
* ENOTTY
* EINVAL
* (or the error value returned by vds for the operation)
*/
static int
{
/* result of the most recent step; also the final return value */
int rv = -1;
int idx = 0; /* index into dk_ioctl[] */
PR0("%s: Processing ioctl(%x) for dev %x : model %x\n",
instance);
/*
* NOTE(review): presumably returned when no soft state exists for
* this instance - confirm.
*/
return (ENXIO);
}
/*
* Check to see if we can communicate with the vDisk server
*/
/* no usable link to the vDisk server */
return (ENOLINK);
}
/*
* Validate the ioctl operation to be performed.
*
* If we have looped through the array without finding a match then we
* don't support this ioctl.
*/
break;
}
PR0("%s[%d] Unsupported ioctl(%x)\n",
/* cmd has no entry in dk_ioctl[] */
return (ENOTSUP);
}
/*
* Some ioctl operations have different sized structures for 32 bit
* and 64 bit. If the userland caller is 32 bit, we need to check
* to see if the operation is one of those special cases, because
* vds will make the call as 64 bit (see do_convert_32to64 below).
*/
}
/*
* Deal with the ioctls which the server does not provide.
*/
switch (cmd) {
/* these two are reported as not applicable to this device */
case CDROMREADOFFSET:
case DKIOCREMOVABLE:
return (ENOTTY);
/*
* DKIOCINFO and DKIOCGMEDIAINFO are answered without contacting vds:
* ENXIO when the data is unavailable, EFAULT when rv reports a
* failure (NOTE(review): presumably from the copy out - confirm),
* otherwise 0.
*/
case DKIOCINFO:
{
return (ENXIO);
if (rv != 0)
return (EFAULT);
return (0);
}
case DKIOCGMEDIAINFO:
return (ENXIO);
if (rv != 0)
return (EFAULT);
return (0);
}
/* catch programming error in vdc - should be a VD_OP_XXX ioctl */
/* LDC requires that the memory being mapped is 8-byte aligned */
PR1("%s[%d]: struct size %d alloc %d\n",
if (alloc_len != 0)
return (EINVAL);
}
/*
* NOTE(review): rv here is presumably the result of copying the
* caller's data in - confirm.
*/
if (rv != 0) {
return (EFAULT);
}
/*
* Some operations need the data to be converted from 32 bit
* to 64 bit structures so that vds can process them on the
* other side.
*/
if (do_convert_32to64) {
switch (cmd) {
case DKIOCSVTOC:
{
break;
}
case USCSICMD:
{
struct uscsi_cmd32 *uscmd32;
/*
* Convert the ILP32 uscsi data from the
* application to LP64 for internal use.
*/
break;
}
default:
break;
}
}
}
/*
* Handle the special case of DKIOCFLUSHWRITECACHE
*/
if (cmd == DKIOCFLUSHWRITECACHE) {
/* no mem should have been allocated hence no need to free it */
/*
* If arg is NULL, we break here and the call operates
* synchronously; waiting for vds to return.
*
* i.e. after the request to vds returns successfully,
* all writes completed prior to the ioctl will have been
* flushed from the disk write cache to persistent media.
*/
/* put the request on a task queue */
}
}
/*
* Send request to vds to service the ioctl.
*/
if (rv != 0) {
/*
* This is not necessarily an error. The ioctl could
* be returning a value such as ENOTTY to indicate
* that the ioctl is not applicable.
*/
PR0("%s[%d]: vds returned %d for ioctl 0x%x\n",
/* pass the value from vds straight back to the caller */
return (rv);
}
/*
* If the VTOC has been changed, then vdc needs to update the copy
* it saved in the soft state structure and try to update the device
* node properties. Failing to set the properties should not cause
* an error to be returned to the caller though.
*/
if (cmd == DKIOCSVTOC) {
/* a non-zero result is only reported; it does not change rv */
if (vdc_create_device_nodes_props(vdc)) {
" properties", instance);
}
}
/*
* If we don't have to do a copyout, we have nothing left to do
* so we just return.
*/
return (0);
}
/* sanity check */
return (EFAULT);
/*
* Some operations need the data to be converted from 64 bit
* back to 32 bit structures after vds has processed them.
*/
if (do_convert_32to64) {
switch (cmd) {
case DKIOCGVTOC:
{
break;
}
case USCSICMD:
{
struct uscsi_cmd32 *uc32;
/* from here on len is the size of the 32 bit structure */
len = sizeof (struct uscsi_cmd32);
break;
}
default:
PR1("%s[%d]: This mode (%x) should just work for(%x)\n",
break;
}
}
/* a failed copy out is logged and its status returned below */
if (rv != 0) {
vdc_msg("%s[%d]: Could not do copy out for ioctl (%x)\n",
}
return (rv);
}
/*
* Function:
* vdc_create_fake_geometry()
*
* Description:
* This routine fakes up the disk info needed for some DKIO ioctls.
* - DKIOCINFO
* - DKIOCGMEDIAINFO
*
* [ just like lofi(7D) and ramdisk(7D) ]
*
* Arguments:
* vdc - soft state pointer for this instance of the device driver.
*
* Return Code:
* 0 - Success
*/
static int
{
/*
* DKIOCINFO support
*/
/*
* The partition number will be created on the fly depending on the
* actual slice (i.e. minor node) that is used to request the data.
*/
/*
* DKIOCGMEDIAINFO support
*/
/* success is the only result returned here */
return (0);
}