xdf.c revision 5d03b6c34e8209515d5c25d3dfc5609ad2b7b5af
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* xdf.c - Xen Virtual Block Device Driver
* TODO:
* - support alternate block size (currently only DEV_BSIZE supported)
* - revalidate geometry for removable devices
*
 * This driver exports Solaris disk device nodes, accepts IO requests from
* those nodes, and services those requests by talking to a backend device
* in another domain.
*
* Communication with the backend device is done via a ringbuffer (which is
* managed via xvdi interfaces) and dma memory (which is managed via ddi
* interfaces).
*
 * Communication with the backend device is dependent upon establishing a
* connection to the backend device. This connection process involves
* reading device configuration information from xenbus and publishing
* some frontend runtime configuration parameters via the xenbus (for
* consumption by the backend). Once we've published runtime configuration
* information via the xenbus, the backend device can enter the connected
* state and we'll enter the XD_CONNECTED state. But before we can allow
* random IO to begin, we need to do IO to the backend device to determine
* the device label and if flush operations are supported. Once this is
* done we enter the XD_READY state and can process any IO operations.
*
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback. This callback
 * is single threaded, meaning that we can't receive new notifications of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change. Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback. This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state. All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface. IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IO requests.
*
* We also communicate with the backend device via the xenbus "media-req"
* (XBP_MEDIA_REQ) property. For more information on this see the
* comments in blkif.h.
*/
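/*
 * Illustrative summary (not from the original source) of the state
 * progression described above, using the XD_* states referenced in the
 * comments throughout this file:
 *
 *	XD_UNKNOWN/XD_CLOSED --xdf_setstate_init()--------> XD_INIT
 *	XD_INIT --------------xdf_setstate_connected()----> XD_CONNECTED
 *	XD_CONNECTED ---------xdf_ready_tq probe IO-------> XD_READY
 */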
#include <sys/sysmacros.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#else /* !XPV_HVM_DRIVER */
#include <sys/evtchn_impl.h>
#endif /* !XPV_HVM_DRIVER */
/*
 * DEBUG_EVAL can be used to include debug-only statements without
 * having to wrap them in '#ifdef DEBUG' blocks
*/
#ifdef DEBUG
#define DEBUG_EVAL(x) (x)
#else /* !DEBUG */
#define DEBUG_EVAL(x)
#endif /* !DEBUG */
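/*
 * Example usage (illustrative only): the statement below compiles into
 * DEBUG builds and disappears entirely from non-DEBUG builds.
 *
 *	DEBUG_EVAL(cmn_err(CE_NOTE, "xdf: debug-only trace"));
 */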
#define FLUSH_DISKCACHE 0x1
#define WRITE_BARRIER 0x2
#define USE_WRITE_BARRIER(vdp) \
	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define USE_FLUSH_DISKCACHE(vdp) \
	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
#define IS_FLUSH_DISKCACHE(bp) \
	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))
extern int do_polled_io;
/* run-time tunables that we don't want the compiler to optimize away */
volatile int xdf_debug = 0;
/* per module globals */
static void *xdf_ssp;
static kmem_cache_t *xdf_vreq_cache;
static kmem_cache_t *xdf_gs_cache;
static int xdf_maxphys = XB_MAXPHYS;
static int xdf_fbrewrites; /* flush block re-write count */
/* misc public functions (used by xdf_shell.c) */
int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
/* misc private functions */
static void xdf_io_start(xdf_t *);
/* callbacks from common label */
static cmlb_tg_ops_t xdf_lb_ops = {
};
/*
* I/O buffer DMA attributes
* Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
*/
static ddi_dma_attr_t xb_dma_attr = {
	DMA_ATTR_V0,
	(uint64_t)0, /* lowest address */
	(uint64_t)0xffffffffffffffff, /* highest usable address */
	(uint64_t)0xffffff, /* DMA counter limit max */
	(uint64_t)XB_BSIZE, /* alignment in bytes */
	XB_BSIZE - 1, /* bitmap of burst sizes */
	XB_BSIZE, /* min transfer */
	(uint64_t)XB_MAX_XFER, /* maximum transfer */
	(uint64_t)PAGEOFFSET, /* 1 page segment length */
	BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */
	XB_BSIZE, /* granularity */
	0, /* flags (reserved) */
};
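/*
 * Worked sizing example (illustrative): blkif.h defines
 * BLKIF_MAX_SEGMENTS_PER_REQUEST as 11 and each segment can map at most
 * one 4K page, so a single ring request can transfer at most
 * 11 * 4096 = 45056 bytes (44KB); the attributes above keep every DMA
 * window within that limit.
 */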
static ddi_device_acc_attr_t xc_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};
static void
xdf_timeout_handler(void *arg)
{
	xdf_t *vdp = arg;

	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_timeout_id = 0;
	mutex_exit(&vdp->xdf_dev_lk);

	/* new timeout thread could be re-scheduled */
	xdf_io_start(vdp);
}
/*
 * Callback invoked when DMA/GTE resources become available.
 *
 * Note: we only register one callback function with the grant table
 * subsystem since we only have one 'struct gnttab_free_callback' in xdf_t.
 */
static int
xdf_dmacallback(caddr_t arg)
{
return (DDI_DMA_CALLBACK_DONE);
}
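/*
 * A minimal sketch (not the verbatim source) of how the single grant
 * table free callback might be registered, assuming the
 * gnttab_request_free_callback() interface from the Xen grant table
 * code; the xdf_gnt_callback field name and 'count' (the number of GTEs
 * being waited for) are assumptions:
 */
#if 0
	gnttab_request_free_callback(&vdp->xdf_gnt_callback,
	    (void (*)(void *))xdf_dmacallback, (void *)vdp, count);
#endif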
static ge_slot_t *
gs_get(xdf_t *vdp, v_req_t *vreq)
{
/* try to alloc GTEs needed in this slot, first */
(void (*)(void *))xdf_dmacallback,
(void *)vdp,
}
return (NULL);
}
if (vdp->xdf_timeout_id == 0)
/* restart I/O after one second */
return (NULL);
}
/* init gs_slot */
return (gs);
}
static void
{
int i;
/* release all grant table entry resources used in this slot */
}
static grant_ref_t
{
return (gr);
}
/*
* Alloc a vreq for this bp
* bp->av_back contains the pointer to the vreq upon return
*/
static v_req_t *
{
if (vdp->xdf_timeout_id == 0)
/* restart I/O after one second */
return (NULL);
}
/* init of other fields in vreq is up to the caller */
return (vreq);
}
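/*
 * Illustrative sketch: since bp->av_back carries the vreq pointer (see
 * the comment above), vreq lookups elsewhere in the driver reduce to
 * accessor macros along these lines (the XDF_BP_* names here are
 * assumptions, not necessarily those used in xdf.h):
 */
#if 0
#define	XDF_BP_VREQ(bp)			((v_req_t *)((bp)->av_back))
#define	XDF_BP_VREQ_SET(bp, vreq)	((bp)->av_back = (buf_t *)(vreq))
#endif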
static void
{
goto done;
case VREQ_DMAWIN_DONE:
case VREQ_GS_ALLOCED:
case VREQ_DMABUF_BOUND:
/*FALLTHRU*/
case VREQ_DMAMEM_ALLOCED:
if (!ALIGNED_XFER(bp)) {
}
/*FALLTHRU*/
case VREQ_MEMDMAHDL_ALLOCED:
if (!ALIGNED_XFER(bp))
/*FALLTHRU*/
case VREQ_DMAHDL_ALLOCED:
break;
default:
break;
}
done:
}
/*
* Snarf new data if our flush block was re-written
*/
static void
{
int nblks;
return; /* write was a flush write */
}
if (mapin)
}
}
/*
 * Initialize the DMA and grant table resources for the buf
*/
static int
{
int rc;
case VREQ_INIT:
if (IS_FLUSH_DISKCACHE(bp)) {
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/* See if we wrote new data to our flush block */
/*FALLTHRU*/
case VREQ_INIT_DONE:
/*
* alloc DMA handle
*/
if (rc != DDI_SUCCESS) {
return (DDI_FAILURE);
}
/*FALLTHRU*/
case VREQ_DMAHDL_ALLOCED:
/*
* alloc dma handle for 512-byte aligned buf
*/
if (!ALIGNED_XFER(bp)) {
/*
* XXPV: we need to temporarily enlarge the seg
		 * boundary and s/g length to work around CR6381968
*/
if (rc != DDI_SUCCESS) {
"unaligned buf DMAhandle alloc failed\n",
return (DDI_FAILURE);
}
}
/*FALLTHRU*/
case VREQ_MEMDMAHDL_ALLOCED:
/*
* alloc 512-byte aligned buf
*/
if (!ALIGNED_XFER(bp)) {
if (rc != DDI_SUCCESS) {
"DMA mem allocation failed\n",
return (DDI_FAILURE);
}
}
/*FALLTHRU*/
case VREQ_DMAMEM_ALLOCED:
/*
* dma bind
*/
if (ALIGNED_XFER(bp)) {
} else {
}
/* get num of dma windows */
if (rc == DDI_DMA_PARTIAL_MAP) {
} else {
ndws = 1;
}
} else {
return (DDI_FAILURE);
}
/*FALLTHRU*/
case VREQ_DMABUF_BOUND:
/*
		 * get a ge_slot; on failure gs_get() sets the callback
		 * (if it wasn't set previously)
*/
return (DDI_FAILURE);
}
break;
case VREQ_GS_ALLOCED:
		/* nothing needs to be done */
break;
case VREQ_DMAWIN_DONE:
/*
* move to the next dma window
*/
/* get a ge_slot for this DMA window */
return (DDI_FAILURE);
}
break;
default:
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
static int
{
#if defined(XPV_HVM_DRIVER)
#else /* !XPV_HVM_DRIVER */
#endif /* !XPV_HVM_DRIVER */
}
static void
{
if (resid == 0)
}
static void
{
return;
} else {
}
}
static void
{
return;
} else {
}
}
static void
{
return;
}
static void
{
return;
}
int
{
if ((kstat = kstat_create(
return (-1);
/* See comment about locking in xdf_kstat_delete(). */
/* only one kstat can exist at a time */
return (-1);
}
/*
* Now that we've created a kstat, we need to update the waitq and
* runq counts for the kstat to reflect our current state.
*
* For a buf_t structure to be on the runq, it must have a ring
* buffer slot associated with it. To get a ring buffer slot the
* buf must first have a v_req_t and a ge_slot_t associated with it.
* Then when it is granted a ring buffer slot, v_runq will be set to
* true.
*
* For a buf_t structure to be on the waitq, it must not be on the
* runq. So to find all the buf_t's that should be on waitq, we
* walk the active buf list and add any buf_t's which aren't on the
* runq to the waitq.
*/
}
return (0);
}
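/*
 * A minimal sketch (with assumptions) of the runq/waitq accounting
 * described above: walk the active buf list and charge each buf to the
 * runq or waitq kstat counter. kstat_runq_enter()/kstat_waitq_enter()
 * and KSTAT_IO_PTR() are the standard kstat IO interfaces; the
 * xdf_f_act list head and av_forw chaining shown here are assumptions.
 */
#if 0
	for (bp = vdp->xdf_f_act; bp != NULL; bp = bp->av_forw) {
		v_req_t *vreq = (v_req_t *)bp->av_back;

		if ((vreq != NULL) && vreq->v_runq)
			kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
		else
			kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	}
#endif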
void
{
/*
* The locking order here is xdf_iostat_lk and then xdf_dev_lk.
* xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
	 * and the contents of our kstat. xdf_iostat_lk is used
* to protect the allocation and freeing of the actual kstat.
* xdf_dev_lk can't be used for this purpose because kstat
* readers use it to access the contents of the kstat and
* hence it can't be held when calling kstat_delete().
*/
return;
}
/*
* We're about to destroy the kstat structures, so it isn't really
* necessary to update the runq and waitq counts. But, since this
* isn't a hot code path we can afford to be a little pedantic and
* go ahead and decrement the runq and waitq kstat counters to zero
	 * before freeing them. This helps us ensure that we've gotten all
* our accounting correct.
*
* For an explanation of how we determine which buffers go on the
* runq vs which go on the waitq, see the comments in
* xdf_kstat_create().
*/
}
}
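/*
 * Sketch of the locking order described above (illustrative only):
 * xdf_iostat_lk is taken first to serialize kstat creation/deletion,
 * xdf_dev_lk is taken second and dropped before kstat_delete() since
 * kstat readers grab it while reading the kstat contents.
 */
#if 0
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);
	kstat = vdp->xdf_xdev_iostat;
	vdp->xdf_xdev_iostat = NULL;
	mutex_exit(&vdp->xdf_dev_lk);
	kstat_delete(kstat); /* legal now that xdf_dev_lk is dropped */
	mutex_exit(&vdp->xdf_iostat_lk);
#endif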
/*
 * Add an IO request onto the active queue.
*
* We have to detect IOs generated by xdf_ready_tq_thread. These IOs
 * are used to establish a connection to the backend, so they receive
* priority over all other IOs. Since xdf_ready_tq_thread only does
* synchronous IO, there can only be one xdf_ready_tq_thread request at any
* given time and we record the buf associated with that request in
* xdf_ready_tq_bp.
*/
static void
{
/* new IO requests from the ready thread */
return;
}
	/* this is a normal IO request */
	/* this is the only IO on the active queue */
return;
}
/* add this IO to the tail of the active queue */
}
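/*
 * Illustrative sketch of the priority test described above: because
 * xdf_ready_tq_thread only issues one synchronous IO at a time,
 * detecting its IOs reduces to a thread comparison and recording the
 * buf in xdf_ready_tq_bp (the exact details here are assumptions):
 */
#if 0
	if (curthread == vdp->xdf_ready_tq_thread) {
		/* ready-thread IO: record it so it can be prioritized */
		ASSERT(vdp->xdf_ready_tq_bp == NULL);
		vdp->xdf_ready_tq_bp = bp;
	}
#endif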
static void
{
/* we're done with a ready thread IO request */
return;
}
/* we're done with a normal IO request */
/* This IO was at the head of our active queue. */
} else {
		/* This IO finished before some other pending IOs. */
}
}
}
static buf_t *
{
/*
* If we're in the XD_CONNECTED state, we only service IOs
* from the xdf_ready_tq_thread thread.
*/
return (NULL);
return (bp);
return (NULL);
}
/* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */
return (NULL);
for (;;) {
return (NULL);
return (bp);
/* advance the active buf index pointer */
}
}
static void
{
if (bioerr != 0)
return;
/* remove this IO from our active queue */
/* Partial transfers are an error */
} else {
}
}
/*
* xdf interrupt handler
*/
static uint_t
{
int bioerr;
return (DDI_INTR_UNCLAIMED);
/*
* complete all requests which have a response
*/
if (status != BLKIF_RSP_OKAY) {
} else {
bioerr = 0;
}
}
return (DDI_INTR_CLAIMED);
}
/*
* xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and
* block at a lower pil.
*/
static uint_t
{
int rv;
if (!do_polled_io)
return (rv);
}
static void
{
return;
"xdf@%s: xdf_ring_push: sent request(s) to backend\n",
}
}
static int
{
if (xdf_debug & SUSRES_DBG)
xen_printf("xdf_ring_drain: start\n");
goto out;
(void) xdf_intr_locked(vdp);
goto out;
/* file-backed devices can be slow */
#ifdef XPV_HVM_DRIVER
(void) HYPERVISOR_yield();
#endif /* XPV_HVM_DRIVER */
}
out:
}
if (xdf_debug & SUSRES_DBG)
xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n",
return (rv);
}
static int
{
int rv;
return (rv);
}
/*
* Destroy all v_req_t, grant table entries, and our ring buffer.
*/
static void
{
return;
}
/*
	 * We don't want to receive async notifications from the backend
* when it finishes processing ring entries.
*/
#ifdef XPV_HVM_DRIVER
#else /* !XPV_HVM_DRIVER */
#endif /* !XPV_HVM_DRIVER */
/*
* Drain any requests in the ring. We need to do this before we
* can free grant table entries, because if active ring entries
* point to grants, then the backend could be trying to access
* those grants.
*/
(void) xdf_ring_drain_locked(vdp);
/* We're done talking to the backend so free up our event channel */
	/* Free up any grant table entries associated with this IO */
/* If this IO was on the runq, move it back to the waitq. */
/*
* Reset any buf IO state since we're going to re-issue the
* IO when we reconnect.
*/
}
/* reset the active queue index pointer */
/* Destroy the ring */
}
void
{
}
/*
* Check if we have a pending "eject" media request.
*/
static int
{
if (!vdp->xdf_media_req_supported)
return (B_FALSE);
return (B_FALSE);
return (B_FALSE);
}
return (B_TRUE);
}
/*
* Generate a media request.
*/
static int
{
char *xsname;
/*
* we can't be holding xdf_dev_lk because xenbus_printf() can
* block while waiting for a PIL 1 interrupt message. this
* would cause a deadlock with xdf_intr() which needs to grab
* xdf_dev_lk as well and runs at PIL 5.
*/
return (ENXIO);
/* Check if we support media requests */
return (ENOTTY);
/* If an eject is pending then don't allow any new requests */
if (xdf_eject_pending(vdp))
return (ENXIO);
/* Make sure that there is media present */
return (ENXIO);
/* We only allow operations when the device is ready and connected */
return (EIO);
return (EIO);
return (0);
}
/*
* populate a single blkif_request_t w/ a buf
*/
static void
{
int seg = 0;
if (isread)
else {
switch (vreq->v_flush_diskcache) {
case FLUSH_DISKCACHE:
return;
case WRITE_BARRIER:
break;
default:
else
break;
}
}
/*
	 * loop until all segments are populated or there are no more
	 * DMA cookies in the buf
*/
for (;;) {
/*
* Each segment of a blkif request can transfer up to
* one 4K page of data.
*/
"xdf@%s: seg%d: dmacS %lu blk_off %ld\n",
seg++;
if (--ndmacs) {
continue;
}
break;
}
}
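/*
 * A minimal sketch (assuming the public blkif_request_t layout from
 * blkif.h) of what populating one segment in the loop above looks like;
 * 'gr', 'first_sect', and 'last_sect' are hypothetical locals derived
 * from the current DMA cookie:
 */
#if 0
	rreq->seg[seg].gref = gr; /* grant ref mapping the 4K page */
	rreq->seg[seg].first_sect = first_sect; /* first 512B sector used */
	rreq->seg[seg].last_sect = last_sect; /* last 512B sector used */
#endif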
static void
{
/*
* Populate the ring request(s). Loop until there is no buf to
	 * transfer or no free slot is available in the I/O ring.
*/
for (;;) {
/* don't start any new IO if we're suspending */
if (vdp->xdf_suspending)
break;
break;
/* if the buf doesn't already have a vreq, allocate one */
break;
break;
/* get next blkif_request in the ring */
break;
/* populate blkif_request with this buf */
		/*
		 * The buf has now been allocated ring and DMA
		 * resources, so if it isn't already in our runq, add it.
		 */
}
/* Send the request(s) to the backend */
if (rreqready)
}
/* check if a partition is open; partition == -1 checks all partitions on disk */
static boolean_t
{
int i;
if (partition == -1)
else
for (i = 0; i < OTYPCNT; i++) {
}
return (rval);
}
/*
* The connection should never be closed as long as someone is holding
 * us open, there is pending IO, or someone is waiting for a
* connection.
*/
static boolean_t
{
return (B_TRUE);
}
return (B_TRUE);
}
return (B_TRUE);
}
if (vdp->xdf_connect_req > 0) {
return (B_TRUE);
}
return (B_FALSE);
}
static void
{
}
static void
{
/* Check if we're already there. */
return;
	/* If we're already closed then there's nothing to do. */
return;
}
#ifdef DEBUG
/* UhOh. Warn the user that something bad has happened. */
(vdp->xdf_xdev_nblocks != 0)) {
}
#endif /* DEBUG */
/* If we're busy then we can only go into the unknown state */
/* if we're closed now, let the other end know */
}
/*
* Kick-off connect process
* Status should be XD_UNKNOWN or XD_CLOSED
* On success, status will be changed to XD_INIT
* On error, it will be changed to XD_UNKNOWN
*/
static int
{
int rv;
/*
* If an eject is pending then don't allow a new connection.
	 * (Only the backend can clear a media eject request.)
*/
if (xdf_eject_pending(vdp))
return (DDI_FAILURE);
goto errout;
goto errout;
/*
	 * Sanity check for the existence of the xenbus device-type property.
	 * This property might not exist if our xenbus device nodes were
* force destroyed while we were still connected to the backend.
*/
goto errout;
goto errout;
#ifdef XPV_HVM_DRIVER
#else /* !XPV_HVM_DRIVER */
DDI_SUCCESS) {
goto errout1;
}
#endif /* !XPV_HVM_DRIVER */
DDI_SUCCESS) {
goto errout2;
}
/*
	 * Write into xenstore the info needed by the backend
*/
if (xenbus_transaction_start(&xbt)) {
goto fail_trans;
}
/*
* XBP_PROTOCOL is written by the domain builder in the case of PV
* domains. However, it is not written for HVM domains, so let's
* write it here.
*/
goto fail_trans;
}
/* kick-off connect process */
goto trans_retry;
goto fail_trans;
}
return (DDI_SUCCESS);
#ifdef XPV_HVM_DRIVER
#else /* !XPV_HVM_DRIVER */
#endif /* !XPV_HVM_DRIVER */
return (DDI_FAILURE);
}
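/*
 * The trans_retry/fail_trans labels above follow the standard xenbus
 * transaction pattern. A self-contained sketch of that pattern
 * (illustrative; 'xsname' and XBV_MEDIA_REQ_NONE are assumptions here):
 */
#if 0
trans_retry:
	if (xenbus_transaction_start(&xbt))
		goto fail_trans;
	if (xenbus_printf(xbt, xsname, XBP_MEDIA_REQ, "%s",
	    XBV_MEDIA_REQ_NONE) != 0) {
		(void) xenbus_transaction_end(xbt, 1); /* abort */
		goto fail_trans;
	}
	if (xenbus_transaction_end(xbt, 0) == EAGAIN)
		goto trans_retry; /* raced with another update; retry */
#endif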
int
{
/*
	 * Get a DEV_BSIZE aligned buffer
*/
(int)vdp->xdf_xdev_secsize);
return (DDI_FAILURE);
return (DDI_SUCCESS);
}
static void
xdf_setstate_ready(void *arg)
{
	/*
	 * We've created all the minor nodes via cmlb_attach() using default
	 * values in xdf_attach(), to make it possible to block in xdf_open()
	 * in case anyone (say, the booting thread) ever tries to open the
	 * device before we're connected to the backend. Now that we're
	 * almost connected, refresh all those minor nodes with the latest
	 * info we've got.
	 */
if (vdp->xdf_cmbl_reattach) {
if (xdf_cmlb_attach(vdp) != 0) {
return;
}
}
/* If we're not still trying to get to the ready state, then bail. */
return;
}
/*
* If backend has feature-barrier, see if it supports disk
* cache flush op.
*/
if (vdp->xdf_feature_barrier) {
/*
* Pretend we already know flush is supported so probe
* will attempt the correct op.
*/
} else {
/*
* If the other end does not support the cache flush op
* then we must use a barrier-write to force disk
* cache flushing. Barrier writes require that a data
* block actually be written.
* Cache a block to barrier-write when we are
* asked to perform a flush.
* XXX - would it be better to just copy 1 block
* (512 bytes) from whatever write we did last
* and rewrite that block?
*/
return;
}
}
}
/* Restart any currently queued up io */
}
/*
* synthetic geometry
*/
#define XDF_NSECTS 256
#define XDF_NHEADS 16
static void
{
}
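/*
 * Worked example of the synthetic geometry above (illustrative): with
 * XDF_NSECTS = 256 and XDF_NHEADS = 16, each synthetic cylinder holds
 * 256 * 16 = 4096 blocks, so the cylinder count for a device of
 * xdf_xdev_nblocks blocks is simply:
 */
#if 0
	pgeom->g_nsect = XDF_NSECTS;
	pgeom->g_nhead = XDF_NHEADS;
	pgeom->g_ncyl = vdp->xdf_xdev_nblocks / (XDF_NSECTS * XDF_NHEADS);
#endif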
/*
* Finish other initialization after we've connected to backend
* Status should be XD_INIT before calling this routine
* On success, status should be changed to XD_CONNECTED.
* On error, status should stay XD_INIT
*/
static int
{
diskaddr_t nblocks = 0;
return (DDI_FAILURE);
/* Make sure the other end is XenbusStateConnected */
return (DDI_FAILURE);
/* Determine if feature barrier is supported by backend */
/*
* Probe backend. Read the device size into xdf_xdev_nblocks
* and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE
* flags in xdf_dinfo. If the emulated device type is "cdrom",
	 * we always set VDISK_CDROM, regardless of whether it's present in
* the xenbus info parameter.
*/
NULL) != 0) {
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
dinfo |= VDISK_CDROM;
#ifdef _ILP32
"backend disk device too large with %llu blocks for"
return (DDI_FAILURE);
}
#endif
/*
	 * If the physical geometry for a fixed disk has been explicitly
* set then make sure that the specified physical geometry isn't
* larger than the device we connected to.
*/
if (vdp->xdf_pgeom_fixed &&
"xdf@%s: connect failed, fixed geometry too large",
return (DDI_FAILURE);
}
/* mark vbd is ready for I/O */
/* check if the cmlb label should be updated */
(!vdp->xdf_pgeom_fixed &&
if (!vdp->xdf_pgeom_fixed)
}
if (vdp->xdf_xdev_nblocks == 0) {
} else {
}
} else {
}
}
/* Restart any currently queued up io */
	/*
	 * To get to the ready state we have to do IO to the backend device,
	 * but we can't initiate IO from the other end change callback thread
	 * (which is the current context we're executing in). This is because
	 * if the other end disconnects while we're doing IO from the callback
	 * thread, then we can't receive that disconnect event and we hang
	 * waiting for an IO that can never complete.
	 */
return (DDI_SUCCESS);
}
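/*
 * A minimal sketch (not the verbatim source) of handing the ready-state
 * probing IO off to the taskq, as described above, via the standard
 * ddi_taskq_dispatch() interface:
 */
#if 0
	if (ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp,
	    DDI_SLEEP) != DDI_SUCCESS)
		return (DDI_FAILURE);
#endif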
/*ARGSUSED*/
static void
{
/* We assume that this callback is single threaded */
/* ignore any backend state changes if we're suspending/suspended */
return;
}
switch (new_state) {
case XenbusStateUnknown:
case XenbusStateInitialising:
case XenbusStateInitWait:
case XenbusStateInitialised:
break;
break;
break;
case XenbusStateConnected:
break;
break;
}
break;
}
break;
case XenbusStateClosing:
"xdf@%s: hot-unplug failed, still in use",
break;
}
/*FALLTHROUGH*/
case XenbusStateClosed:
break;
}
/* notify anybody waiting for oe state change */
}
static int
{
/* we can't connect once we're in the closed state */
return (XD_CLOSED);
vdp->xdf_connect_req++;
/* only one thread at a time can be the connection thread */
/*
		 * If we haven't established a connection
* within the reset time, then disconnect
* so we can try again, and double the reset
* time. The reset time starts at 2 sec.
*/
reset *= 2;
}
(void) xdf_setstate_init(vdp);
(void) xdf_setstate_connected(vdp);
}
goto out;
} else {
/* delay for 0.1 sec */
if (rv == -1)
timeouts++;
}
if (rv == 0)
goto out;
}
out:
/*
* wake up someone else so they can become the connection
* thread.
*/
}
/* Try to lock the media */
vdp->xdf_connect_req--;
}
static uint_t
{
return (DDI_INTR_CLAIMED);
}
#if defined(XPV_HVM_DRIVER)
typedef struct xdf_hvm_entry {
	list_node_t xdf_he_list; /* list linkage */
	char *xdf_he_path;
	dev_info_t *xdf_he_dip;
} xdf_hvm_entry_t;
static list_t xdf_hvm_list;
static kmutex_t xdf_hvm_list_lock;
static xdf_hvm_entry_t *
{
xdf_hvm_entry_t *i;
i = list_head(&xdf_hvm_list);
while (i != NULL) {
i = list_next(&xdf_hvm_list, i);
continue;
}
i = list_next(&xdf_hvm_list, i);
continue;
}
break;
}
return (i);
}
dev_info_t *
xdf_hvm_hold(const char *path)
{
xdf_hvm_entry_t *i;
if (i == NULL) {
return (B_FALSE);
}
return (dip);
}
static void
{
xdf_hvm_entry_t *i;
char *path;
/* figure out the path for the dip */
i = kmem_alloc(sizeof (*i), KM_SLEEP);
i->xdf_he_dip = dip;
list_insert_head(&xdf_hvm_list, i);
}
static void
{
xdf_hvm_entry_t *i;
list_remove(&xdf_hvm_list, i);
kmem_free(i, sizeof (*i));
}
static void
xdf_hvm_init(void)
{
}
static void
xdf_hvm_fini(void)
{
}
{
int rv;
/*
	 * Before trying to establish a connection we need to wait for the
* backend hotplug scripts to have run. Once they are run the
* "<oename>/hotplug-status" property will be set to "connected".
*/
for (;;) {
/*
* Get the xenbus path to the backend device. Note that
* we can't cache this path (and we look it up on each pass
* through this loop) because it could change during
* suspend, resume, and migration operations.
*/
return (B_FALSE);
}
break;
/* wait for an update to "<oename>/hotplug-status" */
/* we got interrupted by a signal */
return (B_FALSE);
}
}
/* Good news. The backend hotplug scripts have been run. */
/*
* If we're emulating a cd device and if the backend doesn't support
	 * media request operations, then we're not going to bother trying
	 * to establish a connection for a couple of reasons. First off, media
* requests support is required to support operations like eject and
* media locking. Second, other backend platforms like Linux don't
* support hvm pv cdrom access. They don't even have a backend pv
* driver for cdrom device nodes, so we don't want to block forever
* waiting for a connection to a backend driver that doesn't exist.
*/
return (B_FALSE);
}
}
int
{
/* sanity check the requested physical geometry */
(geomp->g_capacity == 0)) {
return (EINVAL);
}
/*
* If we've already connected to the backend device then make sure
* we're not defining a physical geometry larger than our backend
* device.
*/
if ((vdp->xdf_xdev_nblocks != 0) &&
return (EINVAL);
}
/* force a re-validation */
return (0);
}
{
return (rv);
}
{
return (rv);
}
{
return (rv);
}
#endif /* XPV_HVM_DRIVER */
static int
{
return (ENXIO);
return (0);
}
static int
{
return (ENXIO);
return (0);
}
/*
* No real HBA, no geometry available from it
*/
/*ARGSUSED*/
static int
{
return (EINVAL);
}
static int
{
return (ENXIO);
else
return (0);
}
/* ARGSUSED3 */
int
{
int instance;
return (ENXIO);
switch (cmd) {
case TG_GETPHYGEOM:
case TG_GETVIRTGEOM:
case TG_GETCAPACITY:
case TG_GETBLOCKSIZE:
return (0);
case TG_GETATTR:
default:
return (ENOTTY);
}
}
/* ARGSUSED5 */
int
{
int err = 0;
/* We don't allow IO from the oe_change callback thread */
return (EINVAL);
else
(void) xdf_ring_drain(vdp);
return (err);
}
/*
* Lock the current media. Set the media state to "lock".
* (Media locks are only respected by the backend driver.)
*/
static int
{
int rv;
return (rv);
}
/*
* Release a media lock. Set the media state to "none".
*/
static int
{
int rv;
return (rv);
}
/*
* Eject the current media. Ignores any media locks. (Media locks
 * are only for the benefit of the backend.)
*/
static int
{
int rv;
return (rv);
}
/*
	 * We've set the media request xenbus parameter to eject, so now
	 * disconnect from the backend, wait for the backend to clear
	 * the media request xenbus parameter, and then we can reconnect
* to the backend.
*/
return (EIO);
}
return (0);
}
/*
* Watch for media state changes. This can be an insertion of a device
* (triggered by a 'xm block-configure' request in another domain) or
* the ejection of a device (triggered by a local "eject" operation).
* For a full description of the DKIOCSTATE ioctl behavior see dkio(7I).
*/
static int
{
enum dkio_state prev_state;
return (EINTR);
}
}
}
if ((prev_state != DKIO_INSERTED) &&
return (0);
}
return (0);
}
/*ARGSUSED*/
static int
int *rvalp)
{
int rv;
return (ENXIO);
switch (cmd) {
default:
return (ENOTTY);
case DKIOCG_PHYGEOM:
case DKIOCG_VIRTGEOM:
case DKIOCGGEOM:
case DKIOCSGEOM:
case DKIOCGAPART:
case DKIOCSAPART:
case DKIOCGVTOC:
case DKIOCSVTOC:
case DKIOCPARTINFO:
case DKIOCGEXTVTOC:
case DKIOCSEXTVTOC:
case DKIOCEXTPARTINFO:
case DKIOCGMBOOT:
case DKIOCSMBOOT:
case DKIOCGETEFI:
case DKIOCSETEFI:
case DKIOCPARTITION:
case FDEJECT:
case DKIOCEJECT:
case CDROMEJECT:
return (xdf_ioctl_eject(vdp));
case DKIOCLOCK:
return (xdf_ioctl_mlock(vdp));
case DKIOCUNLOCK:
return (xdf_ioctl_munlock(vdp));
case CDROMREADOFFSET: {
int offset = 0;
return (ENOTTY);
return (EFAULT);
return (0);
}
case DKIOCGMEDIAINFO: {
struct dk_minfo media_info;
else
return (EFAULT);
return (0);
}
case DKIOCINFO: {
/* controller information */
else
/* unit information */
return (EFAULT);
return (0);
}
case DKIOCSTATE: {
enum dkio_state mstate;
return (EFAULT);
return (rv);
return (EFAULT);
return (0);
}
case DKIOCREMOVABLE: {
return (EFAULT);
return (0);
}
case DKIOCGETWCE: {
return (EFAULT);
return (0);
}
case DKIOCSETWCE: {
int i;
return (EFAULT);
return (0);
}
case DKIOCFLUSHWRITECACHE: {
if (vdp->xdf_flush_supported) {
} else if (vdp->xdf_feature_barrier &&
} else {
return (ENOTTY);
}
/* need to return 0 after calling callback */
rv = 0;
}
return (rv);
}
}
/*NOTREACHED*/
}
static int
{
int part;
return (0);
}
/* We don't allow IO from the oe_change callback thread */
/* Check for writes to a read only device */
return (0);
}
/* Check if this I/O is accessing a partition or the entire disk */
/* This I/O is using an absolute offset */
p_blkst = 0;
} else {
/* This I/O is using a partition relative offset */
return (0);
}
}
/*
	 * Adjust the real blkno and bcount according to the underlying
* physical sector size.
*/
/* check for a starting block beyond the disk or partition limit */
return (0);
}
		/* Legacy: don't set the error flag in this case */
return (0);
}
/* sanitize the input buf */
	/* Adjust for a partial transfer; this will result in an error later */
if (vdp->xdf_xdev_secsize != 0 &&
} else {
}
if (vdp->xdf_xdev_secsize != 0 &&
} else {
}
}
/* Fix up the buf struct */
if (do_polled_io)
(void) xdf_ring_drain(vdp);
return (0);
}
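/*
 * Illustrative example of the sector-size adjustment mentioned above:
 * if the backend reports a 4096-byte physical sector size while the
 * block layer works in DEV_BSIZE (512-byte) units, a request at block
 * 'blkno' maps to backend sector blkno / (4096 / 512) = blkno / 8, and
 * b_bcount must be a multiple of the physical sector size, otherwise
 * the transfer is trimmed and later flagged as a partial transfer.
 */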
/*ARGSUSED*/
static int
{
int part;
return (ENXIO);
return (ENXIO);
return (ENXIO);
return (ENOSPC);
return (EINVAL);
}
/*ARGSUSED*/
static int
{
int part;
return (ENXIO);
return (ENXIO);
return (ENXIO);
return (ENOSPC);
return (EINVAL);
}
/*ARGSUSED*/
static int
{
int part;
return (ENXIO);
return (ENXIO);
return (ENXIO);
return (ENOSPC);
return (EINVAL);
}
/*ARGSUSED*/
static int
{
int part;
return (ENXIO);
return (ENXIO);
return (ENXIO);
return (ENOSPC);
return (EINVAL);
}
static int
{
int err = 0;
int part;
return (ENXIO);
/* We don't allow IO from the oe_change callback thread */
return (ENXIO);
return (ENXIO);
return (EINVAL);
}
return (err);
}
/*ARGSUSED*/
static int
{
int part;
return (ENXIO);
return (ENXIO);
}
} else {
}
return (0);
}
static int
{
int part;
diskaddr_t p_blkct = 0;
return (ENXIO);
/* do cv_wait until connected or failed */
return (ENXIO);
}
return (EROFS);
}
return (EBUSY);
}
/* are we the first one to open this node? */
/* force a re-validation */
if (firstopen)
/* If this is a non-blocking open then we're done */
if (nodelay)
return (0);
/*
* This is a blocking open, so we require:
* - that the disk have a valid label on it
* - that the size of the partition that we're opening is non-zero
*/
return (ENXIO);
}
return (0);
}
/*ARGSUSED*/
static void
{
}
static int
{
/*
	 * Sanity check that if a dev_t or dip were specified, they
* correspond to this device driver. On debug kernels we'll
* panic and on non-debug kernels we'll return failure.
*/
return (DDI_PROP_NOT_FOUND);
}
/*ARGSUSED*/
static int
{
switch (cmd) {
case DDI_INFO_DEVT2DEVINFO:
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
case DDI_INFO_DEVT2INSTANCE:
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
}
/*ARGSUSED*/
static int
{
char *oename;
goto err;
if (xdf_debug & SUSRES_DBG)
goto err;
}
goto err;
}
goto err;
}
if (xdf_debug & SUSRES_DBG)
return (DDI_SUCCESS);
err:
if (xdf_debug & SUSRES_DBG)
return (DDI_FAILURE);
}
static int
{
"xdf_debug", 0)) != 0)
xdf_debug = n;
switch (cmd) {
case DDI_RESUME:
return (xdf_resume(dip));
case DDI_ATTACH:
break;
default:
return (DDI_FAILURE);
}
/* DDI_ATTACH */
return (DDI_FAILURE);
/*
* Disable auto-detach. This is necessary so that we don't get
	 * detached while we're disconnected from the backend.
*/
return (DDI_FAILURE);
/* driver handles kernel-issued IOCTLs */
return (DDI_FAILURE);
return (DDI_FAILURE);
return (DDI_FAILURE);
return (DDI_FAILURE);
}
return (DDI_FAILURE);
if (dev_iscd) {
} else {
}
goto errout0;
goto errout0;
goto errout0;
}
/*
	 * Initialize the physical geometry structure. Note that currently
* we don't know the size of the backend device so the number
* of blocks on the device will be initialized to zero. Once
* we connect to the backend device we'll update the physical
* geometry to reflect the real size of the device.
*/
/*
	 * create default device minor nodes (non-removable disk);
	 * we will adjust the minor nodes after we are connected to the backend
*/
if (xdf_cmlb_attach(vdp) != 0) {
"xdf@%s: attach failed, cmlb attach failed",
goto errout0;
}
/*
* We ship with cache-enabled disks
*/
/* Watch backend XenbusState change */
goto errout0;
}
goto errout1;
}
#if defined(XPV_HVM_DRIVER)
/* Report our version to dom0. */
#else /* !XPV_HVM_DRIVER */
/* create kstat for iostat(1M) */
goto errout1;
}
#endif /* !XPV_HVM_DRIVER */
return (DDI_SUCCESS);
}
return (DDI_FAILURE);
}
static int
{
return (DDI_FAILURE);
if (xdf_debug & SUSRES_DBG)
if (xdf_debug & SUSRES_DBG)
return (DDI_SUCCESS);
}
static int
{
int instance;
switch (cmd) {
case DDI_PM_SUSPEND:
break;
case DDI_SUSPEND:
return (xdf_suspend(dip));
case DDI_DETACH:
break;
default:
return (DDI_FAILURE);
}
return (DDI_FAILURE);
return (DDI_FAILURE);
}
#if defined(XPV_HVM_DRIVER)
#endif /* XPV_HVM_DRIVER */
if (vdp->xdf_timeout_id != 0)
/* we'll support backend running in domU later */
#ifdef DOMU_BACKEND
#endif
return (DDI_SUCCESS);
}
/*
* Driver linkage structures.
*/
NULL,
};
struct dev_ops xdf_devops = {
DEVO_REV, /* devo_rev */
0, /* devo_refcnt */
xdf_getinfo, /* devo_getinfo */
nulldev, /* devo_identify */
nulldev, /* devo_probe */
xdf_attach, /* devo_attach */
xdf_detach, /* devo_detach */
nodev, /* devo_reset */
&xdf_cbops, /* devo_cb_ops */
NULL, /* devo_bus_ops */
NULL, /* devo_power */
ddi_quiesce_not_supported, /* devo_quiesce */
};
/*
* Module linkage structures.
*/
&mod_driverops, /* Type of module. This one is a driver */
"virtual block driver", /* short description */
&xdf_devops /* driver specific ops */
};
static struct modlinkage xdf_modlinkage = {
};
/*
* standard module entry points
*/
int
_init(void)
{
int rc;
return (EINVAL);
return (rc);
#if defined(XPV_HVM_DRIVER)
xdf_hvm_init();
#endif /* XPV_HVM_DRIVER */
#if defined(XPV_HVM_DRIVER)
xdf_hvm_fini();
#endif /* XPV_HVM_DRIVER */
return (rc);
}
return (rc);
}
int
_fini(void)
{
int err;
return (err);
#if defined(XPV_HVM_DRIVER)
xdf_hvm_fini();
#endif /* XPV_HVM_DRIVER */
return (0);
}
int
{
}