/* xdb.c revision ee56d0c81901bbe996dc0aa42265e53824979adf */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Note: This is the backend part of the split PV disk driver. Currently,
* this driver does not create any minor node. So, although it runs in the
* backend domain, it will not be used directly from within dom0.
* It simply gets block I/O requests issued by frontend from a shared page
* (blkif ring buffer - defined by Xen) between backend and frontend domain,
* generates a buf, and push it down to underlying disk target driver via
* ldi interface. When buf is done, this driver will generate a response
* and put it into ring buffer to inform frontend of the status of the I/O
* request issued by it. When a new virtual device entry is added in xenstore,
* there will be a watch event sent from Xen to the xvdi framework, which will,
* in turn, create the devinfo node and try to attach this driver
* (see xvdi_create_dev). When frontend peer changes its state to
* XenbusStateClose, an event will also be sent from Xen to xvdi framework,
* who will detach and remove this devinfo node (see i_xvdi_oestate_handler).
* I/O requests get from ring buffer and event coming from xenstore cannot be
* trusted. We verify them in xdb_get_buf() and xdb_check_state_transition().
*
* The driver uses xenbus_* interfaces and also xvdi_* to interact with the
* hypervisor. There is an on-going effort to make xvdi_* cover all xenbus_*.
*/
#include <sys/dditypes.h>
#include <sys/bootconf.h>
#include <sys/sysmacros.h>
#include <vm/seg_kmem.h>
static xdb_t *xdb_statep;
static int xdb_debug = 0;
static void xdb_close(dev_info_t *);
static int xdb_biodone(buf_t *);
#ifdef DEBUG
/*
* debug aid functions
*/
/*
 * Debug aid: remember that 'va' now has an I/O page mapped at it.
 * Drops into the kernel debugger if 'va' is already tracked in
 * page_addrs[] (i.e. a double mapping, which should never happen).
 * NOTE(review): the declarator line (function name/parameters) is
 * missing from this excerpt.
 */
static void
{
int i;
/* first pass: assert 'va' is not already tracked */
for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
if (page_addrs[i] == va)
debug_enter("VA remapping found!");
}
/* second pass: record 'va' in the first free slot */
for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
if (page_addrs[i] == 0) {
page_addrs[i] = va;
break;
}
}
}
/*
 * Debug aid: forget a previously recorded VA by clearing the matching
 * page_addrs[] slot, if any.
 * NOTE(review): the declarator line is missing from this excerpt.
 */
static void
{
int i;
for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
if (page_addrs[i] == va) {
page_addrs[i] = 0;
break;
}
}
}
/*
 * Debug aid: dump a blkif request (segment count, sector number, and
 * per-segment info, judging by the surviving expressions).
 * NOTE(review): the declarator and several statement lines are missing
 * from this excerpt; the dangling fragments below are apparently the
 * tails of debug-print calls.
 */
static void
{
int i;
/*
* Exploit the public interface definitions for BLKIF_OP_READ
* etc..
*/
req->nr_segments));
(unsigned long long)req->sector_number));
for (i = 0; i < req->nr_segments; i++) {
}
}
#endif /* DEBUG */
/*
* Statistics.
*/
/*
 * Names for the per-device request-count kstats.  The order must match
 * the assignment order in the kstat update routine (see the comment
 * there referencing xdb_stats).
 */
static char *xdb_stats[] = {
	"rd_reqs",	/* read requests */
	"wr_reqs",	/* write requests */
	"br_reqs",	/* barrier write requests */
	"fl_reqs",	/* cache-flush requests */
	"oo_reqs"	/* NOTE(review): meaning not visible in this excerpt */
};
/*
 * kstat ks_update callback for the request counters; the kstats are
 * read-only (writes are rejected with EACCES).
 * NOTE(review): the declarator and the counter-assignment statements
 * are missing from this excerpt.
 */
static int
{
if (flag != KSTAT_READ)
return (EACCES);
/*
* Assignment order should match that of the names in
* xdb_stats.
*/
return (0);
}
/*
 * Apparently sets up the named kstats listed in xdb_stats[].
 * NOTE(review): the declarator and most statements are missing from
 * this excerpt; only the loop advancing the kstat-name pointers
 * survives.
 */
static boolean_t
{
return (B_FALSE);
while (nstat > 0) {
knp++;
cp++;
nstat--;
}
return (B_TRUE);
}
/*
 * NOTE(review): this function's declarator and body are almost
 * entirely missing from this excerpt; only the string-returning tail
 * remains, so its purpose cannot be determined here.
 */
static char *
{
return (rv);
}
/*
 * Validate a blkif request taken from the shared ring and turn it into
 * a buf_t: sanity-check the segment count and per-segment sector
 * ranges (the ring contents are untrusted -- see the file header
 * comment), map in the granted I/O pages, and assemble the buf.
 * Returns NULL on any validation or mapping failure.
 * NOTE(review): many statement lines are missing from this excerpt;
 * the control-flow skeleton below is incomplete.
 */
static buf_t *
{
int sectors;
int i, err;
/* init a new xdb request */
if (segs == 0) {
if (op != BLKIF_OP_FLUSH_DISKCACHE)
" is seen from domain %d with zero "
return (bp);
} else if (op == BLKIF_OP_FLUSH_DISKCACHE) {
" is seen from domain %d with non-zero "
}
/*
* segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
* according to the definition of blk interface by Xen
* we do sanity check here
*/
for (i = 0; i < segs; i++) {
if (op != BLKIF_OP_READ)
/*
* first_sect should be no bigger than last_sect and
* both of them should be no bigger than
* XB_LAST_SECTOR_IN_SEG according to definition
* of blk interface by Xen, so sanity check again
*/
if (fs > XB_LAST_SECTOR_IN_SEG)
if (ls > XB_LAST_SECTOR_IN_SEG)
}
/* map in io pages */
if (err != 0)
return (NULL);
for (i = 0; i < segs; i++) {
/*
* Although HYPERVISOR_grant_table_op() returned no
* error, mapping of each single page can fail. So,
* we have to do the check here and handle the error
* if needed
*/
int j;
for (j = 0; j < i; j++) {
#ifdef DEBUG
#endif
}
break;
}
/* record page mapping handle for unmapping later */
#ifdef DEBUG
#endif
/*
* Pass the MFNs down using the shadow list (xr_pplist)
*
* This is pretty ugly since we have implicit knowledge
* of how the rootnex binds buffers.
* The GNTTABOP_map_grant_ref op makes us do some ugly
* stuff since we're not allowed to touch these PTEs
* from the VM.
*
* Obviously, these aren't real page_t's. The rootnex
* only needs p_pagenum.
* Also, don't use btop() here or 32 bit PAE breaks.
*/
}
/*
* not all pages mapped in successfully, unmap those mapped-in
* pages and return failure
*/
if (!pagemapok) {
for (i = 0; i < segs; i++) {
continue;
(void) HYPERVISOR_grant_table_op(
}
return (NULL);
}
} else {
int isread;
/* reuse this buf */
}
/* form a buf */
sectors = 0;
/*
* Run through the segments. There are XB_NUM_SECTORS_PER_SEG sectors
* per segment. On some OSes (e.g. Linux), there may be empty gaps
* between segments. (i.e. the first segment may end on sector 6 and
* the second segment start on sector 4).
*
* if a segment's first sector is not set to 0, and this is not the
* first segment in our buf, end this buf now.
*
* if a segment's last sector is not set to XB_LAST_SECTOR_IN_SEG, and
* this is not the last segment in the request, add this segment into
* the buf, then end this buf (updating the pointer to point to the
* next segment next time around).
*/
break;
}
i++;
break;
}
}
return (bp);
}
/*
 * Apparently allocates an xdb_request_t, presumably from the
 * xs_free_req free list set up in the ioreq init routine -- confirm
 * against the full source.
 * NOTE(review): declarator and body lines missing from this excerpt.
 */
static xdb_request_t *
{
int idx;
return (req);
}
/* NOTE(review): declarator and entire body missing from this excerpt. */
static void
{
}
/* NOTE(review): declarator and entire body missing from this excerpt. */
static void
{
}
/*
 * Allocate and initialize the I/O request pool (xs_free_req) and the
 * VA range used for mapping in frontend I/O pages.
 * NOTE(review): declarator and most statements missing from this
 * excerpt.
 */
static void
{
int i;
sizeof (xdb_request_t), KM_SLEEP);
#ifdef DEBUG
#endif
}
vdp->xs_free_req = 0;
/* alloc va in host dom for io page mapping */
VM_SLEEP);
for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
}
/*
 * Tear down what the ioreq init routine set up (request pool and the
 * I/O-page VA range).
 * NOTE(review): declarator and most statements missing from this
 * excerpt.
 */
static void
{
int i;
for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
}
#ifdef DEBUG
sizeof (uint64_t));
}
#endif
}
/*
 * Interrupt handler: drain I/O requests from the shared blkif ring,
 * convert each valid one into a buf, and hand bufs to the taskq for
 * submission to the underlying block driver.  Unsupported opcodes are
 * rejected; nothing touches the ring unless xs_if_connected is set.
 * NOTE(review): many statement lines are missing from this excerpt.
 */
static uint_t
{
int ret = DDI_INTR_UNCLAIMED;
"xdb@%s: I/O request received from dom %d",
/* shouldn't touch ring buffer if not in connected state */
if (!vdp->xs_if_connected) {
return (DDI_INTR_UNCLAIMED);
}
/*
* We'll loop till there is no more request in the ring.
* We won't be stuck in this loop forever since the size of the ring
* buffer is limited, and the frontend will stop pushing requests into
* it when the ring buffer is full
*/
/* req_event will be increased in xvdi_ring_get_request() */
if (op == BLKIF_OP_READ ||
op == BLKIF_OP_WRITE ||
op == BLKIF_OP_WRITE_BARRIER ||
op == BLKIF_OP_FLUSH_DISKCACHE) {
#ifdef DEBUG
#endif
switch (op) {
case BLKIF_OP_READ:
vdp->xs_stat_req_reads++;
break;
case BLKIF_OP_WRITE_BARRIER:
/* FALLTHRU */
case BLKIF_OP_WRITE:
break;
case BLKIF_OP_FLUSH_DISKCACHE:
break;
}
/* failed to form a buf */
continue;
}
" buf %p, blkno %lld, size %lu, addr %p",
/* send bp to underlying blk driver */
} else {
}
} else {
"Unsupported cmd received from dom %d",
}
}
/* notify our taskq to push buf to underlying blk driver */
if (ret == DDI_INTR_CLAIMED)
return (ret);
}
/*
 * Completion routine for a buf issued on behalf of a frontend request:
 * if the request spans several bufs, submit the next one; otherwise
 * unmap the granted I/O pages, push a response onto the ring (when
 * still connected), and free the I/O resources.
 * NOTE(review): many statement lines are missing from this excerpt.
 */
static int
{
if (bioerr)
/* check if we are done w/ this I/O request */
if (nbp) {
if (err == 0) {
"sent buf to backend ok"));
return (DDI_SUCCESS);
}
"sent buf to backend dev failed, err=%d",
} else {
}
}
/* unmap io pages */
/*
* segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
* according to the definition of blk interface by Xen
*/
for (i = 0; i < segs; i++) {
#ifdef DEBUG
#endif
}
/*
* If we have reached a barrier write or a cache flush, then we must
* (remainder of this comment is missing from the excerpt)
*/
/*
* XXX At this point the write did succeed, so I don't
* believe we should report an error because the flush
* failed. However, this is a debatable point, so
* maybe we need to think more carefully about this.
* For now, just cast to void.
*/
}
/* send response back to frontend */
if (vdp->xs_if_connected) {
"sent resp back to frontend, id=%llu",
}
/* free io resources */
/* we're closing, someone is waiting for I/O clean-up */
}
return (DDI_SUCCESS);
}
/*
 * Negotiate a connection with the frontend: advertise
 * XenbusStateInitialised, read the ring reference/event channel (and
 * blkif protocol, selecting the matching sring entry size) from the
 * frontend's xenstore directory, map the ring, and bind the event
 * channel.
 * NOTE(review): many statement lines are missing from this excerpt.
 */
static int
{
int err;
char *oename;
/*
* Switch to the XenbusStateInitialised state. This lets the
* frontend know that we're about to negotiate a connection.
*/
/*
* Gather info from frontend
*/
return (DDI_FAILURE);
NULL);
if (err != 0) {
"Getting ring-ref and evtchn from frontend");
return (DDI_FAILURE);
}
if (err)
else {
/*
* We must check for NATIVE first, so that the fast path
* is taken for copying data from the guest to the host.
*/
vdp->xs_entrysize =
sizeof (union blkif_x86_32_sring_entry);
0) {
vdp->xs_entrysize =
sizeof (union blkif_x86_64_sring_entry);
} else {
return (DDI_FAILURE);
}
}
}
#ifdef DEBUG
#endif
/*
* Map and init ring. The ring parameters must match those which
* have been allocated in the front end.
*/
return (DDI_FAILURE);
/*
* This will be removed after we use shadow I/O ring request since
* we don't need to access the ring itself directly, thus the access
* handle is not needed
*/
/* bind event channel */
if (err != DDI_SUCCESS) {
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/* NOTE(review): declarator and entire body missing from this excerpt. */
static void
{
}
/*
* (leading sentence truncated in this excerpt -- apparently: changes
* the device) that the backend is accessing. It does this by
* disconnecting from the frontend, closing the old device, clearing a
* bunch of xenbus parameters, and switching back to the
* XenbusStateInitialising state. The frontend should notice this
* transition to the XenbusStateInitialising state and should attempt
* to reconnect to us (the backend).
*/
static void
{
char *xsname;
int err;
return;
return;
/*
* Close the device we're currently accessing and update the
* (remainder of this comment is missing from the excerpt)
*/
goto errout;
}
/*
* Delete all the xenbus properties that are connection dependent
* and go back to the initializing state so that the frontend
* driver can re-negotiate a connection.
*/
xbt, XenbusStateInitialising) > 0))) {
goto errout;
}
/* transaction is ended, don't need to abort it */
goto trans_retry;
}
goto errout;
}
/* Change the device that we plan to access */
return;
}
/*
* xdb_watch_params_cb() - This callback is invoked whenever there
* is an update to the following xenbus parameter:
* (parameter name missing from this excerpt -- presumably "params")
*
* This normally happens during xm block-configure operations, which
* are used to change CD device images for HVM domUs.
*/
/*ARGSUSED*/
static void
{
return;
}
return;
}
/* Nothing to do */
return;
}
/*
* If the frontend isn't a cd device, doesn't support media
* requests, or has locked the media, then we can't change
* the params value. Restore the current value.
*/
if (!XDB_IS_FE_CD(vdp) ||
"!%s: media locked, ignoring params update", str);
return;
}
"block-configure params request: \"%s\"", str));
}
/*
* xdb_watch_media_req_cb() - This callback is invoked whenever there
* is an update to the following xenbus parameter:
* (parameter name missing from this excerpt)
*
* Media requests are only supported on CD devices and are issued by
* the frontend. Currently the only supported media request operations
* are "lock" and "eject". A "lock" prevents the backend from changing
* (text appears truncated here in this excerpt); an "eject" apparently
* tells the backend device that it should disconnect from the frontend.
*/
/*ARGSUSED*/
static void
{
return;
}
return;
}
if (!XDB_IS_FE_CD(vdp)) {
"media-req only supported for cdrom devices");
return;
}
return;
}
}
/*
* If we're dealing with a cdrom device, let the frontend know that
* we support media requests via XBP_MEDIA_REQ_SUP, and setup a watch
* to handle those frontend media request changes, which modify the
* following xenstore parameter:
* (parameter name missing from this excerpt)
*/
static boolean_t
{
return (B_FALSE);
if (!XDB_IS_FE_CD(vdp))
return (B_TRUE);
return (B_FALSE);
"Failed to register watch for cdrom media requests");
return (B_FALSE);
}
return (B_TRUE);
}
/*
* Get our params value. Also, if we're using "params" then setup a
* watch to handle xm block-configure operations which modify the
* following xenstore parameter:
* (parameter name missing from this excerpt)
*/
static boolean_t
{
return (B_FALSE);
}
if (err != 0)
return (B_FALSE);
/*
* If we got our backing store path from "dynamic-device-path" then
* there's no reason to watch "params"
*/
if (!watch_params)
return (B_TRUE);
return (B_FALSE);
}
return (B_TRUE);
}
#define LOFI_CTRL_NODE "/dev/lofictl"
#define LOFI_DEV_NODE "/devices/pseudo/lofi@0:"
/*
 * Resolve the backend node path; when the backing store is lofi-based
 * (XDB_IS_BE_LOFI), map the file through /dev/lofictl ioctls and use
 * the resulting lofi device node instead.
 * NOTE(review): many statement lines are missing from this excerpt.
 */
static int
{
struct lofi_ioctl *li;
return (DDI_FAILURE);
return (DDI_FAILURE);
}
if (!XDB_IS_BE_LOFI(vdp)) {
return (DDI_SUCCESS);
}
do {
if (err != 0) {
return (DDI_FAILURE);
}
sizeof (li->li_filename));
if (err != 0) {
return (DDI_FAILURE);
}
/*
* (leading part of this comment is missing from the excerpt)
* former is available immediately after calling ldi_ioctl
*/
return (DDI_SUCCESS);
}
/*
 * Undo the lofi mapping established by the node setup routine; a
 * no-op when the backend isn't lofi-based.
 * NOTE(review): several statement lines missing from this excerpt.
 */
static void
{
struct lofi_ioctl *li;
int err;
if (!XDB_IS_BE_LOFI(vdp))
return;
sizeof (li->li_filename));
do {
if (err != 0) {
return;
}
}
}
/*
 * Open the backend device via LDI and record its block size and
 * removable-media property.  Succeeds trivially when a PV cdrom has
 * no virtual cd associated with it (no backing device).
 * NOTE(review): many statement lines are missing from this excerpt.
 */
static int
{
int blksize;
char *nodepath;
/*
* it's possible to have no backing device when dealing
* with a pv cdrom drive that has no virtual cd associated
* with it.
*/
return (DDI_SUCCESS);
}
return (DDI_FAILURE);
/* try to open backend device */
"Getting device path of backend device");
return (DDI_FAILURE);
}
if (ldi_open_by_name(nodepath,
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
"blksize", DEV_BSIZE);
/* check if the underlying device is a removable disk */
"removable-media"))
return (DDI_SUCCESS);
}
/*
 * Close the backend device and reset the cached sector count.
 * NOTE(review): declarator and several lines missing from this
 * excerpt.
 */
static void
{
return;
}
vdp->xs_sectors = 0;
}
/*
* Kick-off connect process.
* If xs_fe_initialised == B_TRUE and xs_hp_connected == B_TRUE
* the xs_if_connected will be changed to B_TRUE on success.
*/
static void
{
char *xsname;
return;
/*
* if the hotplug scripts haven't run or if the frontend is not
* initialized, then we can't try to connect.
*/
return;
}
/* If we're already connected then there's nothing to do */
if (vdp->xs_if_connected) {
return;
}
/*
* Start connect to frontend only when the backend device is ready
* and frontend has moved to XenbusStateInitialised, which means
* ready to connect.
*/
return;
return;
}
/* init i/o requests */
!= DDI_SUCCESS) {
return;
}
dinfo = 0;
dinfo |= VDISK_READONLY;
if (XDB_IS_BE_RMB(vdp))
dinfo |= VDISK_REMOVABLE;
if (XDB_IS_BE_CD(vdp))
dinfo |= VDISK_CDROM;
if (XDB_IS_FE_CD(vdp))
/*
* we can receive intr any time from now on
* mark that we're ready to take intr
*/
/* write into xenstore the info needed by frontend */
goto errout;
}
/* If feature-barrier isn't present in xenstore, add it. */
if (((!fb_exists &&
goto errout;
}
/* transaction is ended, don't need to abort it */
goto trans_retry;
}
goto errout;
}
return;
}
/*
* Disconnect from frontend and close backend device
* (declared as a forward prototype near the top of the file).
*/
static void
{
/*
* if the hotplug scripts haven't run or if the frontend is not
* initialized, then we can't be connected, so there's no
* connection to close.
*/
return;
}
/* if we're not connected, there's nothing to do */
if (!vdp->xs_if_connected) {
return;
}
/* stop accepting I/O request from frontend */
/* clean up resources and close this interface */
}
/*
 * Taskq worker: while xs_send_buf is set, wait for bufs queued by the
 * interrupt handler and push each down to the backend device; bufs
 * that need no I/O, or whose submission fails, are completed at once
 * via xdb_biodone().
 * NOTE(review): several statement lines are missing from this
 * excerpt; the only behavioral change below is the log-string fix.
 */
static void
xdb_send_buf(void *arg)
{
int err;
while (vdp->xs_send_buf) {
/* wait for some io to send */
"send buf waiting for io"));
continue;
}
/* no I/O needs to be done */
(void) xdb_biodone(bp);
continue;
}
if (err != 0) {
(void) xdb_biodone(bp);
/* "devfailed" -> "dev failed": match the same message in xdb_biodone() */
"xdb@%s: sent buf to backend dev failed, err=%d",
} else {
"sent buf to backend ok"));
}
}
}
/*
 * Hotplug-script state-change callback.
 * NOTE(review): the declarator's first line is missing from this
 * excerpt (only the trailing "void *impl_data)" remains).
 */
/*ARGSUSED*/
static void
void *impl_data)
{
return;
/* If the hotplug script has already run, there's nothing to do */
if (vdp->xs_hp_connected) {
return;
}
}
/*
 * Otherend (frontend) state-change callback: react to every possible
 * XenbusState the frontend may report, since no transition model is
 * guaranteed.
 * NOTE(review): the declarator's first line and per-case statements
 * are missing from this excerpt.
 */
/*ARGSUSED*/
static void
void *impl_data)
{
/*
* Now it'd really be nice if there was a well defined state
* transition model for xen frontend drivers, but unfortunately
* there isn't. So we're stuck with assuming that all state
* transitions are possible, and we'll just have to deal with
* them regardless of what state we're in.
*/
switch (new_state) {
case XenbusStateUnknown:
case XenbusStateInitialising:
case XenbusStateInitWait:
/* tear down our connection to the frontend */
break;
case XenbusStateInitialised:
/*
* If we were connected, then we need to drop the connection
* and re-negotiate it.
*/
break;
case XenbusStateConnected:
/* nothing to do here other than congratulate the frontend */
break;
case XenbusStateClosing:
/* monkey see monkey do */
break;
case XenbusStateClosed:
/* tear down our connection to the frontend */
break;
}
}
/*
 * attach(9E) entry point: allocate per-device soft state, initialize
 * kstats and device properties, create the send-buf taskq, register
 * frontend/hotplug state-change watches, and kick off the hotplug
 * script.  DDI_RESUME is not supported.
 * NOTE(review): many statement lines (including the errout labels)
 * are missing from this excerpt.
 */
static int
{
char *str;
switch (cmd) {
case DDI_RESUME:
return (DDI_FAILURE);
case DDI_ATTACH:
break;
default:
return (DDI_FAILURE);
}
/* DDI_ATTACH */
return (DDI_FAILURE);
/*
* Disable auto-detach. This is necessary so that we don't get
* detached while we're disconnected from the front end.
*/
return (DDI_FAILURE);
return (DDI_FAILURE);
if (!xdb_kstat_init(vdp))
goto errout1;
/* Check if the frontend device is supposed to be a cdrom */
return (DDI_FAILURE);
/* Check if the frontend device is supposed to be read only */
return (DDI_FAILURE);
goto errout2;
}
TASKQ_DEFAULTPRI, 0);
/* Watch frontend and hotplug state change */
NULL) != DDI_SUCCESS) ||
NULL) != DDI_SUCCESS))
goto errout3;
/*
* Kick-off hotplug script
*/
goto errout3;
}
/*
* start waiting for hotplug event and otherend state event
* mainly for debugging, frontend will not take any op seeing this
*/
ddi_get_name_addr(dip)));
return (DDI_SUCCESS);
/* Disconnect from the backend */
/* wait for all io to drain and destroy io taskq */
/* tear down block-configure watch */
/* remove kstats */
/* free up driver state */
return (DDI_FAILURE);
}
/*
 * detach(9E) entry point: refuse while the frontend interface is
 * still connected; otherwise release driver state.  DDI_SUSPEND is
 * not supported.
 * NOTE(review): several statement lines are missing from this
 * excerpt.
 */
/*ARGSUSED*/
static int
{
switch (cmd) {
case DDI_SUSPEND:
return (DDI_FAILURE);
case DDI_DETACH:
break;
default:
return (DDI_FAILURE);
}
/* DDI_DETACH handling */
/* refuse to detach if we're still in use by the frontend */
if (vdp->xs_if_connected) {
return (DDI_FAILURE);
}
ddi_get_name_addr(dip)));
return (DDI_SUCCESS);
}
/*
 * dev_ops for this backend driver.  devo_cb_ops is NULL: the driver
 * creates no minor nodes (see the file header comment), so only
 * attach/detach and a no-op quiesce are meaningful.
 */
static struct dev_ops xdb_dev_ops = {
DEVO_REV, /* devo_rev */
0, /* devo_refcnt */
ddi_getinfo_1to1, /* devo_getinfo */
nulldev, /* devo_identify */
nulldev, /* devo_probe */
xdb_attach, /* devo_attach */
xdb_detach, /* devo_detach */
nodev, /* devo_reset */
NULL, /* devo_cb_ops */
NULL, /* devo_bus_ops */
NULL, /* power */
ddi_quiesce_not_needed, /* quiesce */
};
/*
* Module linkage information for the kernel.
* NOTE(review): the "static struct modldrv modldrv = {" declarator and
* the MODREV_1 / terminating NULL lines of xdb_modlinkage appear to be
* missing from this excerpt.
*/
&mod_driverops, /* Type of module. */
"vbd backend driver", /* Name of the module */
&xdb_dev_ops /* driver ops */
};
static struct modlinkage xdb_modlinkage = {
&modldrv,
};
/*
 * Loadable-module entry: create the soft-state anchor and register
 * the module, tearing the soft state back down on failure.
 * NOTE(review): the mod_install() call line is missing from this
 * excerpt.
 */
int
_init(void)
{
int rv;
sizeof (xdb_t), 0)) == 0)
ddi_soft_state_fini((void **)&xdb_statep);
return (rv);
}
/*
 * Loadable-module exit: on successful removal, destroy the soft-state
 * anchor.
 * NOTE(review): the mod_remove() call line is missing from this
 * excerpt.
 */
int
_fini(void)
{
int rv;
return (rv);
ddi_soft_state_fini((void **)&xdb_statep);
return (rv);
}
/*
 * NOTE(review): declarator (presumably _info(struct modinfo *)) and
 * body are missing from this excerpt.
 */
int
{
}
/*
 * Apparently fetches the next request from the ring, converting from
 * the negotiated blkif protocol (native/x86_32/x86_64) as needed;
 * returns 0 on the early-out path and 1 otherwise.
 * NOTE(review): declarator and per-case statements are missing from
 * this excerpt.
 */
static int
{
return (0);
switch (vdp->xs_blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
break;
case BLKIF_PROTOCOL_X86_32:
break;
case BLKIF_PROTOCOL_X86_64:
break;
default:
}
return (1);
}
/*
 * Apparently pushes a response onto the ring in the negotiated blkif
 * protocol format (native/x86_32/x86_64).
 * NOTE(review): declarator, per-case statements, and the return
 * expression are missing from this excerpt.
 */
static int
{
switch (vdp->xs_blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
break;
case BLKIF_PROTOCOL_X86_32:
break;
case BLKIF_PROTOCOL_X86_64:
break;
default:
}
}
/*
 * Copy a 32-bit-protocol request into native form, clamping the
 * segment count to BLKIF_MAX_SEGMENTS_PER_REQUEST before copying the
 * per-segment array (guards against a misbehaving frontend).
 * NOTE(review): declarator and the field-copy statements are missing
 * from this excerpt.
 */
static void
{
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
if (n > src->nr_segments)
n = src->nr_segments;
for (i = 0; i < n; i++)
}
/*
 * 64-bit-protocol counterpart of the copy routine above: clamp the
 * segment count to BLKIF_MAX_SEGMENTS_PER_REQUEST, then copy.
 * NOTE(review): declarator and the field-copy statements are missing
 * from this excerpt.
 */
static void
{
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
if (n > src->nr_segments)
n = src->nr_segments;
for (i = 0; i < n; i++)
}