/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <io/xdf_shell.h>
/*
* General Notes
*
* We don't support disks with bad block mappings. We have this
* limitation because the underlying xdf driver doesn't support
* bad block remapping. If there is a need to support this feature
* it should be added directly to the xdf driver and we should just
* pass requests straight on through and let it handle the remapping.
* Also, it's probably worth pointing out that most modern disks do bad
* block remapping internally in the hardware so there's actually less
* of a chance of us ever discovering bad blocks. Also, in most cases
* this driver (and the xdf driver) will only be used with virtualized
* devices, so one might wonder why a virtual device would ever actually
* experience bad blocks. To wrap this up, you might be wondering how
* these bad block mappings get created and how they are managed. Well,
* there are two tools for managing bad block mappings, format(1M) and
* addbadsec(1M). Format(1M) can be used to do a surface scan of a disk
* to attempt to find bad blocks and create mappings for them. Format(1M)
* and addbadsec(1M) can also be used to edit existing mappings that may
* be saved on the disk.
*
* The underlying PV driver that this driver passes on requests to is the
* xdf driver. Since in most cases the xdf driver doesn't deal with
* physical disks it has its own algorithm for assigning a physical
* geometry to a virtual disk (i.e., cylinder count, head count, etc.)
* The default values chosen by the xdf driver may not match those
* assigned to a disk by a hardware disk emulator in an HVM environment.
* This is a problem since these physical geometry attributes affect
* things like the partition table, backup label location, etc. So
* to emulate disk devices correctly we need to know the physical geometry
* that was assigned to a disk at the time of its initialization.
* Normally in an HVM environment this information will be passed to
* the BIOS and operating system from the hardware emulator that is
* emulating the disk devices. In the case of a solaris dom0+xvm
* this would be qemu. So to work around this issue, this driver will
* query the emulated hardware to get the assigned physical geometry
* and then pass this geometry onto the xdf driver so that it can use it.
* But really, this information is essentially metadata about the disk
* that should be kept with the disk image itself. (Assuming of course
* that a disk image is the actual backing store for this emulated device.)
* This metadata should also be made available to PV drivers via a common
* mechanism, probably the xenstore. The fact that this metadata isn't
* available outside of HVM domains means that it's difficult to move
* disks between HVM and PV domains, since a fully PV domain will have no
* way of knowing what the correct geometry of the target device is.
* (Short of reading the disk, looking for things like partition tables
* and labels, and taking a best guess at what the geometry was when
* the disk was initialized. Unsurprisingly, qemu actually does this.)
*
* This driver has to map xdf shell device instances into their corresponding
* xdf device instances. We have to do this to ensure that when a user
* accesses an emulated xdf shell device we map those accesses to the proper
* paravirtualized device. Basically what we need to know is how multiple
* 'disk' entries in a domU configuration file get mapped to emulated
* xdf shell devices and to xdf devices. The 'disk' entry to xdf instance
* mappings we know because those are done within the Solaris xvdi code
* and the xpvd nexus driver. But the config to emulated devices mappings
* are handled entirely within the xen management tool chain and the
* hardware emulator. Since all the tools that establish these mappings
* live in dom0, dom0 should really supply us with this information,
* probably via the xenstore. Unfortunately it doesn't, so, since there's
* no good way to determine this mapping dynamically, this driver uses
* a hard coded set of static mappings. These mappings are hardware
* emulator specific because each different hardware emulator could have
* a different device tree with different xdf shell device paths. This
* means that if we want to continue to use this static mapping approach
* to allow Solaris to run on different hardware emulators we'll have
* to analyze each of those emulators to determine what paths they
* use and hard code those paths into this driver. yech. This metadata
* really needs to be supplied to us by dom0.
*
* This driver accesses underlying xdf nodes. Unfortunately, devices
* must create minor nodes during attach, and for disk devices to create
* minor nodes, they have to look at the label on the disk, so this means
* that disk drivers must be able to access a disk's contents during
* attach. That means that this disk driver must be able to access
* underlying xdf nodes during attach. Unfortunately, due to device tree
* locking restrictions, we cannot have an attach operation occurring on
* this device and then attempt to access another device which may
* cause another attach to occur in a different device tree branch
* since this could result in deadlock. Hence, this driver can only
* access xdf device nodes that we know are attached, and it can't use
* any ddi interfaces to access those nodes if those interfaces could
* trigger an attach of the xdf device. So this driver works around
* these restrictions by talking directly to xdf devices via
* xdf_hvm_hold(). This interface takes a pathname to an xdf device,
* and if that device is already attached then it returns a held dip
* pointer for that device node. This prevents us from getting into
* deadlock situations, but now we need a mechanism to ensure that all
* the xdf device nodes this driver might access are attached before
* this driver tries to access them. This is accomplished via the
* hvmboot_rootconf() callback which is invoked just before root is
* mounted. hvmboot_rootconf() will attach xpvd and tell it to configure
* all xdf devices visible to the system. All these xdf device nodes
* will also be marked with the "ddi-no-autodetach" property so that
* once they are configured, they will not be automatically unconfigured.
* The only way that they could be unconfigured is if the administrator
* explicitly attempts to unload required modules via rem_drv(1M)
* or modunload(1M).
*/
/*
* 16 partitions + fdisk (see xdf.h)
*/
/*
* State kept for the HVM side of a shell device; the only member visible
* here is a path string.
*
* NOTE(review): the remaining members and the closing of this typedef
* are not visible in this copy of the file.
*/
typedef struct xdfs_hvm_state {
char *xdfs_hs_path;
/* local function and structure prototypes */
/* Buffer-completion callback; takes the struct buf that finished. */
static int xdfs_iodone(struct buf *);
/*
* Globals
*/
/*
* Private globals
*/
/*
* Opaque file-private pointer.
* NOTE(review): presumably the soft state anchor that _init() sets up
* with xdfs_ss_size and XDFS_NODES -- confirm.
*/
static void *xdfs_ssp;
/*
* Private helper functions
*/
/*
* Try to take a hold on the target.
*
* Returns B_FALSE, without counting a hold, when xdfss_tgt_attached is
* not set. Otherwise increments xdfss_tgt_holds and returns B_TRUE.
*
* NOTE(review): the function name and parameter list are not visible in
* this copy; callers in this file invoke xdfs_tgt_hold(xsp), which is
* presumably this function. Any locking around the counter is likewise
* not visible here.
*/
static boolean_t
{
if (!xsp->xdfss_tgt_attached) {
return (B_FALSE);
}
xsp->xdfss_tgt_holds++;
return (B_TRUE);
}
/*
* Drop one hold by decrementing xdfss_tgt_holds.
*
* NOTE(review): the function name, parameter list, and the statement
* that runs when the count reaches zero are not visible in this copy.
*/
static void
{
if (--xsp->xdfss_tgt_holds == 0)
}
/*
* Answer an information request selected by cmd.
*
* Fails with ENXIO when a hold on the target cannot be taken through
* xdfs_tgt_hold(). TG_GETVIRTGEOM is answered locally rather than being
* forwarded. The visible paths that follow the hold leave through the
* "out" label and return rv.
*
* NOTE(review): the function name, parameter list, and several
* statements (including the conditions guarding the early returns and
* gotos) are not visible in this copy. Presumably this is
* xdfs_lb_getinfo() -- confirm.
*/
/*ARGSUSED*/
static int
{
int rv;
return (ENXIO);
if (!xdfs_tgt_hold(xsp))
return (ENXIO);
if (cmd == TG_GETVIRTGEOM) {
/*
* The native xdf driver doesn't support this ioctl.
* Instead of passing it on, emulate it here so that the
* results look the same as what we get for a real xdf
* shell device.
*
* Get the real size of the device
*/
goto out;
/*
* If the controller returned us something that doesn't
* really fit into an Int 13/function 8 geometry
* result, just fail the ioctl. See PSARC 1998/313.
*/
goto out;
}
rv = 0;
goto out;
}
out:
return (rv);
}
/*
* Predicate that returns B_TRUE from inside its nested blocks and
* B_FALSE when it falls through them.
*
* NOTE(review): the function name, parameter list, and the statements
* that use otyp are not visible in this copy. Presumably this reports
* whether something is open for any open type -- confirm.
*/
static boolean_t
{
int otyp;
return (B_TRUE);
}
}
return (B_FALSE);
}
/*
* Predicate that returns B_TRUE from inside its nested block and B_FALSE
* when it falls through.
*
* NOTE(review): the function name, parameter list, and the statements
* that use part are not visible in this copy. Presumably this walks
* partitions looking for an open one -- confirm.
*/
static boolean_t
{
int part;
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Completion handler that hands I/O results back and returns 0.
*
* NOTE(review): the function name, parameter list, and the statements
* doing the propagation are not visible in this copy. Presumably this is
* the xdfs_iodone() declared near the top of the file -- confirm.
*/
static int
{
/* Propagate back the io results */
return (0);
}
/*
* Returns the result of a call whose last two arguments are
* xsp->xdfss_cmlbhandle and 0.
*
* NOTE(review): the function name, parameter list, and the start of the
* returned call are not visible in this copy. Presumably this is
* xdfs_cmlb_attach(), which the probe path checks against 0 -- confirm.
*/
static int
{
xsp->xdfss_cmlbhandle, 0));
}
/*
* Bring a shell instance up on top of its target xdf device.
*
* From the visible code and comments the sequence is: reset the hold
* count, push the physical geometry to xdf for non-CD targets, force the
* xdf front end to connect, create the iostat kstat, mark the instance
* attached, attach cmlb, set up the devid for non-CD targets, and report
* new device nodes. Returns B_TRUE on success, and B_FALSE from each of
* the failure exits.
*
* NOTE(review): the function name, parameter list, and most of the
* statements between the comments (including the conditions guarding the
* B_FALSE returns and any cleanup) are not visible in this copy.
*/
static boolean_t
{
xsp->xdfss_tgt_holds = 0;
XDF_MINOR(tgt_instance, 0));
/*
* GROSS HACK ALERT! GROSS HACK ALERT!
*
* Before we can initialize the cmlb layer, we have to tell the
* underlying xdf device what its physical geometry should be.
* See the block comments at the top of this file for more info.
*/
if (!xsp->xdfss_tgt_is_cd &&
return (B_FALSE);
/*
* Force the xdf front end driver to connect to the backend. From
* the solaris device tree perspective, the xdf driver devinfo node
* is already in the ATTACHED state. (Otherwise xdf_hvm_hold()
* would not have returned a dip.) But this doesn't mean that the
* xdf device has actually established a connection to its back
* end driver. For us to be able to access the xdf device it needs
* to be connected.
*/
return (B_FALSE);
}
/*
* Unfortunately, the dom0 backend driver doesn't support
* important media request operations like eject, so fail
* the probe (this should cause us to fall back to emulated
* hvm device access, which does support things like eject).
*/
return (B_FALSE);
}
/* create kstat for iostat(1M) */
tgt_instance) != 0)
return (B_FALSE);
/*
* Now we need to mark ourselves as attached and drop xdfss_mutex.
* We do this because the final steps in the attach process will
* need to access the underlying disk to read the label and
* possibly the devid.
*/
return (B_FALSE);
}
/*
* Initialize cmlb. Note that for partition information cmlb
* will access the underlying xdf disk device directly via
* xdfs_lb_rdwr() and xdfs_lb_getinfo(). There are no
* layered driver handles associated with this access because
* it is a direct disk access that doesn't go through
* any of the device nodes exported by the xdf device (since
* all exported device nodes only reflect the portion of
* the device that each node
* is associated with.) So while not observable via the LDI,
* this direct disk access is ok since we're actually holding
* the target device.
*/
if (xdfs_cmlb_attach(xsp) != 0) {
return (B_FALSE);
}
/* setup devid string */
if (!xsp->xdfss_tgt_is_cd)
/* Have the system report any newly created device nodes */
return (B_TRUE);
}
/*
* Returns B_FALSE on its failure exit and B_TRUE otherwise.
*
* NOTE(review): the function name, parameter list, the condition
* guarding the B_FALSE return, and the teardown work are not visible in
* this copy. Presumably this is xdfs_tgt_detach(), which the detach
* path calls -- confirm.
*/
static boolean_t
{
return (B_FALSE);
return (B_TRUE);
}
/*
* Xdf_shell interfaces that may be called from outside this file.
*/
/*
* Externally visible entry point with no return value.
*
* NOTE(review): the function name, parameter list, and body are not
* visible in this copy, so its behaviour cannot be described from here.
*/
void
{
}
/*
* Cmlb ops vector, allows the cmlb module to directly access the entire
* xdf disk device without going through any partitioning layers.
*/
/*
* Fails with ENXIO when a hold on the target cannot be taken through
* xdfs_tgt_hold(); otherwise returns rv.
*
* NOTE(review): the function name, parameter list, the condition
* guarding the first ENXIO return, and the statements that produce rv
* are not visible in this copy. Given the cmlb ops vector comment that
* precedes it, presumably this is xdfs_lb_rdwr() -- confirm.
*/
int
{
int rv;
return (ENXIO);
if (!xdfs_tgt_hold(xsp))
return (ENXIO);
return (rv);
}
/*
* Driver PV and HVM cb_ops entry points
*/
/*
* Open entry point (listed as xdfs_open in the cb_ops table).
*
* Has one branch for instances in HVM mode and one for all others.
* The non-HVM path allocates an LDI handle and always opens the
* underlying device as a block device, for the reasons given in the
* comment below. Returns 0 on success, EINVAL on an early sanity
* failure, or the error from the failing step.
*
* NOTE(review): the function name, parameter list, and most statements
* (including the condition guarding the ENOTSUP return in the HVM
* branch) are not visible in this copy.
*/
/*ARGSUSED*/
static int
{
int err = 0;
return (EINVAL);
if (XDFS_HVM_MODE(xsp)) {
return (ENOTSUP);
}
/* allocate an ldi handle */
/*
* We translate all device opens (chr, blk, and lyr) into
* block device opens. Why? Because for all the opens that
* come through this driver, we only keep around one LDI handle.
* So that handle can only be of one open type. The reason
* that we choose the block interface for this is that to use
* the block interfaces for a device the system needs to allocate
* buf_ts, which are associated with system memory which can act
* as a cache for device data. So normally when a block device
* is closed the system will ensure that all these pages get
* flushed out of memory. But if we were to open the device
* as a character device, then when we went to close the underlying
* device (even if we had invoked the block interfaces) any data
* remaining in memory wouldn't necessarily be flushed out
* before the device was closed.
*/
if (err != 0) {
return (err);
}
/* Disk devices really shouldn't clone */
} else {
if (err != 0) {
return (err);
}
/* Disk devices really shouldn't clone */
}
return (0);
}
/*
* Close entry point (listed as xdfs_close in the cb_ops table).
*
* Returns ENXIO when the dev_t sanity check or the "is it actually
* open" check fails; otherwise returns err.
*
* NOTE(review): the function name, parameter list, and most statements
* (including the condition guarding the ENOTSUP return in the HVM
* branch) are not visible in this copy.
*/
/*ARGSUSED*/
static int
{
int err = 0;
/* Sanity check the dev_t associated with this request. */
return (ENXIO);
if (XDFS_HVM_MODE(xsp)) {
return (ENOTSUP);
}
/*
* Sanity check that the device is actually open. On debug
* kernels we'll panic and on non-debug kernels we'll return failure.
*/
return (ENXIO);
}
} else {
}
}
return (err);
}
/*
* Strategy entry point (listed as xdfs_strategy in the cb_ops table).
*
* Per the comments, the request buffer is cloned and the clone is passed
* to the xdf driver; completion and errors come back via xdfs_iodone().
* Sanity-check failures jump to the "err" label. The visible code
* returns 0 from there.
*
* NOTE(review): the function name, parameter list, and most statements
* (including the condition guarding the ENOTSUP return in the HVM
* branch and whatever the "err" label does before returning) are not
* visible in this copy.
*/
int
{
/* Sanity check the dev_t associated with this request. */
goto err;
if (XDFS_HVM_MODE(xsp)) {
return (ENOTSUP);
}
/*
* Sanity checks that the dev_t associated with the buf we were
* passed corresponds to an open partition. On debug kernels we'll
* panic and on non-debug kernels we'll return failure.
*/
goto err;
}
/* clone this buffer */
/*
* If we're being invoked on behalf of the physio() call in
* xdfs_dioctl_rwcmd() then b_private will be set to
* XB_SLICE_NONE and we need to propagate this flag into the
* cloned buffer so that the xdf driver will see it.
*/
/*
* Pass on the cloned buffer. Note that we don't bother to check
* for failure because the xdf strategy routine will have to
* invoke biodone() if it wants to return an error, which means
* that the xdfs_iodone() callback will get invoked and it
* will propagate the error back up the stack and free the cloned
* buffer.
*/
err:
return (0);
}
/*
* Returns ENOTSUP when the instance is not in HVM mode.
*
* NOTE(review): the function name, parameter list, and the HVM-mode
* path are not visible in this copy. By position this is presumably
* the xdfs_dump entry from the cb_ops table -- confirm.
*/
static int
{
if (!XDFS_HVM_MODE(xsp))
return (ENOTSUP);
}
/*
* Returns ENOTSUP when the instance is not in HVM mode.
*
* NOTE(review): the function name, parameter list, and the HVM-mode
* path are not visible in this copy. By position this is presumably
* the xdfs_read entry from the cb_ops table -- confirm.
*/
/*ARGSUSED*/
static int
{
if (!XDFS_HVM_MODE(xsp))
return (ENOTSUP);
}
/*
* Returns ENOTSUP when the instance is not in HVM mode.
*
* NOTE(review): the function name, parameter list, and the HVM-mode
* path are not visible in this copy. By position this is presumably
* the xdfs_write entry from the cb_ops table -- confirm.
*/
/*ARGSUSED*/
static int
{
if (!XDFS_HVM_MODE(xsp))
return (ENOTSUP);
}
/*
* Returns ENOTSUP when the instance is not in HVM mode.
*
* NOTE(review): the function name, parameter list, and the HVM-mode
* path are not visible in this copy. By position this is presumably
* the xdfs_aread entry from the cb_ops table -- confirm.
*/
/*ARGSUSED*/
static int
{
if (!XDFS_HVM_MODE(xsp))
return (ENOTSUP);
}
/*
* Returns ENOTSUP when the instance is not in HVM mode.
*
* NOTE(review): the function name, parameter list, and the HVM-mode
* path are not visible in this copy. By position this is presumably
* the xdfs_awrite entry from the cb_ops table -- confirm.
*/
/*ARGSUSED*/
static int
{
if (!XDFS_HVM_MODE(xsp))
return (ENOTSUP);
}
/*
* Ioctl entry point (listed as xdfs_ioctl in the cb_ops table).
*
* In HVM mode the request is handed to xdfs_hvm_cb_ops->cb_ioctl().
* Otherwise rv is returned straight away when "done" is set. When
* "done" is not set and rv is 0, a geometry validation is forced before
* returning rv.
*
* NOTE(review): the function name, most of the parameter list, the
* condition guarding the ENOTSUP return, and the statements that set rv
* and done are not visible in this copy.
*/
static int
int *rvalp)
{
int rv;
if (XDFS_HVM_MODE(xsp)) {
return (ENOTSUP);
return (xdfs_hvm_cb_ops->cb_ioctl(
}
if (done)
return (rv);
if (rv == 0) {
/* Force Geometry Validation */
}
return (rv);
}
/*
* Property helper whose only visible exit returns DDI_PROP_NOT_FOUND.
*
* NOTE(review): the function name, parameter list, and every other
* statement are not visible in this copy, so the condition for this
* result cannot be described from here.
*/
static int
{
return (DDI_PROP_NOT_FOUND);
}
/*
* Property-lookup entry point (listed as xdfs_prop_op in the cb_ops
* table).
*
* Per the comments: requests for unattached nodes go to ddi_prop_op(),
* HVM-mode requests are passed on, and only dynamic lookups are
* forwarded to the underlying xdf device. The forwarding is done under
* a target hold rather than through the LDI. Returns
* DDI_PROP_NOT_FOUND when the sanity check fails or the hold cannot be
* taken, otherwise rv.
*
* NOTE(review): the function name, parameter list, and most statements
* between the comments are not visible in this copy.
*/
static int
{
int rv;
/*
* Sanity check that if a dev_t or dip were specified that they
* correspond to this device driver. On debug kernels we'll
* panic and on non-debug kernels we'll return failure.
*/
return (DDI_PROP_NOT_FOUND);
/*
* This property lookup might be associated with a device node
* that is not yet attached, if so pass it onto ddi_prop_op().
*/
/* If we're accessing the device in hvm mode, pass this request on */
if (XDFS_HVM_MODE(xsp))
/*
* Make sure we only lookup static properties.
*
* If there are static properties of the underlying xdf driver
* that we want to mirror, then we'll have to explicitly look them
* up and define them during attach. There are a few reasons
* for this. Most importantly, most static properties are typed
* and all dynamic properties are untyped, ie, for dynamic
* properties the caller must know the type of the property and
* how to interpret the value of the property. The prop_op driver
* entry point only deals in untyped
* properties, so if we were to attempt to lookup and pass back
* static properties of the underlying device here then we would
* be losing the type information for those properties. Another
* reason we don't want to pass on static property requests is that
* static properties are enumerable in the device tree, whereas
* dynamic ones are not.
*/
/*
* We can't use the ldi here to access the underlying device because
* the ldi actually opens the device, and that open might fail if the
* device has already been opened with the FEXCL flag. If we used
* the ldi here, it would also be possible for some other caller to
* try open the device with the FEXCL flag and get a failure back
* because we have it open to do a property query. Instead we'll
* grab a hold on the target dip.
*/
if (!xdfs_tgt_hold(xsp))
return (DDI_PROP_NOT_FOUND);
/* figure out the dip and the dev_t we're going to pass on down */
if (dev == DDI_DEV_T_ANY) {
} else {
}
/*
* Cdev_prop_op() is not a public interface, and normally the caller
* is required to make sure that the target driver actually implements
* this interface before trying to invoke it. In this case we know
* that we're always accessing the xdf driver and it does have this
* interface defined, so we can skip the check.
*/
return (rv);
}
/*
* Driver PV and HVM dev_ops entry points
*/
/*
* getinfo entry point (listed as xdfs_getinfo in the dev_ops table).
*
* Handles DDI_INFO_DEVT2DEVINFO and DDI_INFO_DEVT2INSTANCE and returns
* DDI_FAILURE for any other infocmd. The DEVT2DEVINFO case can also
* fail, and it answers differently depending on XDFS_HVM_MODE().
* Returns DDI_SUCCESS when the selected case completes.
*
* NOTE(review): the function name, most of the parameter list, and the
* statements that fill in *result are not visible in this copy.
*/
/*ARGSUSED*/
static int
void **result)
{
switch (infocmd) {
case DDI_INFO_DEVT2DEVINFO:
return (DDI_FAILURE);
if (XDFS_HVM_MODE(xsp))
else
break;
case DDI_INFO_DEVT2INSTANCE:
break;
default:
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* Returns DDI_PROBE_FAILURE when xdfs_hvm_dev_ops is NULL (further
* conditions in the same test are not visible); otherwise returns rv.
*
* NOTE(review): the function name, parameter list, the rest of the
* condition, the declaration of rv, and the statements that set it are
* not visible in this copy. Presumably this is the HVM-mode probe
* helper -- confirm.
*/
static int
{
void *xsp;
if ((xdfs_hvm_dev_ops == NULL) ||
return (DDI_PROBE_FAILURE);
}
return (rv);
}
/*
* Probe entry point (listed as xdfs_probe in the dev_ops table).
*
* Per the comments: returns DDI_PROBE_PARTIAL when the device was
* already probed, and works out the device path. It decides whether PV
* access is disabled (xdfs_pv_disable or pv_disable), looks up which xdf
* instance to layer on, and falls back to HVM access when that fails.
* Otherwise it allocates the state structure, adds the zero-length
* DDI_KERNEL_IOCTL attribute, and returns DDI_PROBE_SUCCESS.
*
* NOTE(review): the function name, parameter list, and most statements
* between the comments are not visible in this copy.
*/
static int
{
char *path;
int i, pv_disable;
/* if we've already probed the device then there's nothing to do */
return (DDI_PROBE_PARTIAL);
/* Figure out our pathname */
/* see if we should disable pv access mode */
if (xdfs_pv_disable || pv_disable)
/*
* This xdf shell device layers on top of an xdf device. So the first
* thing we need to do is determine which xdf device instance this
* xdf shell instance should be layered on top of.
*/
break;
}
/*
* UhOh. We either don't know what xdf instance this xdf
* shell device should be mapped to or the xdf node associated
* with this instance isn't attached. In either case fall
* back to hvm access.
*/
}
/* allocate and initialize our state structure */
}
/*
* Add a zero-length attribute to tell the world we support
* kernel ioctls (for layered drivers).
*/
DDI_KERNEL_IOCTL, NULL, 0);
return (DDI_PROBE_SUCCESS);
}
/*
* Has a failure exit that returns rv and otherwise returns DDI_SUCCESS.
* The visible test starts by checking xdfs_hvm_dev_ops against NULL, and
* XDFS_HVM_PATH(xsp) is passed as the last argument of a call.
*
* NOTE(review): the function name, parameter list, the declaration of
* rv, and most statements are not visible in this copy. Presumably this
* is the HVM-mode attach helper -- confirm.
*/
static int
{
if ((xdfs_hvm_dev_ops == NULL) ||
XDFS_HVM_PATH(xsp));
return (rv);
}
return (DDI_SUCCESS);
}
/*
* Autoconfiguration Routines
*/
/*
* Attach entry point (listed as xdfs_attach in the dev_ops table).
*
* Fails with DDI_FAILURE for any cmd other than DDI_ATTACH and returns
* DDI_SUCCESS at the end.
*
* NOTE(review): the function name, parameter list, the condition
* guarding the first DDI_FAILURE return, and the statement controlled by
* the XDFS_HVM_MODE() test are not visible in this copy. Do not read
* the cmd check as the body of that test -- confirm against the full
* source.
*/
static int
{
return (DDI_FAILURE);
if (XDFS_HVM_MODE(xsp))
if (cmd != DDI_ATTACH)
return (DDI_FAILURE);
return (DDI_SUCCESS);
}
/*
* Returns DDI_FAILURE when xdfs_hvm_dev_ops is NULL (further conditions
* in the same test are not visible). It has another exit that returns
* rv, and otherwise returns DDI_SUCCESS.
*
* NOTE(review): the function name, parameter list, the rest of the
* condition, and the statements that set and test rv are not visible in
* this copy. Presumably this is the HVM-mode detach helper -- confirm.
*/
static int
{
int rv;
if ((xdfs_hvm_dev_ops == NULL) ||
return (DDI_FAILURE);
return (rv);
return (DDI_SUCCESS);
}
/*
* Detach entry point (listed as xdfs_detach in the dev_ops table).
*
* Fails with DDI_FAILURE for any cmd other than DDI_DETACH, or when
* xdfs_tgt_detach() reports failure; returns DDI_SUCCESS otherwise.
*
* NOTE(review): the function name, parameter list, and the statement
* controlled by the XDFS_HVM_MODE() test are not visible in this copy.
* Do not read the cmd check as the body of that test -- confirm against
* the full source.
*/
static int
{
if (XDFS_HVM_MODE(xsp))
if (cmd != DDI_DETACH)
return (DDI_FAILURE);
if (!xdfs_tgt_detach(xsp)) {
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* Returns DDI_FAILURE when xdfs_hvm_dev_ops is NULL (further conditions
* in the same test are not visible).
*
* NOTE(review): the function name, parameter list, the rest of the
* condition, and the success path are not visible in this copy.
* Presumably this is the HVM-mode power helper -- confirm.
*/
static int
{
if ((xdfs_hvm_dev_ops == NULL) ||
return (DDI_FAILURE);
}
/*
* Power entry point (listed as xdfs_power in the dev_ops table).
*
* NOTE(review): the function name, parameter list, and the statement
* controlled by the XDFS_HVM_MODE() test are not visible in this copy.
* Presumably HVM-mode requests are handed elsewhere and everything else
* gets the nodev() result -- confirm against the full source.
*/
static int
{
if (XDFS_HVM_MODE(xsp))
return (nodev());
}
/*
* Cmlb ops vector
*/
};
/*
* Device driver ops vector
*/
/*
* Character/block entry points for this driver, one per slot.
* NOTE(review): the declaration line that opens this initializer is not
* visible in this copy; the dev_ops table refers to it as xdfs_cb_ops.
*/
xdfs_open, /* open */
xdfs_close, /* close */
xdfs_strategy, /* strategy */
nodev, /* print */
xdfs_dump, /* dump */
xdfs_read, /* read */
xdfs_write, /* write */
xdfs_ioctl, /* ioctl */
nodev, /* devmap */
nodev, /* mmap */
nodev, /* segmap */
nochpoll, /* poll */
xdfs_prop_op, /* cb_prop_op */
0, /* streamtab */
CB_REV, /* cb_rev */
xdfs_aread, /* async read */
xdfs_awrite /* async write */
};
/*
* Device operations for this driver, one per slot.
* NOTE(review): the declaration line that opens this initializer is not
* visible in this copy; the module linkage refers to it as xdfs_ops.
*/
DEVO_REV, /* devo_rev, */
0, /* refcnt */
xdfs_getinfo, /* info */
nulldev, /* identify */
xdfs_probe, /* probe */
xdfs_attach, /* attach */
xdfs_detach, /* detach */
nodev, /* reset */
&xdfs_cb_ops, /* driver operations */
NULL, /* bus operations */
xdfs_power, /* power */
ddi_quiesce_not_supported, /* devo_quiesce */
};
/*
* Module linkage information for the kernel.
*/
/*
* NOTE(review): the declaration lines that open these two initializers
* are not visible in this copy. The first one describes this module as
* a driver using xdfs_ops; the contents of the second are not visible.
*/
&mod_driverops, /* Type of module. This one is a driver. */
NULL, /* Module description. Set by _init() */
&xdfs_ops, /* Driver ops. */
};
};
/*
* Module load entry point.
*
* Works out xdfs_ss_size so that an xdfs_hvm_state_t always trails the
* per-instance state. It initializes soft state sized for XDFS_NODES
* instances, then installs the module. Returns 0 on success, EINVAL on
* an early check, or the failing step's rval.
*
* NOTE(review): the condition guarding the EINVAL return, the base size
* computation, the start of the soft state call, and the install call
* are not visible in this copy.
*/
int
_init(void)
{
int rval;
return (EINVAL);
/*
* Determine the size of our soft state structure. The base
* size of the structure is the larger of the hvm clients state
* structure, or our shell state structure. Then we'll align
* the end of the structure to a pointer boundary and append
* a xdfs_hvm_state_t structure. This way the xdfs_hvm_state_t
* structure is always present and we can use it to determine the
* current device access mode (hvm or shell).
*/
xdfs_ss_size += sizeof (xdfs_hvm_state_t);
/*
* In general ide usually supports 4 disk devices, this same
* limitation also applies to software emulating ide devices.
* So by default we pre-allocate 4 xdf shell soft state structures.
*/
xdfs_ss_size, XDFS_NODES)) != 0)
return (rval);
/* Install our module */
return (rval);
}
return (0);
}
/*
* NOTE(review): the function name, parameter list, and body are not
* visible in this copy. Sitting between _init() and _fini(), this is
* presumably the module's _info() entry point -- confirm.
*/
int
{
}
/*
* Module unload entry point. Returns rval on its failure exit and 0
* otherwise.
*
* NOTE(review): the statement that sets rval, the condition guarding
* the failure exit, and any teardown are not visible in this copy.
*/
int
_fini(void)
{
int rval;
return (rval);
return (0);
}