/* pv_cmdk.c revision 342440ec94087b8c751c580ab9ed6c693d31d418 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/scsi/scsi_types.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/types.h>
#include <sys/xpv_support.h>
#include <sys/xendev.h>
#include <sys/gnttab.h>
#include <public/xen.h>
#include <public/grant_table.h>
#include <io/xdf.h>
#include <sys/vtoc.h>
#include <sys/dkio.h>
#include <sys/dktp/dadev.h>
#include <sys/dktp/dadkio.h>
#include <sys/dktp/tgdk.h>
#include <sys/dktp/bbh.h>
#include <sys/dktp/cmdk.h>
#include <sys/dktp/altsctr.h>
/*
* General Notes
*
 * We don't support disks with bad block mappings. We have this
* limitation because the underlying xdf driver doesn't support
* bad block remapping. If there is a need to support this feature
* it should be added directly to the xdf driver and we should just
 * pass requests straight on through and let it handle the remapping.
* Also, it's probably worth pointing out that most modern disks do bad
* block remapping internally in the hardware so there's actually less
* of a chance of us ever discovering bad blocks. Also, in most cases
* this driver (and the xdf driver) will only be used with virtualized
* devices, so one might wonder why a virtual device would ever actually
* experience bad blocks. To wrap this up, you might be wondering how
* these bad block mappings get created and how they are managed. Well,
* there are two tools for managing bad block mappings, format(1M) and
* addbadsec(1M). Format(1M) can be used to do a surface scan of a disk
* to attempt to find bad block and create mappings for them. Format(1M)
* and addbadsec(1M) can also be used to edit existing mappings that may
* be saved on the disk.
*
* The underlying PV driver that this driver passes on requests to is the
* xdf driver. Since in most cases the xdf driver doesn't deal with
* physical disks it has it's own algorithm for assigning a physical
* geometry to a virtual disk (ie, cylinder count, head count, etc.)
* The default values chosen by the xdf driver may not match those
* assigned to a disk by a hardware disk emulator in an HVM environment.
* This is a problem since these physical geometry attributes affect
* things like the partition table, backup label location, etc. So
* to emulate disk devices correctly we need to know the physical geometry
 * that was assigned to a disk at the time of its initialization.
 * Normally in an HVM environment this information will be passed to
* the BIOS and operating system from the hardware emulator that is
* emulating the disk devices. In the case of a solaris dom0+xvm
* this would be qemu. So to work around this issue, this driver will
* query the emulated hardware to get the assigned physical geometry
* and then pass this geometry onto the xdf driver so that it can use it.
* But really, this information is essentially metadata about the disk
* that should be kept with the disk image itself. (Assuming or course
* that a disk image is the actual backingstore for this emulated device.)
* This metadata should also be made available to PV drivers via a common
 * mechanism, probably the xenstore. The fact that this metadata isn't
* available outside of HVM domains means that it's difficult to move
* disks between HVM and PV domains, since a fully PV domain will have no
* way of knowing what the correct geometry of the target device is.
* (Short of reading the disk, looking for things like partition tables
* and labels, and taking a best guess at what the geometry was when
 * the disk was initialized. Unsurprisingly, qemu actually does this.)
*
* This driver has to map cmdk device instances into their corresponding
* xdf device instances. We have to do this to ensure that when a user
* accesses a emulated cmdk device we map those accesses to the proper
* paravirtualized device. Basically what we need to know is how multiple
* 'disk' entries in a domU configuration file get mapped to emulated
* cmdk devices and to xdf devices. The 'disk' entry to xdf instance
* mappings we know because those are done within the Solaris xvdi code
* and the xpvd nexus driver. But the config to emulated devices mappings
* are handled entirely within the xen management tool chain and the
* hardware emulator. Since all the tools that establish these mappings
* live in dom0, dom0 should really supply us with this information,
 * probably via the xenstore. Unfortunately it doesn't, so since there's
* no good way to determine this mapping dynamically, this driver uses
* a hard coded set of static mappings. These mappings are hardware
* emulator specific because each different hardware emulator could have
* a different device tree with different cmdk device paths. This
* means that if we want to continue to use this static mapping approach
* to allow Solaris to run on different hardware emulators we'll have
* to analyze each of those emulators to determine what paths they
* use and hard code those paths into this driver. yech. This metadata
* really needs to be supplied to us by dom0.
*
 * This driver accesses underlying xdf nodes. Unfortunately, devices
* must create minor nodes during attach, and for disk devices to create
* minor nodes, they have to look at the label on the disk, so this means
* that disk drivers must be able to access a disk contents during
* attach. That means that this disk driver must be able to access
 * underlying xdf nodes during attach. Unfortunately, due to device tree
 * locking restrictions, we cannot have an attach operation occurring on
* this device and then attempt to access another device which may
* cause another attach to occur in a different device tree branch
* since this could result in deadlock. Hence, this driver can only
* access xdf device nodes that we know are attached, and it can't use
* any ddi interfaces to access those nodes if those interfaces could
* trigger an attach of the xdf device. So this driver works around
* these restrictions by talking directly to xdf devices via
* xdf_hvm_hold(). This interface takes a pathname to an xdf device,
 * and if that device is already attached then it returns a held dip
* pointer for that device node. This prevents us from getting into
* deadlock situations, but now we need a mechanism to ensure that all
* the xdf device nodes this driver might access are attached before
* this driver tries to access them. This is accomplished via the
* hvmboot_rootconf() callback which is invoked just before root is
* mounted. hvmboot_rootconf() will attach xpvd and tell it to configure
* all xdf device visible to the system. All these xdf device nodes
* will also be marked with the "ddi-no-autodetach" property so that
 * once they are configured, they will not be automatically unconfigured.
* The only way that they could be unconfigured is if the administrator
* explicitly attempts to unload required modules via rem_drv(1M)
* or modunload(1M).
*/
/*
 * 16 partitions + fdisk (see xdf.h)
 */

/* Extract the xdf instance / partition number from a pv_cmdk dev_t */
#define	XDF_DEV2UNIT(dev)	XDF_INST((getminor((dev))))
#define	XDF_DEV2PART(dev)	XDF_PART((getminor((dev))))

/* The open types we accept: block, character, and layered */
#define	OTYP_VALID(otyp)	((otyp == OTYP_BLK) || \
				    (otyp == OTYP_CHR) || \
				    (otyp == OTYP_LYR))

#define	PV_CMDK_NODES		4

/*
 * One entry mapping the device path of an emulated (HVM) cmdk node to
 * the device path of the paravirtualized xdf node that backs it.
 */
typedef struct hvm_to_pv {
	char	*h2p_hvm_path;	/* emulated cmdk device path */
	char	*h2p_pv_path;	/* backing xdf device path */
} hvm_to_pv_t;
/*
 * Static table mapping the cmdk device paths created by qemu's emulated
 * ide disks to the xdf device paths that back them under xen.
 */
static hvm_to_pv_t pv_cmdk_h2p_xen_qemu[] = {
	/*
	 * The paths mapping here are very specific to xen and qemu.  When a
	 * domU is booted under xen in HVM mode, qemu is normally used to
	 * emulate up to four ide disks.  These disks always have the four
	 * paths listed below.  To configure an emulated ide device, the
	 * xen domain configuration file normally has an entry that looks
	 * like this:
	 *	disk = [ 'file:/foo.img,hda,w' ]
	 *
	 * The part we're interested in is the 'hda', which we'll call the
	 * xen disk device name here.  The xen management tools (which parse
	 * the xen domain configuration file and launch qemu) make the
	 * following assumptions about this value:
	 *	hda == emulated ide disk 0 (ide bus 0, master)
	 *	hdb == emulated ide disk 1 (ide bus 0, slave)
	 *	hdc == emulated ide disk 2 (ide bus 1, master)
	 *	hdd == emulated ide disk 3 (ide bus 1, slave)
	 *
	 * (Uncoincidentally, these xen disk device names actually map to
	 * the /dev filesystem names of ide disk devices in Linux.  So in
	 * Linux /dev/hda is the first ide disk.)  So for the first part of
	 * our mapping we've just hardcoded the cmdk paths that we know
	 * qemu will use.
	 *
	 * To understand the second half of the mapping (ie, the xdf device
	 * that each emulated cmdk device should be mapped to) we need to
	 * know the solaris device node address that will be assigned to
	 * each xdf device.  (The device node address is the decimal
	 * number that comes after the "xdf@" in the device path.)
	 *
	 * So the question becomes, how do we know what the xenstore device
	 * id for an emulated disk will be?  Well, it turns out that since
	 * the xen management tools expect the disk device names to be Linux
	 * device names, those same management tools assign each disk a
	 * device id that matches the dev_t of the corresponding device
	 * under Linux.  (Big shocker.)  This xen device name-to-id mapping
	 * is currently all hard coded here:
	 *	xen.hg/tools/python/xen/util/blkif.py`blkdev_name_to_number()
	 *
	 * So looking at the code above we can see the following xen disk
	 * device name to xenstore device id mappings:
	 *	'hda' == 0t768  == ((3 * 256) + (0 * 64))
	 *	'hdb' == 0t832  == ((3 * 256) + (1 * 64))
	 *	'hdc' == 0t5632 == ((22 * 256) + (0 * 64))
	 *	'hdd' == 0t5696 == ((22 * 256) + (1 * 64))
	 */
	{ "/pci@0,0/pci-ide@1,1/ide@0/cmdk@0,0", "/xpvd/xdf@768" },
	{ "/pci@0,0/pci-ide@1,1/ide@0/cmdk@1,0", "/xpvd/xdf@832" },
	{ "/pci@0,0/pci-ide@1,1/ide@1/cmdk@0,0", "/xpvd/xdf@5632" },
	{ "/pci@0,0/pci-ide@1,1/ide@1/cmdk@1,0", "/xpvd/xdf@5696" },
	{ NULL, 0 }
};
/*
 * Per-instance soft state for an emulated pv_cmdk disk device.
 */
typedef struct pv_cmdk {
	dev_info_t	*dk_dip;	/* our devinfo node */
	cmlb_handle_t	dk_cmlbhandle;	/* disk label (cmlb) handle */
	ddi_devid_t	dk_devid;	/* devid, once read or fabricated */
	kmutex_t	dk_mutex;	/* protects the open state below */
	dev_info_t	*dk_xdf_dip;	/* underlying xdf devinfo node */
	dev_t		dk_xdf_dev;	/* base dev_t of the xdf device; */
					/* or'd with a partition number */
					/* to address one slice */

	/* per open-type, per-partition open counts */
	int		dk_xdf_otyp_count[OTYPCNT][XDF_PEXT];
	/* one xdf LDI handle per currently-open partition */
	ldi_handle_t	dk_xdf_lh[XDF_PEXT];
} pv_cmdk_t;
/*
 * Globals
 */
static void *pv_cmdk_state;	/* soft state handle for all instances */
static major_t pv_cmdk_major;	/* major number assigned to this driver */
static hvm_to_pv_t *pv_cmdk_h2p; /* active hvm-to-pv map, used in attach */

/*
 * Function prototypes for xdf callback functions.  These are invoked
 * directly (not via the LDI) to service cmlb whole-disk requests.
 */
extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
/*
 * Report whether any open type currently has the given partition open.
 * Caller must hold dkp->dk_mutex.
 */
static boolean_t
pv_cmdk_isopen_part(struct pv_cmdk *dkp, int part)
{
	int	i;

	ASSERT(MUTEX_HELD(&dkp->dk_mutex));

	for (i = 0; i < OTYPCNT; i++) {
		if (dkp->dk_xdf_otyp_count[i][part] != 0)
			return (B_TRUE);
	}

	return (B_FALSE);
}
/*
 * Cmlb ops vectors, allows the cmlb module to directly access the entire
 * pv_cmdk disk device without going through any partitioning layers.
 */

/*
 * cmlb TG_READ/TG_WRITE callback: pass whole-disk reads and writes
 * straight through to the underlying xdf device.
 */
/*ARGSUSED*/
static int
pv_cmdk_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr,
    diskaddr_t start, size_t count, void *tg_cookie)
{
	struct pv_cmdk	*dkp;

	dkp = ddi_get_soft_state(pv_cmdk_state, ddi_get_instance(dip));
	if (dkp == NULL)
		return (ENXIO);

	return (xdf_lb_rdwr(dkp->dk_xdf_dip, cmd, bufaddr, start, count,
	    tg_cookie));
}
/*
 * cmlb TG_GETINFO callback.  Most requests are forwarded to the
 * underlying xdf device; TG_GETVIRTGEOM is emulated locally since the
 * native xdf driver doesn't support it.
 */
/*ARGSUSED*/
static int
pv_cmdk_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	struct pv_cmdk	*dkp;
	cmlb_geom_t	pgeom, *vgeomp;
	diskaddr_t	capacity;
	int		err;

	dkp = ddi_get_soft_state(pv_cmdk_state, ddi_get_instance(dip));
	if (dkp == NULL)
		return (ENXIO);

	if (cmd != TG_GETVIRTGEOM)
		return (xdf_lb_getinfo(dkp->dk_xdf_dip, cmd, arg, tg_cookie));

	/*
	 * The native xdf driver doesn't support TG_GETVIRTGEOM.
	 * Instead of passing it on, emulate it here so that the
	 * results look the same as what we get for a real cmdk
	 * device.
	 *
	 * Get the real size of the device.
	 */
	err = xdf_lb_getinfo(dkp->dk_xdf_dip, TG_GETPHYGEOM, &pgeom,
	    tg_cookie);
	if (err != 0)
		return (err);
	capacity = pgeom.g_capacity;

	/*
	 * If the controller returned us something that doesn't
	 * really fit into an Int 13/function 8 geometry
	 * result, just fail the ioctl.  See PSARC 1998/313.
	 */
	if (capacity >= (63 * 254 * 1024))
		return (EINVAL);

	/* fabricate a fixed 254-head / 63-sector virtual geometry */
	vgeomp = (cmlb_geom_t *)arg;
	vgeomp->g_capacity = capacity;
	vgeomp->g_nsect = 63;
	vgeomp->g_nhead = 254;
	vgeomp->g_ncyl = capacity / (63 * 254);
	vgeomp->g_acyl = 0;
	vgeomp->g_secsize = 512;
	vgeomp->g_intrlv = 1;
	vgeomp->g_rpm = 3600;

	return (0);
}
/* cmlb target-disk ops vector: whole-disk I/O and geometry callbacks */
static cmlb_tg_ops_t pv_cmdk_lb_ops = {
	TG_DK_OPS_VERSION_1,
	pv_cmdk_lb_rdwr,
	pv_cmdk_lb_getinfo
};
/*
 * devid management functions
 */

/*
 * pv_cmdk_get_modser() is basically a local copy of
 * cmdk_get_modser() modified to work without the dadk layer.
 * (which the non-pv version of the cmdk driver uses.)
 *
 * Query the controller for a model or serial number string (selected by
 * ioccmd) and return its trimmed length, or 0 if the string is absent
 * or invalid.
 */
static int
pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len)
{
	struct scsi_device	*sdevp;
	opaque_t		ctlp;
	dadk_ioc_string_t	strarg;
	boolean_t		valid = B_FALSE;
	int			trimlen = 0;
	int			i;

	strarg.is_buf = buf;
	strarg.is_size = len;
	sdevp = ddi_get_driver_private(dkp->dk_dip);
	ctlp = sdevp->sd_address.a_hba_tran;
	if (CTL_IOCTL(ctlp,
	    ioccmd, (uintptr_t)&strarg, FNATIVE | FKIOCTL) != 0)
		return (0);

	/*
	 * A valid model/serial string must contain a character that is
	 * not a space, NUL, or '0'.  Trailing spaces and NULs are
	 * trimmed from the returned length.
	 */
	for (i = 0; i < strarg.is_size; i++) {
		char	c = buf[i];

		if (c == ' ' || c == '\0')
			continue;
		trimlen = i + 1;
		if (c != '0')
			valid = B_TRUE;
	}

	return (valid ? trimlen : 0);
}
/*
 * pv_cmdk_devid_modser() is basically a copy of cmdk_devid_modser()
 * that has been modified to use local pv cmdk driver functions.
 *
 * Build a devid from the drive's model number, '=', and serial number.
 * Return DDI_SUCCESS or DDI_FAILURE.
 */
static int
pv_cmdk_devid_modser(struct pv_cmdk *dkp)
{
	char	*hwid;
	int	modlen, serlen;
	int	rc = DDI_FAILURE;

	hwid = kmem_alloc(CMDK_HWIDLEN, KM_SLEEP);

	/* device ID is a concatenation of model number, '=', serial number */
	modlen = pv_cmdk_get_modser(dkp, DIOCTL_GETMODEL, hwid, CMDK_HWIDLEN);
	if (modlen == 0)
		goto out;
	hwid[modlen++] = '=';

	serlen = pv_cmdk_get_modser(dkp, DIOCTL_GETSERIAL,
	    hwid + modlen, CMDK_HWIDLEN - modlen);
	if (serlen == 0)
		goto out;
	hwid[modlen + serlen] = 0;

	/* Initialize the device ID, trailing NUL not included */
	if (ddi_devid_init(dkp->dk_dip, DEVID_ATA_SERIAL, modlen + serlen,
	    hwid, (ddi_devid_t *)&dkp->dk_devid) == DDI_SUCCESS)
		rc = DDI_SUCCESS;

out:
	kmem_free(hwid, CMDK_HWIDLEN);
	return (rc);
}
/*
 * pv_cmdk_devid_read() is basically a local copy of
 * cmdk_devid_read() modified to work without the dadk layer.
 * (which the non-pv version of the cmdk driver uses.)
 *
 * Read a devid from the first block of the last track of
 * the last cylinder.  Make sure what we read is a valid devid.
 * Return DDI_SUCCESS or DDI_FAILURE.
 */
static int
pv_cmdk_devid_read(struct pv_cmdk *dkp)
{
	diskaddr_t	blk;
	struct dk_devid	*dkdevidp;
	uint_t		*ip, chksum;
	int		i;

	/* ask cmlb where the on-disk devid block lives */
	if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0)
		return (DDI_FAILURE);

	dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP);

	/* read the devid sector directly from the underlying xdf device */
	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
	    TG_READ, dkdevidp, blk, NBPSCTR, NULL) != 0)
		goto err;

	/* Validate the revision */
	if ((dkdevidp->dkd_rev_hi != DK_DEVID_REV_MSB) ||
	    (dkdevidp->dkd_rev_lo != DK_DEVID_REV_LSB))
		goto err;

	/* Calculate the checksum: XOR of all int-sized words but the last */
	chksum = 0;
	ip = (uint_t *)dkdevidp;
	for (i = 0; i < ((NBPSCTR - sizeof (int))/sizeof (int)); i++)
		chksum ^= ip[i];
	if (DKD_GETCHKSUM(dkdevidp) != chksum)
		goto err;

	/* Validate the device id */
	if (ddi_devid_valid((ddi_devid_t)dkdevidp->dkd_devid) != DDI_SUCCESS)
		goto err;

	/* keep a copy of the device id */
	i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid);
	dkp->dk_devid = kmem_alloc(i, KM_SLEEP);
	bcopy(dkdevidp->dkd_devid, dkp->dk_devid, i);

	kmem_free(dkdevidp, NBPSCTR);
	return (DDI_SUCCESS);

err:
	kmem_free(dkdevidp, NBPSCTR);
	return (DDI_FAILURE);
}
/*
 * pv_cmdk_devid_fabricate() is basically a local copy of
 * cmdk_devid_fabricate() modified to work without the dadk layer.
 * (which the non-pv version of the cmdk driver uses.)
 *
 * Create a devid and write it on the first block of the last track of
 * the last cylinder.
 * Return DDI_SUCCESS or DDI_FAILURE.
 */
static int
pv_cmdk_devid_fabricate(struct pv_cmdk *dkp)
{
	ddi_devid_t	devid = NULL;	/* devid made by ddi_devid_init */
	struct dk_devid	*dkdevidp = NULL; /* devid struct stored on disk */
	diskaddr_t	blk;
	uint_t		*ip, chksum;
	int		i;

	/* ask cmlb where the on-disk devid block lives */
	if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0)
		return (DDI_FAILURE);

	/* fabricate a fresh devid */
	if (ddi_devid_init(dkp->dk_dip, DEVID_FAB, 0, NULL, &devid) !=
	    DDI_SUCCESS)
		return (DDI_FAILURE);

	/* allocate a buffer */
	dkdevidp = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP);

	/* Fill in the revision */
	dkdevidp->dkd_rev_hi = DK_DEVID_REV_MSB;
	dkdevidp->dkd_rev_lo = DK_DEVID_REV_LSB;

	/* Copy in the device id */
	i = ddi_devid_sizeof(devid);
	if (i > DK_DEVID_SIZE)
		goto err;
	bcopy(devid, dkdevidp->dkd_devid, i);

	/* Calculate the chksum: XOR of all int-sized words but the last */
	chksum = 0;
	ip = (uint_t *)dkdevidp;
	for (i = 0; i < ((NBPSCTR - sizeof (int))/sizeof (int)); i++)
		chksum ^= ip[i];

	/* Fill in the checksum */
	DKD_FORMCHKSUM(chksum, dkdevidp);

	/* write the devid sector out to the underlying xdf device */
	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
	    TG_WRITE, dkdevidp, blk, NBPSCTR, NULL) != 0)
		goto err;

	kmem_free(dkdevidp, NBPSCTR);
	dkp->dk_devid = devid;
	return (DDI_SUCCESS);

err:
	if (dkdevidp != NULL)
		kmem_free(dkdevidp, NBPSCTR);
	if (devid != NULL)
		ddi_devid_free(devid);
	return (DDI_FAILURE);
}
/*
 * pv_cmdk_devid_setup() is basically a local copy of cmdk_devid_setup()
 * that has been modified to use local pv cmdk driver functions.
 *
 * Create and register the devid.
 * There are 4 different ways we can get a device id:
 *	1. Already have one - nothing to do
 *	2. Build one from the drive's model and serial numbers
 *	3. Read one from the disk (first sector of last track)
 *	4. Fabricate one and write it on the disk.
 * If any of these succeeds, register the deviceid
 */
static void
pv_cmdk_devid_setup(struct pv_cmdk *dkp)
{
	/* 1. All done if a devid is already present */
	if (dkp->dk_devid != NULL)
		return;

	/*
	 * Try the remaining options in order until one succeeds:
	 * 2. build from model/serial, 3. read from the disk,
	 * 4. fabricate one and write it to the disk.
	 */
	if ((pv_cmdk_devid_modser(dkp) != DDI_SUCCESS) &&
	    (pv_cmdk_devid_read(dkp) != DDI_SUCCESS) &&
	    (pv_cmdk_devid_fabricate(dkp) != DDI_SUCCESS))
		return;

	/* We got a devid one of the above ways; register it */
	(void) ddi_devid_register(dkp->dk_dip, dkp->dk_devid);
}
/*
 * Local Functions
 */

/*
 * Completion callback for buffers cloned in pv_cmdkstrategy().
 * Propagate the I/O result back to the original buffer, complete it,
 * and release the clone.
 */
static int
pv_cmdk_iodone(struct buf *bp)
{
	struct buf	*orig_bp = bp->b_chain;

	orig_bp->b_resid = bp->b_resid;
	bioerror(orig_bp, geterror(bp));
	biodone(orig_bp);

	freerbuf(bp);
	return (0);
}
/*
 * Strategy(9E) entry point.  Validate the request, clone the buffer onto
 * the matching partition of the underlying xdf device, and pass it on.
 * pv_cmdk_iodone() propagates the clone's result back to the original
 * buffer on completion.
 */
static int
pv_cmdkstrategy(struct buf *bp)
{
	dev_t		dev = bp->b_edev;
	int		instance = XDF_DEV2UNIT(dev);
	int		part = XDF_DEV2PART(dev);
	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	dev_t		xdf_devt;
	struct buf	*bp_clone;

	/*
	 * Sanity checks that the dev_t associated with the buf we were
	 * passed actually corresponds to us, that we have soft state for
	 * that minor, and that the partition we're trying to access is
	 * actually open.  On debug kernels we'll panic and on non-debug
	 * kernels we'll return failure.
	 */
	ASSERT(getmajor(dev) == pv_cmdk_major);
	if (getmajor(dev) != pv_cmdk_major)
		goto err;

	/*
	 * A bogus minor number may name an instance for which no soft
	 * state was ever allocated; without this check the mutex_enter()
	 * below would dereference a NULL dkp.
	 */
	ASSERT(dkp != NULL);
	if (dkp == NULL)
		goto err;

	mutex_enter(&dkp->dk_mutex);
	ASSERT(pv_cmdk_isopen_part(dkp, part));
	if (!pv_cmdk_isopen_part(dkp, part)) {
		mutex_exit(&dkp->dk_mutex);
		goto err;
	}
	mutex_exit(&dkp->dk_mutex);

	/* clone this buffer */
	xdf_devt = dkp->dk_xdf_dev | part;
	bp_clone = bioclone(bp, 0, bp->b_bcount, xdf_devt, bp->b_blkno,
	    pv_cmdk_iodone, NULL, KM_SLEEP);
	bp_clone->b_chain = bp;

	/*
	 * If we're being invoked on behalf of the physio() call in
	 * pv_cmdk_dioctl_rwcmd() then b_private will be set to
	 * XB_SLICE_NONE and we need to propagate this flag into the
	 * cloned buffer so that the xdf driver will see it.
	 */
	if (bp->b_private == (void *)XB_SLICE_NONE)
		bp_clone->b_private = (void *)XB_SLICE_NONE;

	/*
	 * Pass on the cloned buffer.  Note that we don't bother to check
	 * for failure because the xdf strategy routine will have to
	 * invoke biodone() if it wants to return an error, which means
	 * that the pv_cmdk_iodone() callback will get invoked and it
	 * will propagate the error back up the stack and free the cloned
	 * buffer.
	 */
	ASSERT(dkp->dk_xdf_lh[part] != NULL);
	return (ldi_strategy(dkp->dk_xdf_lh[part], bp_clone));

err:
	bioerror(bp, ENXIO);
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	return (0);
}
/*
 * Read(9E): forward to the underlying xdf partition via its LDI handle.
 */
/*ARGSUSED*/
static int
pv_cmdkread(dev_t dev, struct uio *uio, cred_t *credp)
{
	struct pv_cmdk	*dkp;

	dkp = ddi_get_soft_state(pv_cmdk_state, XDF_DEV2UNIT(dev));
	return (ldi_read(dkp->dk_xdf_lh[XDF_DEV2PART(dev)], uio, credp));
}
/*
 * Write(9E): forward to the underlying xdf partition via its LDI handle.
 */
/*ARGSUSED*/
static int
pv_cmdkwrite(dev_t dev, struct uio *uio, cred_t *credp)
{
	struct pv_cmdk	*dkp;

	dkp = ddi_get_soft_state(pv_cmdk_state, XDF_DEV2UNIT(dev));
	return (ldi_write(dkp->dk_xdf_lh[XDF_DEV2PART(dev)], uio, credp));
}
/*
 * Async read(9E): forward to the underlying xdf partition.
 */
/*ARGSUSED*/
static int
pv_cmdkaread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	struct pv_cmdk	*dkp;

	dkp = ddi_get_soft_state(pv_cmdk_state, XDF_DEV2UNIT(dev));
	return (ldi_aread(dkp->dk_xdf_lh[XDF_DEV2PART(dev)], aio, credp));
}
/*
 * Async write(9E): forward to the underlying xdf partition.
 */
/*ARGSUSED*/
static int
pv_cmdkawrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	struct pv_cmdk	*dkp;

	dkp = ddi_get_soft_state(pv_cmdk_state, XDF_DEV2UNIT(dev));
	return (ldi_awrite(dkp->dk_xdf_lh[XDF_DEV2PART(dev)], aio, credp));
}
/*
 * Dump(9E) entry point (crash dump I/O): forward to the underlying
 * xdf partition.
 */
static int
pv_cmdkdump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	struct pv_cmdk	*dkp;

	dkp = ddi_get_soft_state(pv_cmdk_state, XDF_DEV2UNIT(dev));
	return (ldi_dump(dkp->dk_xdf_lh[XDF_DEV2PART(dev)], addr, blkno,
	    nblk));
}
/*
 * pv_rwcmd_copyin() is a duplicate of rwcmd_copyin().
 *
 * Copy a dadkio_rwcmd request in from userland, converting from the
 * ILP32 layout when the caller is a 32-bit process.  Returns 0 or
 * EFAULT.
 */
static int
pv_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag)
{
	switch (ddi_model_convert_from(flag)) {
	case DDI_MODEL_ILP32: {
		struct dadkio_rwcmd32 cmd32;

		if (ddi_copyin(inaddr, &cmd32,
		    sizeof (struct dadkio_rwcmd32), flag)) {
			return (EFAULT);
		}

		/* widen the 32-bit fields into the native structure */
		rwcmdp->cmd = cmd32.cmd;
		rwcmdp->flags = cmd32.flags;
		rwcmdp->blkaddr = (blkaddr_t)cmd32.blkaddr;
		rwcmdp->buflen = cmd32.buflen;
		rwcmdp->bufaddr = (caddr_t)(intptr_t)cmd32.bufaddr;

		/*
		 * Note: we do not convert the 'status' field,
		 * as it should not contain valid data at this
		 * point.
		 */
		bzero(&rwcmdp->status, sizeof (rwcmdp->status));
		break;
	}
	case DDI_MODEL_NONE: {
		if (ddi_copyin(inaddr, rwcmdp,
		    sizeof (struct dadkio_rwcmd), flag)) {
			return (EFAULT);
		}
	}
	}
	return (0);
}
/*
 * pv_rwcmd_copyout() is a duplicate of rwcmd_copyout().
 *
 * Copy a dadkio_rwcmd (including its status) back out to userland,
 * converting to the ILP32 layout when the caller is a 32-bit process.
 * Returns 0 or EFAULT.
 */
static int
pv_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag)
{
	switch (ddi_model_convert_from(flag)) {
	case DDI_MODEL_ILP32: {
		struct dadkio_rwcmd32 cmd32;

		/* narrow the native fields into the 32-bit structure */
		cmd32.cmd = rwcmdp->cmd;
		cmd32.flags = rwcmdp->flags;
		cmd32.blkaddr = rwcmdp->blkaddr;
		cmd32.buflen = rwcmdp->buflen;
		/* the buffer address must fit in 32 bits for this caller */
		ASSERT64(((uintptr_t)rwcmdp->bufaddr >> 32) == 0);
		cmd32.bufaddr = (caddr32_t)(uintptr_t)rwcmdp->bufaddr;

		cmd32.status.status = rwcmdp->status.status;
		cmd32.status.resid = rwcmdp->status.resid;
		cmd32.status.failed_blk_is_valid =
		    rwcmdp->status.failed_blk_is_valid;
		cmd32.status.failed_blk = rwcmdp->status.failed_blk;
		cmd32.status.fru_code_is_valid =
		    rwcmdp->status.fru_code_is_valid;
		cmd32.status.fru_code = rwcmdp->status.fru_code;

		bcopy(rwcmdp->status.add_error_info,
		    cmd32.status.add_error_info, DADKIO_ERROR_INFO_LEN);

		if (ddi_copyout(&cmd32, outaddr,
		    sizeof (struct dadkio_rwcmd32), flag))
			return (EFAULT);
		break;
	}
	case DDI_MODEL_NONE: {
		if (ddi_copyout(rwcmdp, outaddr,
		    sizeof (struct dadkio_rwcmd), flag))
			return (EFAULT);
	}
	}
	return (0);
}
/*
 * minphys(9F)-style callback for physio(): clamp a transfer to the
 * maximum supported record size.
 */
static void
pv_cmdkmin(struct buf *bp)
{
	if (bp->b_bcount <= DK_MAXRECSIZE)
		return;
	bp->b_bcount = DK_MAXRECSIZE;
}
/*
 * Implementation of the DIOCTL_RWCMD ioctl: an alternate read/write
 * interface to the disk.  The user's dadkio_rwcmd request is copied in,
 * validated, and issued through physio() with our own strategy routine.
 * Returns 0 or an errno value.
 */
static int
pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag)
{
	struct dadkio_rwcmd	*rwcmdp;
	struct iovec		aiov;
	struct uio		auio;
	struct buf		*bp;
	int			rw, status;

	rwcmdp = kmem_alloc(sizeof (struct dadkio_rwcmd), KM_SLEEP);
	status = pv_rwcmd_copyin(rwcmdp, (caddr_t)arg, flag);

	if (status != 0)
		goto out;

	/* only plain reads and writes are supported */
	switch (rwcmdp->cmd) {
	case DADKIO_RWCMD_READ:
	case DADKIO_RWCMD_WRITE:
		break;
	default:
		status = EINVAL;
		goto out;
	}

	/* describe the user's buffer with an iovec/uio pair */
	bzero((caddr_t)&aiov, sizeof (struct iovec));
	aiov.iov_base = rwcmdp->bufaddr;
	aiov.iov_len = rwcmdp->buflen;

	bzero((caddr_t)&auio, sizeof (struct uio));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	/* blkaddr is in XB_BSIZE blocks; convert to a byte offset */
	auio.uio_loffset = (offset_t)rwcmdp->blkaddr * (offset_t)XB_BSIZE;
	auio.uio_resid = rwcmdp->buflen;
	auio.uio_segflg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;

	/*
	 * Tell the xdf driver that this I/O request is using an absolute
	 * offset.
	 */
	bp = getrbuf(KM_SLEEP);
	bp->b_private = (void *)XB_SLICE_NONE;

	rw = ((rwcmdp->cmd == DADKIO_RWCMD_WRITE) ? B_WRITE : B_READ);
	status = physio(pv_cmdkstrategy, bp, dev, rw, pv_cmdkmin, &auio);

	biofini(bp);
	kmem_free(bp, sizeof (buf_t));

	/* copy the request (with updated status) back out on success */
	if (status == 0)
		status = pv_rwcmd_copyout(rwcmdp, (caddr_t)arg, flag);

out:
	kmem_free(rwcmdp, sizeof (struct dadkio_rwcmd));
	return (status);
}
/*
 * Ioctl(9E) entry point.  A few cmdk-specific commands are emulated or
 * rejected here; everything else is passed through to the underlying
 * xdf device.
 */
static int
pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
    int *rvalp)
{
	int		instance = XDF_DEV2UNIT(dev);
	int		part = XDF_DEV2PART(dev);
	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	int		err;

	switch (cmd) {
	default:
		/* pass any unrecognized ioctl straight on to xdf */
		return (ldi_ioctl(dkp->dk_xdf_lh[part],
		    cmd, arg, flag, credp, rvalp));
	case DKIOCGETWCE:
	case DKIOCSETWCE:
		/* write cache enable/disable is not supported */
		return (EIO);
	case DKIOCADDBAD: {
		/*
		 * This is for ata/ide bad block handling.  It is supposed
		 * to cause the driver to re-read the bad block list and
		 * alternate map after it has been updated.  Our driver
		 * will refuse to attach to any disk which has a bad blocks
		 * list defined, so there really isn't much to do here.
		 */
		return (0);
	}
	case DKIOCGETDEF: {
		/*
		 * I can't actually find any code that utilizes this ioctl,
		 * hence we're leaving it explicitly unimplemented.
		 *
		 * NOTE(review): the ASSERT below takes a string literal,
		 * which is always non-NULL, so it can never fire even on
		 * debug kernels -- presumably ASSERT(!"...") was intended.
		 */
		ASSERT("ioctl cmd unsupported by pv_cmdk: DKIOCGETDEF");
		return (EIO);
	}
	case DIOCTL_RWCMD: {
		/*
		 * This just seems to just be an alternate interface for
		 * reading and writing the disk.  Great, another way to
		 * do the same thing...
		 */
		return (pv_cmdk_dioctl_rwcmd(dev, arg, flag));
	}
	case DKIOCINFO: {
		dev_info_t	*dip = dkp->dk_dip;
		struct dk_cinfo	info;

		/* Pass on the ioctl request, save the response */
		if ((err = ldi_ioctl(dkp->dk_xdf_lh[part],
		    cmd, (intptr_t)&info, FKIOCTL, credp, rvalp)) != 0)
			return (err);

		/* Update controller info */
		info.dki_cnum = ddi_get_instance(ddi_get_parent(dip));
		(void) strlcpy(info.dki_cname,
		    ddi_get_name(ddi_get_parent(dip)),
		    sizeof (info.dki_cname));

		/* Update unit info. */
		if (info.dki_ctype == DKC_VBD)
			info.dki_ctype = DKC_DIRECT;
		info.dki_unit = instance;
		(void) strlcpy(info.dki_dname,
		    ddi_driver_name(dip), sizeof (info.dki_dname));
		info.dki_addr = 1;

		if (ddi_copyout(&info, (void *)arg, sizeof (info), flag))
			return (EFAULT);
		return (0);
	}
	} /* switch (cmd) */
	/*NOTREACHED*/
}
/*
 * Open(9E) entry point.  All opens are translated into block-device
 * opens of the underlying xdf partition, and per-otyp open counts are
 * maintained under dk_mutex.
 */
/*ARGSUSED*/
static int
pv_cmdkopen(dev_t *dev_p, int flag, int otyp, cred_t *credp)
{
	ldi_ident_t	li;
	dev_t		dev = *dev_p;
	int		instance = XDF_DEV2UNIT(dev);
	int		part = XDF_DEV2PART(dev);
	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	dev_t		xdf_devt = dkp->dk_xdf_dev | part;
	int		err = 0;

	if ((otyp < 0) || (otyp >= OTYPCNT))
		return (EINVAL);

	/* allocate an ldi handle */
	VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0);

	mutex_enter(&dkp->dk_mutex);

	/*
	 * We translate all device opens (chr, blk, and lyr) into
	 * block device opens.  Why?  Because for all the opens that
	 * come through this driver, we only keep around one LDI handle.
	 * So that handle can only be of one open type.  The reason
	 * that we choose the block interface for this is that to use
	 * the block interfaces for a device the system needs to allocate
	 * buf_ts, which are associated with system memory which can act
	 * as a cache for device data.  So normally when a block device
	 * is closed the system will ensure that all these pages get
	 * flushed out of memory.  But if we were to open the device
	 * as a character device, then when we went to close the underlying
	 * device (even if we had invoked the block interfaces) any data
	 * remaining in memory wouldn't necessarily be flushed out
	 * before the device was closed.
	 */
	if (dkp->dk_xdf_lh[part] == NULL) {
		/* first open of this partition: get a real LDI handle */
		ASSERT(!pv_cmdk_isopen_part(dkp, part));

		err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp,
		    &dkp->dk_xdf_lh[part], li);
		if (err != 0) {
			mutex_exit(&dkp->dk_mutex);
			ldi_ident_release(li);
			return (err);
		}

		/* Disk devices really shouldn't clone */
		ASSERT(xdf_devt == (dkp->dk_xdf_dev | part));
	} else {
		ldi_handle_t lh_tmp;

		/* already open: only revalidate flags and credentials */
		ASSERT(pv_cmdk_isopen_part(dkp, part));

		/* do ldi open/close to get flags and cred check */
		err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp,
		    &lh_tmp, li);
		if (err != 0) {
			mutex_exit(&dkp->dk_mutex);
			ldi_ident_release(li);
			return (err);
		}

		/* Disk devices really shouldn't clone */
		ASSERT(xdf_devt == (dkp->dk_xdf_dev | part));
		(void) ldi_close(lh_tmp, flag, credp);
	}
	ldi_ident_release(li);

	/* record this open against its open type */
	dkp->dk_xdf_otyp_count[otyp][part]++;

	mutex_exit(&dkp->dk_mutex);
	return (0);
}
/*
 * Close(9E) entry point.  Drops the per-otyp open count for the
 * partition and releases the underlying xdf LDI handle once the last
 * open goes away.
 */
/*ARGSUSED*/
static int
pv_cmdkclose(dev_t dev, int flag, int otyp, cred_t *credp)
{
	int		instance = XDF_DEV2UNIT(dev);
	int		part = XDF_DEV2PART(dev);
	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	int		err = 0;

	ASSERT((otyp >= 0) && otyp < OTYPCNT);

	/*
	 * Sanity check that the dev_t specified corresponds to this
	 * driver and that the device is actually open.  On debug kernels
	 * we'll panic and on non-debug kernels we'll return failure.
	 */
	ASSERT(getmajor(dev) == pv_cmdk_major);
	if (getmajor(dev) != pv_cmdk_major)
		return (ENXIO);

	mutex_enter(&dkp->dk_mutex);
	ASSERT(pv_cmdk_isopen_part(dkp, part));
	if (!pv_cmdk_isopen_part(dkp, part)) {
		mutex_exit(&dkp->dk_mutex);
		return (ENXIO);
	}

	ASSERT(dkp->dk_xdf_lh[part] != NULL);
	ASSERT(dkp->dk_xdf_otyp_count[otyp][part] > 0);
	if (otyp == OTYP_LYR) {
		/* layered opens are counted individually */
		dkp->dk_xdf_otyp_count[otyp][part]--;
	} else {
		/* all chr/blk opens are cleared by a single close */
		dkp->dk_xdf_otyp_count[otyp][part] = 0;
	}

	/* last open of this partition gone; release the LDI handle */
	if (!pv_cmdk_isopen_part(dkp, part)) {
		err = ldi_close(dkp->dk_xdf_lh[part], flag, credp);
		dkp->dk_xdf_lh[part] = NULL;
	}

	mutex_exit(&dkp->dk_mutex);

	return (err);
}
/*
 * Query the emulated hardware (via the controller's ioctl interface)
 * for the physical geometry assigned to this disk and translate it
 * into a cmlb_geom_t.  Returns 0 or the ioctl's error value.
 */
static int
pv_cmdk_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom)
{
	struct scsi_device	*sdevp;
	struct tgdk_geom	tg;
	opaque_t		ctlp;
	int			rv;

	sdevp = ddi_get_driver_private(dip);
	ctlp = sdevp->sd_address.a_hba_tran;
	rv = CTL_IOCTL(ctlp, DIOCTL_GETPHYGEOM, (uintptr_t)&tg, FKIOCTL);
	if (rv != 0)
		return (rv);

	/* This driver won't work if this isn't true */
	ASSERT(tg.g_secsiz == XB_BSIZE);

	pgeom->g_ncyl = tg.g_cyl;
	pgeom->g_acyl = tg.g_acyl;
	pgeom->g_nhead = tg.g_head;
	pgeom->g_nsect = tg.g_sec;
	pgeom->g_secsize = tg.g_secsiz;
	pgeom->g_capacity = tg.g_cap;
	pgeom->g_intrlv = 1;
	pgeom->g_rpm = 3600;

	return (0);
}
/*
 * pv_cmdk_bb_check() checks for the existence of bad block mappings in
 * the alternate partition/slice.  Returns B_FALSE if there are no bad
 * block mappings found, and B_TRUE if there are bad block mappings found.
 */
static boolean_t
pv_cmdk_bb_check(struct pv_cmdk *dkp)
{
	struct alts_parttbl	*ap;
	diskaddr_t		nblocks, blk;
	uint32_t		altused, altbase, altlast;
	uint16_t		vtoctag;
	int			alts;

	/* find slice with V_ALTSCTR tag */
	for (alts = 0; alts < NDKMAP; alts++) {

		if (cmlb_partinfo(dkp->dk_cmlbhandle, alts,
		    &nblocks, &blk, NULL, &vtoctag, 0) != 0) {
			/* no partition table exists */
			return (B_FALSE);
		}

		if ((vtoctag == V_ALTSCTR) && (nblocks > 1))
			break;
	}

	if (alts >= NDKMAP)
		return (B_FALSE);	/* no V_ALTSCTR slice defined */

	/* read in ALTS label block */
	ap = (struct alts_parttbl *)kmem_zalloc(NBPSCTR, KM_SLEEP);
	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
	    TG_READ, ap, blk, NBPSCTR, NULL) != 0)
		goto err;

	altused = ap->alts_ent_used;	/* number of BB entries */
	altbase = ap->alts_ent_base;	/* blk offset from begin slice */
	altlast = ap->alts_ent_end;	/* blk offset to last block */

	/* reject empty or obviously corrupt tables */
	if ((altused == 0) || (altbase < 1) ||
	    (altbase > altlast) || (altlast >= nblocks))
		goto err;

	/* we found bad block mappings */
	kmem_free(ap, NBPSCTR);
	return (B_TRUE);

err:
	kmem_free(ap, NBPSCTR);
	return (B_FALSE);
}
/*
* Autoconfiguration Routines
*/
/*
 * Attach a pv_cmdk instance: bind it to its underlying xdf device,
 * force the xdf frontend to connect to its backend, and set up cmlb,
 * kstats, the devid, and the minor nodes.  Returns DDI_SUCCESS or
 * DDI_FAILURE.
 *
 * The original code duplicated the teardown sequence on every error
 * path; this version uses a single goto-based cleanup ladder so each
 * resource is released exactly once, in reverse acquisition order.
 */
static int
pv_cmdkattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        int             instance = ddi_get_instance(dip);
        dev_info_t      *xdf_dip = NULL;
        struct pv_cmdk  *dkp;
        cmlb_geom_t     pgeom;
        char            *path;
        int             i;

        if (cmd != DDI_ATTACH)
                return (DDI_FAILURE);

        /*
         * This cmdk device layers on top of an xdf device.  So the first
         * thing we need to do is determine which xdf device instance this
         * cmdk instance should be layered on top of.
         */
        path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
        (void) ddi_pathname(dip, path);
        for (i = 0; pv_cmdk_h2p[i].h2p_hvm_path != NULL; i++) {
                if (strcmp(pv_cmdk_h2p[i].h2p_hvm_path, path) == 0)
                        break;
        }
        kmem_free(path, MAXPATHLEN);

        if (pv_cmdk_h2p[i].h2p_hvm_path == NULL) {
                /*
                 * UhOh.  We don't know what xdf instance this cmdk device
                 * should be mapped to.
                 */
                return (DDI_FAILURE);
        }

        /* Check if this device exists */
        xdf_dip = xdf_hvm_hold(pv_cmdk_h2p[i].h2p_pv_path);
        if (xdf_dip == NULL)
                return (DDI_FAILURE);

        /* allocate and initialize our state structure */
        (void) ddi_soft_state_zalloc(pv_cmdk_state, instance);
        dkp = ddi_get_soft_state(pv_cmdk_state, instance);
        mutex_init(&dkp->dk_mutex, NULL, MUTEX_DRIVER, NULL);
        dkp->dk_dip = dip;
        dkp->dk_xdf_dip = xdf_dip;
        dkp->dk_xdf_dev = makedevice(ddi_driver_major(xdf_dip),
            XDF_MINOR(ddi_get_instance(xdf_dip), 0));
        ASSERT((dkp->dk_xdf_dev & XDF_PMASK) == 0);

        /*
         * GROSS HACK ALERT!  GROSS HACK ALERT!
         *
         * Before we can initialize the cmlb layer, we have to tell the
         * underlying xdf device what its physical geometry should be.
         * See the block comments at the top of this file for more info.
         */
        if ((pv_cmdk_getpgeom(dip, &pgeom) != 0) ||
            (xdf_hvm_setpgeom(dkp->dk_xdf_dip, &pgeom) != 0))
                goto err_free_state;

        /* create kstat for iostat(1M) */
        if (xdf_kstat_create(dkp->dk_xdf_dip, "cmdk", instance) != 0)
                goto err_free_state;

        /*
         * Force the xdf front end driver to connect to the backend.  From
         * the solaris device tree perspective, the xdf driver devinfo node
         * is already in the ATTACHED state.  (Otherwise xdf_hvm_hold()
         * would not have returned a dip.)  But this doesn't mean that the
         * xdf device has actually established a connection to its back
         * end driver.  For us to be able to access the xdf device it needs
         * to be connected.
         */
        if (xdf_hvm_connect(dkp->dk_xdf_dip) != 0) {
                cmn_err(CE_WARN,
                    "pv driver failed to connect: %s",
                    pv_cmdk_h2p[i].h2p_pv_path);
                goto err_delete_kstat;
        }

        /*
         * Initialize cmlb.  Note that for partition information cmlb
         * will access the underlying xdf disk device directly via
         * pv_cmdk_lb_rdwr() and pv_cmdk_lb_getinfo().  There are no
         * layered driver handles associated with this access because
         * it is a direct disk access that doesn't go through
         * any of the device nodes exported by the xdf device (since
         * all exported device nodes only reflect the portion of
         * the device visible via the partition/slice that the node
         * is associated with.)  So while not observable via the LDI,
         * this direct disk access is ok since we're actually holding
         * the target device.
         */
        cmlb_alloc_handle((cmlb_handle_t *)&dkp->dk_cmlbhandle);
        if (cmlb_attach(dkp->dk_dip, &pv_cmdk_lb_ops,
            DTYPE_DIRECT,               /* device_type */
            0,                          /* not removable */
            0,                          /* not hot pluggable */
            DDI_NT_BLOCK,
            CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT,  /* mimic cmdk */
            dkp->dk_cmlbhandle, 0) != 0)
                goto err_free_cmlb;

        /* We can't layer on disks with bad block mappings (see top of file) */
        if (pv_cmdk_bb_check(dkp)) {
                cmn_err(CE_WARN,
                    "pv cmdk disks with bad blocks are unsupported: %s",
                    pv_cmdk_h2p[i].h2p_hvm_path);
                goto err_detach_cmlb;
        }

        /* setup devid string */
        pv_cmdk_devid_setup(dkp);

        /* Calling validate will create minor nodes according to disk label */
        (void) cmlb_validate(dkp->dk_cmlbhandle, 0, 0);

        /*
         * Add a zero-length attribute to tell the world we support
         * kernel ioctls (for layered drivers).
         */
        (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
            DDI_KERNEL_IOCTL, NULL, 0);

        /* Have the system report any newly created device nodes */
        ddi_report_dev(dip);

        return (DDI_SUCCESS);

        /* Unwind in reverse acquisition order; fall through deliberately. */
err_detach_cmlb:
        cmlb_detach(dkp->dk_cmlbhandle, 0);
err_free_cmlb:
        cmlb_free_handle(&dkp->dk_cmlbhandle);
err_delete_kstat:
        xdf_kstat_delete(dkp->dk_xdf_dip);
err_free_state:
        ddi_release_devi(dkp->dk_xdf_dip);
        mutex_destroy(&dkp->dk_mutex);
        ddi_soft_state_free(pv_cmdk_state, instance);
        return (DDI_FAILURE);
}
/*
 * Detach a pv_cmdk instance, releasing everything acquired in
 * pv_cmdkattach(): the devid, cmlb handle, mutex, kstats, the hold on
 * the underlying xdf dip, the soft state, and any properties.  The
 * teardown order mirrors attach and is deliberate — do not reorder.
 */
static int
pv_cmdkdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        int instance = ddi_get_instance(dip);
        struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);

        if (cmd != DDI_DETACH)
                return (DDI_FAILURE);

        /* No one should hold our lock while the framework detaches us. */
        ASSERT(MUTEX_NOT_HELD(&dkp->dk_mutex));

        /* Tear down the devid registered by pv_cmdk_devid_setup(). */
        ddi_devid_unregister(dip);
        if (dkp->dk_devid)
                ddi_devid_free(dkp->dk_devid);
        cmlb_detach(dkp->dk_cmlbhandle, 0);
        cmlb_free_handle(&dkp->dk_cmlbhandle);
        mutex_destroy(&dkp->dk_mutex);
        xdf_kstat_delete(dkp->dk_xdf_dip);
        /* Drop the hold taken via xdf_hvm_hold() during attach. */
        ddi_release_devi(dkp->dk_xdf_dip);
        ddi_soft_state_free(pv_cmdk_state, instance);
        ddi_prop_remove_all(dip);
        return (DDI_SUCCESS);
}
/*
 * DDI getinfo(9E) entry point: translate a dev_t into either our
 * devinfo node or our instance number.
 */
/*ARGSUSED*/
static int
pv_cmdk_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result)
{
        dev_t           dev = (dev_t)arg;
        int             instance = XDF_DEV2UNIT(dev);
        struct pv_cmdk  *dkp;

        dkp = ddi_get_soft_state(pv_cmdk_state, instance);

        if (infocmd == DDI_INFO_DEVT2DEVINFO) {
                /* Can't hand back a dip if this instance never attached. */
                if (dkp == NULL)
                        return (DDI_FAILURE);
                *result = (void *)dkp->dk_dip;
                return (DDI_SUCCESS);
        }

        if (infocmd == DDI_INFO_DEVT2INSTANCE) {
                *result = (void *)(intptr_t)instance;
                return (DDI_SUCCESS);
        }

        return (DDI_FAILURE);
}
/*
 * prop_op(9E) entry point: forward dynamic property lookups to the
 * underlying xdf device, translating our dev_t into the equivalent
 * xdf dev_t.  Static property lookups are passed to ddi_prop_op().
 */
static int
pv_cmdk_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int flags, char *name, caddr_t valuep, int *lengthp)
{
        int             instance = ddi_get_instance(dip);
        struct pv_cmdk  *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
        dev_info_t      *xdf_dip;
        dev_t           xdf_devt;
        int             err;

        /*
         * Sanity check that if a dev_t or dip were specified that they
         * correspond to this device driver.  On debug kernels we'll
         * panic and on non-debug kernels we'll return failure.
         */
        ASSERT(ddi_driver_major(dip) == pv_cmdk_major);
        ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == pv_cmdk_major));
        if ((ddi_driver_major(dip) != pv_cmdk_major) ||
            ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != pv_cmdk_major)))
                return (DDI_PROP_NOT_FOUND);

        /*
         * This property lookup might be associated with a device node
         * that is not yet attached, if so pass it onto ddi_prop_op().
         */
        if (dkp == NULL)
                return (ddi_prop_op(dev, dip, prop_op, flags,
                    name, valuep, lengthp));

        /*
         * Make sure we only lookup dynamic properties.
         *
         * If there are static properties of the underlying xdf driver
         * that we want to mirror, then we'll have to explicitly look them
         * up and define them during attach.  There are a few reasons
         * for this.  Most importantly, most static properties are typed
         * and all dynamic properties are untyped, ie, for dynamic
         * properties the caller must know the type of the property and
         * how to interpret the value of the property.  The prop_op driver
         * entry point is only designed for returning dynamic/untyped
         * properties, so if we were to attempt to lookup and pass back
         * static properties of the underlying device here then we would
         * be losing the type information for those properties.  Another
         * reason we don't want to pass on static property requests is that
         * static properties are enumerable in the device tree, where as
         * dynamic ones are not.
         */
        flags |= DDI_PROP_DYNAMIC;

        /*
         * We can't use the ldi here to access the underlying device because
         * the ldi actually opens the device, and that open might fail if the
         * device has already been opened with the FEXCL flag.  If we used
         * the ldi here, it would also be possible for some other caller
         * to try open the device with the FEXCL flag and get a failure
         * back because we have it open to do a property query.
         *
         * Instead we'll grab a hold on the target dip and query the
         * property directly.
         */
        mutex_enter(&dkp->dk_mutex);

        /* The xdf dip goes away on detach; bail if it is already gone. */
        if ((xdf_dip = dkp->dk_xdf_dip) == NULL) {
                mutex_exit(&dkp->dk_mutex);
                return (DDI_PROP_NOT_FOUND);
        }
        /* Hold the xdf dip so it can't detach while we query it. */
        e_ddi_hold_devi(xdf_dip);

        /* figure out the dev_t we're going to pass on down */
        if (dev == DDI_DEV_T_ANY) {
                xdf_devt = DDI_DEV_T_ANY;
        } else {
                /* same partition, but on the xdf device's major/unit */
                xdf_devt = dkp->dk_xdf_dev | XDF_DEV2PART(dev);
        }

        mutex_exit(&dkp->dk_mutex);

        /*
         * cdev_prop_op() is not a public interface, and normally the caller
         * is required to make sure that the target driver actually implements
         * this interface before trying to invoke it.  In this case we know
         * that we're always accessing the xdf driver and it does have this
         * interface defined, so we can skip the check.
         */
        err = cdev_prop_op(xdf_devt, xdf_dip,
            prop_op, flags, name, valuep, lengthp);

        ddi_release_devi(xdf_dip);
        return (err);
}
/*
* Device driver ops vector
*/
/* Character/block entry points exported to the DDI framework. */
static struct cb_ops pv_cmdk_cb_ops = {
        pv_cmdkopen,            /* open */
        pv_cmdkclose,           /* close */
        pv_cmdkstrategy,        /* strategy */
        nodev,                  /* print */
        pv_cmdkdump,            /* dump */
        pv_cmdkread,            /* read */
        pv_cmdkwrite,           /* write */
        pv_cmdkioctl,           /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        nochpoll,               /* poll */
        pv_cmdk_prop_op,        /* cb_prop_op */
        0,                      /* streamtab */
        D_64BIT | D_MP | D_NEW, /* Driver compatibility flag */
        CB_REV,                 /* cb_rev */
        pv_cmdkaread,           /* async read */
        pv_cmdkawrite           /* async write */
};
/* Driver operations vector registered via the modldrv below. */
struct dev_ops pv_cmdk_ops = {
        DEVO_REV,               /* devo_rev, */
        0,                      /* refcnt */
        pv_cmdk_getinfo,        /* info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        pv_cmdkattach,          /* attach */
        pv_cmdkdetach,          /* detach */
        nodev,                  /* reset */
        &pv_cmdk_cb_ops,        /* driver operations */
        (struct bus_ops *)0     /* bus operations */
};
/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
        &mod_driverops,         /* Type of module.  This one is a driver */
        "PV Common Direct Access Disk",
        &pv_cmdk_ops,           /* driver ops */
};

/* Single-driver module linkage passed to mod_install()/mod_remove(). */
static struct modlinkage modlinkage = {
        MODREV_1, (void *)&modldrv, NULL
};
int
_init(void)
{
int rval;
if ((pv_cmdk_major = ddi_name_to_major("cmdk")) == (major_t)-1)
return (EINVAL);
/*
* In general ide usually supports 4 disk devices, this same
* limitation also applies to software emulating ide devices.
* so by default we pre-allocate 4 cmdk soft state structures.
*/
if ((rval = ddi_soft_state_init(&pv_cmdk_state,
sizeof (struct pv_cmdk), PV_CMDK_NODES)) != 0)
return (rval);
/*
* Currently we only support qemu as the backing hardware emulator
* for cmdk devices.
*/
pv_cmdk_h2p = pv_cmdk_h2p_xen_qemu;
/* Install our module */
if ((rval = mod_install(&modlinkage)) != 0) {
ddi_soft_state_fini(&pv_cmdk_state);
return (rval);
}
return (0);
}
/* Module info entry point: report via the common linkage structure. */
int
_info(struct modinfo *modinfop)
{
        int     rv;

        rv = mod_info(&modlinkage, modinfop);
        return (rv);
}
/*
 * Module unload entry point: remove the module, and only if that
 * succeeds tear down the soft state.  Returns 0 or an errno value.
 */
int
_fini(void)
{
        int     err;

        /* mod_remove() fails (non-zero) while the driver is still busy. */
        err = mod_remove(&modlinkage);
        if (err == 0)
                ddi_soft_state_fini(&pv_cmdk_state);
        return (err);
}