xdf.c revision b1a3e7f1642868df84107f6eb9f823e0b02c1ac3
843e19887f64dde75055cf8842fc4db2171eff45johnlev * CDDL HEADER START
843e19887f64dde75055cf8842fc4db2171eff45johnlev * The contents of this file are subject to the terms of the
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Common Development and Distribution License (the "License").
843e19887f64dde75055cf8842fc4db2171eff45johnlev * You may not use this file except in compliance with the License.
843e19887f64dde75055cf8842fc4db2171eff45johnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
843e19887f64dde75055cf8842fc4db2171eff45johnlev * See the License for the specific language governing permissions
843e19887f64dde75055cf8842fc4db2171eff45johnlev * and limitations under the License.
843e19887f64dde75055cf8842fc4db2171eff45johnlev * When distributing Covered Code, include this CDDL HEADER in each
843e19887f64dde75055cf8842fc4db2171eff45johnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
843e19887f64dde75055cf8842fc4db2171eff45johnlev * If applicable, add the following below this CDDL HEADER, with the
843e19887f64dde75055cf8842fc4db2171eff45johnlev * fields enclosed by brackets "[]" replaced with your own identifying
843e19887f64dde75055cf8842fc4db2171eff45johnlev * information: Portions Copyright [yyyy] [name of copyright owner]
843e19887f64dde75055cf8842fc4db2171eff45johnlev * CDDL HEADER END
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Use is subject to license terms.
843e19887f64dde75055cf8842fc4db2171eff45johnlev * xdf.c - Xen Virtual Block Device Driver
843e19887f64dde75055cf8842fc4db2171eff45johnlev * - support alternate block size (currently only DEV_BSIZE supported)
843e19887f64dde75055cf8842fc4db2171eff45johnlev * - revalidate geometry for removable devices
843e19887f64dde75055cf8842fc4db2171eff45johnlev#pragma ident "%Z%%M% %I% %E% SMI"
843e19887f64dde75055cf8842fc4db2171eff45johnlev#define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */
843e19887f64dde75055cf8842fc4db2171eff45johnlev ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
843e19887f64dde75055cf8842fc4db2171eff45johnlev ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
843e19887f64dde75055cf8842fc4db2171eff45johnlev ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
843e19887f64dde75055cf8842fc4db2171eff45johnlev (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic void *vbd_ss;
843e19887f64dde75055cf8842fc4db2171eff45johnlev * dev_ops and cb_ops entrypoints
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_attach(dev_info_t *, ddi_attach_cmd_t);
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_detach(dev_info_t *, ddi_detach_cmd_t);
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_close(dev_t, int, int, struct cred *);
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_aread(dev_t, struct aio_req *, cred_t *);
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_awrite(dev_t, struct aio_req *, cred_t *);
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
843e19887f64dde75055cf8842fc4db2171eff45johnlev * misc private functions
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *);
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_prepare_rreq(xdf_t *, struct buf *, blkif_request_t *);
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_check_state_transition(xdf_t *, XenbusState);
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic void xdf_timeout_handler(void *);
843e19887f64dde75055cf8842fc4db2171eff45johnlev 0, /* devo_refcnt */
843e19887f64dde75055cf8842fc4db2171eff45johnlev &mod_driverops, /* Type of module. This one is a driver */
843e19887f64dde75055cf8842fc4db2171eff45johnlev * I/O buffer DMA attributes
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
843e19887f64dde75055cf8842fc4db2171eff45johnlev (uint64_t)0xffffffffffffffff, /* highest usable address */
843e19887f64dde75055cf8842fc4db2171eff45johnlev BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */
843e19887f64dde75055cf8842fc4db2171eff45johnlev 0, /* flags (reserved) */
843e19887f64dde75055cf8842fc4db2171eff45johnlev/* callbacks from common label */
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
843e19887f64dde75055cf8842fc4db2171eff45johnlevstatic int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) == 0) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache",
843e19887f64dde75055cf8842fc4db2171eff45johnlev sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev/*ARGSUSED*/
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp)
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) {
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
843e19887f64dde75055cf8842fc4db2171eff45johnlev * xdf dynamic properties are device specific and size oriented.
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Requests issued under conditions where size is valid are passed
843e19887f64dde75055cf8842fc4db2171eff45johnlev * to ddi_prop_op_nblocks with the size information, otherwise the
843e19887f64dde75055cf8842fc4db2171eff45johnlev * request is passed to ddi_prop_op.
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* do cv_wait until connected or failed */
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (cmlb_partinfo(vdp->xdf_vd_lbl, XDF_PART(getminor(dev)), &p_blkcnt,
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (ddi_prop_op(dev, dip, prop_op, mod_flags, name, valuep,
843e19887f64dde75055cf8842fc4db2171eff45johnlev xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM,
843e19887f64dde75055cf8842fc4db2171eff45johnlev "xdfdebug", 0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS)
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (ddi_get_iblock_cookie(devi, 0, &ibc) != DDI_SUCCESS) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie",
843e19887f64dde75055cf8842fc4db2171eff45johnlev mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc);
843e19887f64dde75055cf8842fc4db2171eff45johnlev mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc);
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc)
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie",
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
843e19887f64dde75055cf8842fc4db2171eff45johnlev &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev * create kstat for iostat(1M)
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((vdp->xdf_xdev_iostat = kstat_create("xdf", instance, NULL, "disk",
843e19887f64dde75055cf8842fc4db2171eff45johnlev KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) != NULL) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev * driver handles kernel-issued IOCTLs
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop",
843e19887f64dde75055cf8842fc4db2171eff45johnlev * create default device minor nodes: non-removable disk
843e19887f64dde75055cf8842fc4db2171eff45johnlev * we will adjust minor nodes after we are connected w/ backend
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1, DDI_NT_BLOCK,
843e19887f64dde75055cf8842fc4db2171eff45johnlev CMLB_FAKE_LABEL_ONE_PARTITION, vdp->xdf_vd_lbl, NULL) != 0) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed",
843e19887f64dde75055cf8842fc4db2171eff45johnlev * We ship with cache-enabled disks
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* Watch backend XenbusState change */
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi));
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* we'll support backend running in domU later */
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
1d03c31e0733adea0edef54f0d5d2ea9639ecd2ajohnlev /* change status to stop further I/O requests */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* make sure no more I/O responses left in the ring buffer */
1d03c31e0733adea0edef54f0d5d2ea9639ecd2ajohnlev * no need to teardown the ring buffer here
1d03c31e0733adea0edef54f0d5d2ea9639ecd2ajohnlev * it will be simply re-init'ed during resume when
1d03c31e0733adea0edef54f0d5d2ea9639ecd2ajohnlev * we call xvdi_alloc_ring
843e19887f64dde75055cf8842fc4db2171eff45johnlev/*ARGSUSED*/
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
843e19887f64dde75055cf8842fc4db2171eff45johnlev/*ARGSUSED*/
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
843e19887f64dde75055cf8842fc4db2171eff45johnlev * wait for any outstanding I/O to complete
843e19887f64dde75055cf8842fc4db2171eff45johnlev DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance));
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_open(dev_t *devp, int flag, int otyp, cred_t *credp)
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
843e19887f64dde75055cf8842fc4db2171eff45johnlev DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor)));
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* do cv_wait until connected or failed */
a6e5dd18ce1ac2c94bbb09fbb01867dc6effa694cz if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* are we the first one to open this node? */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* force a re-validation */
843e19887f64dde75055cf8842fc4db2171eff45johnlev * check size
843e19887f64dde75055cf8842fc4db2171eff45johnlev * ignore CD/DVD which contains a zero-sized s0
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev/*ARGSUSED*/
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) ||
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev * starting beyond partition
843e19887f64dde75055cf8842fc4db2171eff45johnlev DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64,
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* Legacy: don't set error flag at this case */
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev * adjust for partial transfer
843e19887f64dde75055cf8842fc4db2171eff45johnlev bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT;
843e19887f64dde75055cf8842fc4db2171eff45johnlev DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n",
843e19887f64dde75055cf8842fc4db2171eff45johnlev kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev/*ARGSUSED*/
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))))
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
843e19887f64dde75055cf8842fc4db2171eff45johnlev/*ARGSUSED*/
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))))
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop));
843e19887f64dde75055cf8842fc4db2171eff45johnlev/*ARGSUSED*/
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))))
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop));
843e19887f64dde75055cf8842fc4db2171eff45johnlev/*ARGSUSED*/
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))))
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop));
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))))
843e19887f64dde75055cf8842fc4db2171eff45johnlev DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n",
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64,
843e19887f64dde75055cf8842fc4db2171eff45johnlev kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
843e19887f64dde75055cf8842fc4db2171eff45johnlev/*ARGSUSED*/
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* controller information */
843e19887f64dde75055cf8842fc4db2171eff45johnlev (void) strncpy((char *)(&info.dki_cname), "xdf", 8);
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* unit information */
843e19887f64dde75055cf8842fc4db2171eff45johnlev (void) strncpy((char *)(&info.dki_dname), "xdf", 8);
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate),
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev * is media removable?
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode))
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp,
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev struct dk_callback *dkc = (struct dk_callback *)arg;
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* need to return 0 after calling callback */
843e19887f64dde75055cf8842fc4db2171eff45johnlev * xdf interrupt handler
843e19887f64dde75055cf8842fc4db2171eff45johnlev * complete all requests which have a response
843e19887f64dde75055cf8842fc4db2171eff45johnlev status = ddi_get16(acchdl, (uint16_t *)&resp->status);
843e19887f64dde75055cf8842fc4db2171eff45johnlev DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n",
843e19887f64dde75055cf8842fc4db2171eff45johnlev * XXPV - close connection to the backend and restart
843e19887f64dde75055cf8842fc4db2171eff45johnlevint xdf_fbrewrites; /* how many times was our flush block rewritten */
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Snarf new data if our flush block was re-written
843e19887f64dde75055cf8842fc4db2171eff45johnlev return; /* write was a flush write */
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
843e19887f64dde75055cf8842fc4db2171eff45johnlev * return value of xdf_prepare_rreq()
843e19887f64dde75055cf8842fc4db2171eff45johnlev * used in xdf_iostart()
843e19887f64dde75055cf8842fc4db2171eff45johnlev#define XF_PARTIAL 0 /* rreq is full, not all I/O in buf transferred */
843e19887f64dde75055cf8842fc4db2171eff45johnlev * populate the ring request(s)
843e19887f64dde75055cf8842fc4db2171eff45johnlev * loop until there is no buf to transfer or no free slot
843e19887f64dde75055cf8842fc4db2171eff45johnlev * available in I/O ring
a6e5dd18ce1ac2c94bbb09fbb01867dc6effa694cz for (;;) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* active buf queue empty? */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* try to grab a vreq for this bp */
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL))
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* alloc DMA/GTE resources */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* get next blkif_request in the ring */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* populate blkif_request with this buf */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* finish this bp, switch to next one */
843e19887f64dde75055cf8842fc4db2171eff45johnlev kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Send the request(s) to the backend
843e19887f64dde75055cf8842fc4db2171eff45johnlev "sent request(s) to backend\n"));
843e19887f64dde75055cf8842fc4db2171eff45johnlev * populate a single blkif_request_t w/ a buf
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
843e19887f64dde75055cf8842fc4db2171eff45johnlev ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs));
843e19887f64dde75055cf8842fc4db2171eff45johnlev * loop until all segments are populated or no more dma cookie in buf
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Each segment of a blkif request can transfer up to
843e19887f64dde75055cf8842fc4db2171eff45johnlev * one 4K page of data.
843e19887f64dde75055cf8842fc4db2171eff45johnlev blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
843e19887f64dde75055cf8842fc4db2171eff45johnlev DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n",
843e19887f64dde75055cf8842fc4db2171eff45johnlev gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT);
843e19887f64dde75055cf8842fc4db2171eff45johnlev ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
843e19887f64dde75055cf8842fc4db2171eff45johnlev DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* last win */
843e19887f64dde75055cf8842fc4db2171eff45johnlev DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n",
843e19887f64dde75055cf8842fc4db2171eff45johnlev#define XDF_POLLCNT 12 /* loop for 12 times before time out */
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND))
843e19887f64dde75055cf8842fc4db2171eff45johnlev * file-backed devices can be slow
843e19887f64dde75055cf8842fc4db2171eff45johnlev/* ARGSUSED5 */
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp,
843e19887f64dde75055cf8842fc4db2171eff45johnlev vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_xdev_nblocks)
843e19887f64dde75055cf8842fc4db2171eff45johnlev kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
843e19887f64dde75055cf8842fc4db2171eff45johnlev * synthetic geometry
843e19887f64dde75055cf8842fc4db2171eff45johnlev vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp)
843e19887f64dde75055cf8842fc4db2171eff45johnlev vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev * No real HBA, no geometry available from it
843e19887f64dde75055cf8842fc4db2171eff45johnlev/*ARGSUSED*/
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp)
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep)
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))))
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev/* ARGSUSED3 */
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (0);
843e19887f64dde75055cf8842fc4db2171eff45johnlev return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg));
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Kick-off connect process
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Status should be XD_UNKNOWN or XD_CLOSED
843e19887f64dde75055cf8842fc4db2171eff45johnlev * On success, status will be changed to XD_INIT
843e19887f64dde75055cf8842fc4db2171eff45johnlev * On error, status won't be changed
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1)
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel",
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
843e19887f64dde75055cf8842fc4db2171eff45johnlev "failed to add intr handler", ddi_get_name_addr(dip));
843e19887f64dde75055cf8842fc4db2171eff45johnlev sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring",
843e19887f64dde75055cf8842fc4db2171eff45johnlev vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Write into xenstore the info needed by backend
843e19887f64dde75055cf8842fc4db2171eff45johnlev "failed to get xenstore node path",
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: failed to start transaction",
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref",
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u",
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: failed to write event-channel",
843e19887f64dde75055cf8842fc4db2171eff45johnlev if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev "failed to switch state to XenbusStateInitialised",
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* kick-off connect process */
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: failed to end transaction",
843e19887f64dde75055cf8842fc4db2171eff45johnlev xvdi_fatal_error(dip, rv, "completing transaction");
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting",
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Kick-off disconnect process
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Status won't be changed
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting",
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Get a DEV_BSIZE aligned buffer
843e19887f64dde75055cf8842fc4db2171eff45johnlev vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP);
843e19887f64dde75055cf8842fc4db2171eff45johnlev (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE);
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block,
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Finish other initialization after we've connected to backend
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Status should be XD_INIT before calling this routine
843e19887f64dde75055cf8842fc4db2171eff45johnlev * On success, status should be changed to XD_READY
843e19887f64dde75055cf8842fc4db2171eff45johnlev * On error, status should stay XD_INIT
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Determine if feature barrier is supported by backend
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier",
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* probe backend */
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi),
843e19887f64dde75055cf8842fc4db2171eff45johnlev "cannot read backend info", ddi_get_name_addr(devi));
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* fix disk type */
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type",
843e19887f64dde75055cf8842fc4db2171eff45johnlev "cannot read device-type", ddi_get_name_addr(devi));
843e19887f64dde75055cf8842fc4db2171eff45johnlev * We've created all the minor nodes via cmlb_attach() using default
843e19887f64dde75055cf8842fc4db2171eff45johnlev * value in xdf_attach() to make it possible to block in xdf_open(),
843e19887f64dde75055cf8842fc4db2171eff45johnlev * in case there's anyone (say, booting thread) ever trying to open
843e19887f64dde75055cf8842fc4db2171eff45johnlev * it before connected to backend. We will refresh all those minor
843e19887f64dde75055cf8842fc4db2171eff45johnlev * nodes w/ latest info we've got now when we are almost connected.
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Don't do this when xdf is already opened by someone (could happen
843e19887f64dde75055cf8842fc4db2171eff45johnlev * during resume), for that cmlb_attach() will invalidate the label info
843e19887f64dde75055cf8842fc4db2171eff45johnlev * and confuse those who have already opened the node, which is bad.
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* re-init cmlb w/ latest info we got from backend */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* mark vbd is ready for I/O */
843e19887f64dde75055cf8842fc4db2171eff45johnlev * If backend has feature-barrier, see if it supports disk
843e19887f64dde75055cf8842fc4db2171eff45johnlev * cache flush op.
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Pretend we already know flush is supported so probe
843e19887f64dde75055cf8842fc4db2171eff45johnlev * will attempt the correct op.
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev * If the other end does not support the cache flush op
843e19887f64dde75055cf8842fc4db2171eff45johnlev * then we must use a barrier-write to force disk
843e19887f64dde75055cf8842fc4db2171eff45johnlev * cache flushing. Barrier writes require that a data
843e19887f64dde75055cf8842fc4db2171eff45johnlev * block actually be written.
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Cache a block to barrier-write when we are
843e19887f64dde75055cf8842fc4db2171eff45johnlev * asked to perform a flush.
843e19887f64dde75055cf8842fc4db2171eff45johnlev * XXX - would it be better to just copy 1 block
843e19887f64dde75055cf8842fc4db2171eff45johnlev * (512 bytes) from whatever write we did last
843e19887f64dde75055cf8842fc4db2171eff45johnlev * and rewrite that block?
843e19887f64dde75055cf8842fc4db2171eff45johnlev cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi),
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Finish other uninitialization after we've disconnected from backend
843e19887f64dde75055cf8842fc4db2171eff45johnlev * when status is XD_CLOSING or XD_INIT. After returns, status is XD_CLOSED
843e19887f64dde75055cf8842fc4db2171eff45johnlev/*ARGSUSED*/
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data)
843e19887f64dde75055cf8842fc4db2171eff45johnlev DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n",
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev * backend recovered from a previous failure,
843e19887f64dde75055cf8842fc4db2171eff45johnlev * kick-off connect process again
843e19887f64dde75055cf8842fc4db2171eff45johnlev " failed to start reconnecting to backend",
843e19887f64dde75055cf8842fc4db2171eff45johnlev (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* finish final init after connect */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* first check if BE closed unexpectedly */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* notify anybody waiting for oe state change */
843e19887f64dde75055cf8842fc4db2171eff45johnlev } else if ((status == XD_CLOSED) && !unexpect_die) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* interface is closed successfully, remove all minor nodes */
843e19887f64dde75055cf8842fc4db2171eff45johnlev/* check if partition is open, -1 - check all partitions on the disk */
843e19887f64dde75055cf8842fc4db2171eff45johnlev for (i = 0; i < OTYPCNT; i++) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Xdf_check_state_transition will check the XenbusState change to see
843e19887f64dde75055cf8842fc4db2171eff45johnlev * if the change is a valid transition or not.
843e19887f64dde75055cf8842fc4db2171eff45johnlev * The new state is written by backend domain, or by running xenstore-write
843e19887f64dde75055cf8842fc4db2171eff45johnlev * to change it manually in dom0
843e19887f64dde75055cf8842fc4db2171eff45johnlevxdf_check_state_transition(xdf_t *vdp, XenbusState oestate)
843e19887f64dde75055cf8842fc4db2171eff45johnlev#define STBUG 2 /* unexpected state change, could be a bug */
843e19887f64dde75055cf8842fc4db2171eff45johnlev "state change to %d!, when status is %d",
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0)
843e19887f64dde75055cf8842fc4db2171eff45johnlev * callback func when DMA/GTE resources are available
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Note: we only register one callback function with the grant table subsystem
843e19887f64dde75055cf8842fc4db2171eff45johnlev * since we only have one 'struct gnttab_free_callback' in xdf_t.
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* new timeout thread could be re-scheduled */
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Alloc a vreq for this bp
843e19887f64dde75055cf8842fc4db2171eff45johnlev * bp->av_back contains the pointer to the vreq upon return
843e19887f64dde75055cf8842fc4db2171eff45johnlev vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* restart I/O after one second */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* init of other fields in vreq is up to the caller */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /*FALLTHRU*/
843e19887f64dde75055cf8842fc4db2171eff45johnlev /*FALLTHRU*/
843e19887f64dde75055cf8842fc4db2171eff45johnlev /*FALLTHRU*/
843e19887f64dde75055cf8842fc4db2171eff45johnlev * Initialize the DMA and grant table resources for the buf
843e19887f64dde75055cf8842fc4db2171eff45johnlev int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
843e19887f64dde75055cf8842fc4db2171eff45johnlev "xdf@%s: get ge_slotfailed\n",
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* See if we wrote new data to our flush block */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /*FALLTHRU*/
843e19887f64dde75055cf8842fc4db2171eff45johnlev * alloc DMA handle
843e19887f64dde75055cf8842fc4db2171eff45johnlev rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
843e19887f64dde75055cf8842fc4db2171eff45johnlev DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
843e19887f64dde75055cf8842fc4db2171eff45johnlev /*FALLTHRU*/
843e19887f64dde75055cf8842fc4db2171eff45johnlev * alloc dma handle for 512-byte aligned buf
843e19887f64dde75055cf8842fc4db2171eff45johnlev * XXPV: we need to temporarily enlarge the seg
843e19887f64dde75055cf8842fc4db2171eff45johnlev * boundary and s/g length to work around CR6381968
843e19887f64dde75055cf8842fc4db2171eff45johnlev "handle alloc failed\n",
843e19887f64dde75055cf8842fc4db2171eff45johnlev /*FALLTHRU*/
843e19887f64dde75055cf8842fc4db2171eff45johnlev * alloc 512-byte aligned buf
843e19887f64dde75055cf8842fc4db2171eff45johnlev "xdf@%s: DMA mem allocation failed\n",
843e19887f64dde75055cf8842fc4db2171eff45johnlev /*FALLTHRU*/
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* get num of dma windows */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /*FALLTHRU*/
843e19887f64dde75055cf8842fc4db2171eff45johnlev * get ge_slot, callback is set upon failure from gs_get(),
843e19887f64dde75055cf8842fc4db2171eff45johnlev * if not set previously
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* nothing needs to be done */
843e19887f64dde75055cf8842fc4db2171eff45johnlev * move to the next dma window
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* get a ge_slot for this DMA window */
843e19887f64dde75055cf8842fc4db2171eff45johnlev rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* try to alloc GTEs needed in this slot, first */
843e19887f64dde75055cf8842fc4db2171eff45johnlev (void (*)(void *))xdf_dmacallback,
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* restart I/O after one second */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* init gs_slot */
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* release all grant table entry resources used in this slot */
843e19887f64dde75055cf8842fc4db2171eff45johnlev grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead);
843e19887f64dde75055cf8842fc4db2171eff45johnlev ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
843e19887f64dde75055cf8842fc4db2171eff45johnlev gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread);
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* clean up I/Os in ring that have responses */
843e19887f64dde75055cf8842fc4db2171eff45johnlev if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* free up all grant table entries */
843e19887f64dde75055cf8842fc4db2171eff45johnlev * move bp back to active list orderly
843e19887f64dde75055cf8842fc4db2171eff45johnlev * vreq_busy is updated in vreq_free()
843e19887f64dde75055cf8842fc4db2171eff45johnlev /* move to the head of list */