strategy.c revision 2bc987325e3ded1865bff043128661815c4690b9
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Device Strategy
*/
#include <sys/dktp/cm.h>
#include <sys/kstat.h>
#include <sys/dktp/quetypes.h>
#include <sys/dktp/queue.h>
#include <sys/dktp/tgcom.h>
#include <sys/dktp/fctypes.h>
#include <sys/dktp/flowctrl.h>
#include <sys/param.h>
#include <vm/page.h>
#include <sys/modctl.h>
/*
* Object Management
*/
/* Forward declaration; qmerge_nextbp() is defined further down in this file. */
static struct buf *qmerge_nextbp(struct que_data *qfp, struct buf *bp_merge,
int *can_merge);
/* Module description referenced from modlinkage below. */
static struct modlmisc modlmisc = {
&mod_miscops, /* Type of module */
"Device Strategy Objects"
};
/* Linkage handed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
MODREV_1,
&modlmisc,
NULL
};
/*
 * Module load entry point: hands modlinkage to mod_install() and returns
 * whatever mod_install() reports.
 */
int
_init(void)
{
	int rv;

	rv = mod_install(&modlinkage);
	return (rv);
}
/*
 * Module unload entry point: hands modlinkage to mod_remove() and returns
 * whatever mod_remove() reports.
 */
int
_fini(void)
{
	int rv;

	rv = mod_remove(&modlinkage);
	return (rv);
}
/*
 * Module information entry point: forwards modinfop together with
 * modlinkage to mod_info() and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	int rv;

	rv = mod_info(&modlinkage, modinfop);
	return (rv);
}
/*
* Common Flow Control functions
*/
/*
* Local static data
*/
/*
* State compiled in only when FLC_DEBUG is defined: three bit flags, the
* flc_debug mask built from them, and flc_malloc_intr, a counter that
* dmult_deque() increments. The flag names suggest entry/error/I-O tracing
* classes, but nothing in this file tests them -- TODO confirm intent.
*/
#ifdef FLC_DEBUG
#define DENT 0x0001
#define DERR 0x0002
#define DIO 0x0004
static int flc_debug = DENT|DERR|DIO;
#include <sys/thread.h>
static int flc_malloc_intr = 0;
#endif /* FLC_DEBUG */
/* When zero, fc_start_kstat() returns without creating a kstat. */
static int flc_kstat = 1;
/* Prototypes of the helpers shared by the flow-control objects below. */
static struct flc_obj *fc_create(struct flc_objops *fcopsp);
static int fc_init(opaque_t queuep, opaque_t tgcom_objp, opaque_t que_objp,
void *lkarg);
static int fc_free(struct flc_obj *flcobjp);
static int fc_start_kstat(opaque_t queuep, char *devtype, int instance);
static int fc_stop_kstat(opaque_t queuep);
static struct flc_obj *
fc_create(struct flc_objops *fcopsp)
{
struct flc_obj *flcobjp;
struct fc_data *fcdp;
flcobjp = kmem_zalloc((sizeof (*flcobjp) + sizeof (*fcdp)), KM_NOSLEEP);
if (!flcobjp)
return (NULL);
fcdp = (struct fc_data *)(flcobjp+1);
flcobjp->flc_data = (opaque_t)fcdp;
flcobjp->flc_ops = fcopsp;
return ((opaque_t)flcobjp);
}
/* Value copied into ds_waitcnt of every fc_data set up by fc_init(). */
static int dmult_maxcnt = DMULT_MAXCNT;

/*
 * Initialize the fc_data behind queuep.
 *
 * Sets up ds_mutex (lkarg is passed through to mutex_init()), records the
 * queue and target-communication objects, seeds ds_waitcnt from
 * dmult_maxcnt, then initializes both objects via QUE_INIT() and
 * TGCOM_INIT().
 *
 * Always returns DDI_SUCCESS.
 */
static int
fc_init(opaque_t queuep, opaque_t tgcom_objp, opaque_t que_objp, void *lkarg)
{
	struct fc_data *fcp = (struct fc_data *)queuep;

	mutex_init(&fcp->ds_mutex, NULL, MUTEX_DRIVER, lkarg);

	fcp->ds_waitcnt = dmult_maxcnt;
	fcp->ds_tgcomobjp = tgcom_objp;
	fcp->ds_queobjp = que_objp;

	QUE_INIT(que_objp, lkarg);
	TGCOM_INIT(tgcom_objp);

	return (DDI_SUCCESS);
}
static int
fc_free(struct flc_obj *flcobjp)
{
struct fc_data *fcdp;
fcdp = (struct fc_data *)flcobjp->flc_data;
if (fcdp->ds_queobjp)
QUE_FREE(fcdp->ds_queobjp);
if (fcdp->ds_tgcomobjp) {
TGCOM_FREE(fcdp->ds_tgcomobjp);
mutex_destroy(&fcdp->ds_mutex);
}
kmem_free(flcobjp, (sizeof (*flcobjp) + sizeof (*fcdp)));
return (0);
}
/*
 * Create and install the I/O kstat for this flow-control instance.
 *
 * Nothing is done when flc_kstat is zero or a kstat is already attached.
 * The kstat is created under module name "cmdk" with the given instance;
 * devtype is not used. A failed kstat_create() simply leaves ds_kstat NULL.
 *
 * Always returns 0.
 */
/*ARGSUSED*/
static int
fc_start_kstat(opaque_t queuep, char *devtype, int instance)
{
	struct fc_data *fcp = (struct fc_data *)queuep;

	if (!flc_kstat || fcp->ds_kstat != NULL)
		return (0);

	fcp->ds_kstat = kstat_create("cmdk", instance, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (fcp->ds_kstat != NULL)
		kstat_install(fcp->ds_kstat);

	return (0);
}
/*
 * Delete the kstat attached to this flow-control instance, if any, and
 * clear ds_kstat so that a later fc_start_kstat() can create a new one.
 *
 * Always returns 0.
 */
static int
fc_stop_kstat(opaque_t queuep)
{
	struct fc_data *fcp = (struct fc_data *)queuep;

	if (fcp->ds_kstat == NULL)
		return (0);

	kstat_delete(fcp->ds_kstat);
	fcp->ds_kstat = NULL;
	return (0);
}
/*
* Single Command per Device
*/
/*
* Local Function Prototypes
*/
/* dsngl_restart() is defined after dsngl_deque(); it takes a struct fc_data *. */
static int dsngl_restart();
static int dsngl_enque(opaque_t, struct buf *);
static int dsngl_deque(opaque_t, struct buf *);
/*
* Operations vector for the single-command strategy: the shared fc_*
* init/free/kstat helpers plus the dsngl enque/deque routines. The last
* two slots are left zero.
*/
struct flc_objops dsngl_ops = {
fc_init,
fc_free,
dsngl_enque,
dsngl_deque,
fc_start_kstat,
fc_stop_kstat,
0, 0
};
/*
 * Create a single-command flow-control object: an fc_create() object wired
 * to dsngl_ops. Returns NULL when fc_create() fails.
 */
struct flc_obj *
dsngl_create()
{
	struct flc_obj *objp;

	objp = fc_create(&dsngl_ops);
	return (objp);
}
/*
 * Submit a request under the single-command strategy.
 *
 * A NULL in_bp is ignored. Under ds_mutex, the request is queued with
 * QUE_ADD() when another request is parked in ds_bp or something is already
 * outstanding. Otherwise a packet is prepared with TGCOM_PKT(); on failure
 * the request is parked in ds_bp (dsngl_restart is passed to TGCOM_PKT() as
 * the callback), on success the outstanding count is bumped and the request
 * is transported after the mutex has been dropped.
 *
 * The wait-queue kstat is entered in every case when a kstat is attached.
 *
 * Always returns 0.
 */
static int
dsngl_enque(opaque_t queuep, struct buf *in_bp)
{
	struct fc_data *fcp = (struct fc_data *)queuep;
	opaque_t que_objp = fcp->ds_queobjp;
	opaque_t tgcom_objp = fcp->ds_tgcomobjp;
	int busy;

	if (in_bp == NULL)
		return (0);

	mutex_enter(&fcp->ds_mutex);

	busy = (fcp->ds_bp != NULL) || (fcp->ds_outcnt != 0);
	if (busy)
		QUE_ADD(que_objp, in_bp);

	if (fcp->ds_kstat != NULL)
		kstat_waitq_enter(KSTAT_IO_PTR(fcp->ds_kstat));

	if (busy) {
		mutex_exit(&fcp->ds_mutex);
		return (0);
	}

	if (TGCOM_PKT(tgcom_objp, in_bp, dsngl_restart,
	    (caddr_t)fcp) != DDI_SUCCESS) {
		/* No packet resources now: park the request for later. */
		fcp->ds_bp = in_bp;
		mutex_exit(&fcp->ds_mutex);
		return (0);
	}

	fcp->ds_outcnt++;
	if (fcp->ds_kstat != NULL)
		kstat_waitq_to_runq(KSTAT_IO_PTR(fcp->ds_kstat));
	mutex_exit(&fcp->ds_mutex);

	/* Transport is done without holding ds_mutex. */
	TGCOM_TRANSPORT(tgcom_objp, in_bp);
	return (0);
}
/*
* Account for a request leaving the run queue (when in_bp is non-NULL) and
* then try to start queued work under the single-command strategy.
*
* queuep: the fc_data for this flow-control instance.
* in_bp: request being retired, or NULL (as passed by dsngl_restart())
* when only the restart part should run.
*
* Always returns 0.
*/
static int
dsngl_deque(opaque_t queuep, struct buf *in_bp)
{
struct fc_data *dsnglp = (struct fc_data *)queuep;
opaque_t tgcom_objp;
opaque_t que_objp;
struct buf *bp;
que_objp = dsnglp->ds_queobjp;
tgcom_objp = dsnglp->ds_tgcomobjp;
mutex_enter(&dsnglp->ds_mutex);
if (in_bp) {
dsnglp->ds_outcnt--;
if (dsnglp->ds_kstat) {
/* Bytes counted are the bytes actually moved: b_bcount - b_resid. */
if (in_bp->b_flags & B_READ) {
KSTAT_IO_PTR(dsnglp->ds_kstat)->reads++;
KSTAT_IO_PTR(dsnglp->ds_kstat)->nread +=
(in_bp->b_bcount - in_bp->b_resid);
} else {
KSTAT_IO_PTR(dsnglp->ds_kstat)->writes++;
KSTAT_IO_PTR(dsnglp->ds_kstat)->nwritten +=
(in_bp->b_bcount - in_bp->b_resid);
}
kstat_runq_exit(KSTAT_IO_PTR(dsnglp->ds_kstat));
}
}
for (;;) {
/* Use the parked request if there is one, else pull from the queue. */
if (!dsnglp->ds_bp)
dsnglp->ds_bp = QUE_DEL(que_objp);
/*
* Stop when there is nothing to send, when TGCOM_PKT() fails (the
* request stays parked in ds_bp), or when a request is still
* outstanding. Note TGCOM_PKT() is evaluated before ds_outcnt is
* tested.
*/
if (!dsnglp->ds_bp ||
(TGCOM_PKT(tgcom_objp, dsnglp->ds_bp, dsngl_restart,
(caddr_t)dsnglp) != DDI_SUCCESS) ||
dsnglp->ds_outcnt) {
mutex_exit(&dsnglp->ds_mutex);
return (0);
}
dsnglp->ds_outcnt++;
bp = dsnglp->ds_bp;
/* Pre-fetch the following request into ds_bp before dropping the lock. */
dsnglp->ds_bp = QUE_DEL(que_objp);
if (dsnglp->ds_kstat)
kstat_waitq_to_runq(KSTAT_IO_PTR(dsnglp->ds_kstat));
mutex_exit(&dsnglp->ds_mutex);
/* Transport runs without ds_mutex held. */
TGCOM_TRANSPORT(tgcom_objp, bp);
/* If the lock cannot be re-taken immediately, give up instead of blocking. */
if (!mutex_tryenter(&dsnglp->ds_mutex))
return (0);
}
}
/*
 * Callback handed to TGCOM_PKT() by the dsngl routines. Re-runs the
 * dispatch loop of dsngl_deque() with no request to retire.
 *
 * Always returns -1.
 */
static int
dsngl_restart(struct fc_data *dsnglp)
{
	struct buf *no_bp = NULL;

	(void) dsngl_deque(dsnglp, no_bp);
	return (-1);
}
/*
* Multiple Commands per Device
*/
/*
* Local Function Prototypes
*/
/* dmult_restart() is defined after dmult_deque(); it takes a struct fc_data *. */
static int dmult_restart();
static int dmult_enque(opaque_t, struct buf *);
static int dmult_deque(opaque_t, struct buf *);
/*
* Operations vector for the multiple-command strategy: the shared fc_*
* init/free/kstat helpers plus the dmult enque/deque routines. The last
* two slots are left zero.
*/
struct flc_objops dmult_ops = {
fc_init,
fc_free,
dmult_enque,
dmult_deque,
fc_start_kstat,
fc_stop_kstat,
0, 0
};
/*
 * Create a multiple-command flow-control object: an fc_create() object
 * wired to dmult_ops. Returns NULL when fc_create() fails.
 */
struct flc_obj *
dmult_create()
{
	struct flc_obj *objp;

	objp = fc_create(&dmult_ops);
	return (objp);
}
/*
* Some of the object management functions QUE_ADD() and QUE_DEL()
* do not acquire lock.
* They depend on dmult_enque(), dmult_deque() to do all locking.
* If this changes we have to grab locks in qmerge_add() and qmerge_del().
*/
/*
 * Submit a request under the multiple-command strategy.
 *
 * A NULL in_bp is ignored. Under ds_mutex, the request is queued with
 * QUE_ADD() when the outstanding count has reached ds_waitcnt or another
 * request is parked in ds_bp. Otherwise a packet is prepared with
 * TGCOM_PKT(); on failure the request is parked in ds_bp (dmult_restart is
 * passed to TGCOM_PKT() as the callback), on success the outstanding count
 * is bumped and the request is transported after the mutex has been
 * dropped.
 *
 * The wait-queue kstat is entered in every case when a kstat is attached.
 *
 * Always returns 0.
 */
static int
dmult_enque(opaque_t queuep, struct buf *in_bp)
{
	struct fc_data *fcp = (struct fc_data *)queuep;
	opaque_t que_objp = fcp->ds_queobjp;
	opaque_t tgcom_objp = fcp->ds_tgcomobjp;
	int must_wait;

	if (in_bp == NULL)
		return (0);

	mutex_enter(&fcp->ds_mutex);

	must_wait = (fcp->ds_outcnt >= fcp->ds_waitcnt) ||
	    (fcp->ds_bp != NULL);
	if (must_wait)
		QUE_ADD(que_objp, in_bp);

	if (fcp->ds_kstat != NULL)
		kstat_waitq_enter(KSTAT_IO_PTR(fcp->ds_kstat));

	if (must_wait) {
		mutex_exit(&fcp->ds_mutex);
		return (0);
	}

	if (TGCOM_PKT(tgcom_objp, in_bp, dmult_restart,
	    (caddr_t)fcp) != DDI_SUCCESS) {
		/* No packet resources now: park the request for later. */
		fcp->ds_bp = in_bp;
		mutex_exit(&fcp->ds_mutex);
		return (0);
	}

	fcp->ds_outcnt++;
	if (fcp->ds_kstat != NULL)
		kstat_waitq_to_runq(KSTAT_IO_PTR(fcp->ds_kstat));
	mutex_exit(&fcp->ds_mutex);

	/* Transport is done without holding ds_mutex. */
	TGCOM_TRANSPORT(tgcom_objp, in_bp);
	return (0);
}
/*
* Account for a request leaving the run queue (when in_bp is non-NULL) and
* then start as much queued work as the ds_waitcnt limit allows.
*
* queuep: the fc_data for this flow-control instance.
* in_bp: request being retired, or NULL (as passed by dmult_restart())
* when only the restart part should run.
*
* Always returns 0.
*/
static int
dmult_deque(opaque_t queuep, struct buf *in_bp)
{
struct fc_data *dmultp = (struct fc_data *)queuep;
opaque_t tgcom_objp;
opaque_t que_objp;
struct buf *bp;
que_objp = dmultp->ds_queobjp;
tgcom_objp = dmultp->ds_tgcomobjp;
mutex_enter(&dmultp->ds_mutex);
if (in_bp) {
dmultp->ds_outcnt--;
if (dmultp->ds_kstat) {
/* Bytes counted are the bytes actually moved: b_bcount - b_resid. */
if (in_bp->b_flags & B_READ) {
KSTAT_IO_PTR(dmultp->ds_kstat)->reads++;
KSTAT_IO_PTR(dmultp->ds_kstat)->nread +=
(in_bp->b_bcount - in_bp->b_resid);
} else {
KSTAT_IO_PTR(dmultp->ds_kstat)->writes++;
KSTAT_IO_PTR(dmultp->ds_kstat)->nwritten +=
(in_bp->b_bcount - in_bp->b_resid);
}
kstat_runq_exit(KSTAT_IO_PTR(dmultp->ds_kstat));
}
}
for (;;) {
#ifdef FLC_DEBUG
/* Debug counter: interrupt-thread pass with nothing parked or outstanding. */
if ((curthread->t_intr) && (!dmultp->ds_bp) &&
(!dmultp->ds_outcnt))
flc_malloc_intr++;
#endif
/* Use the parked request if there is one, else pull from the queue. */
if (!dmultp->ds_bp)
dmultp->ds_bp = QUE_DEL(que_objp);
/*
* Stop when there is nothing to send, when TGCOM_PKT() fails (the
* request stays parked in ds_bp), or when the outstanding count has
* reached ds_waitcnt. Note TGCOM_PKT() is evaluated before the count
* is tested.
*/
if (!dmultp->ds_bp ||
(TGCOM_PKT(tgcom_objp, dmultp->ds_bp, dmult_restart,
(caddr_t)dmultp) != DDI_SUCCESS) ||
(dmultp->ds_outcnt >= dmultp->ds_waitcnt)) {
mutex_exit(&dmultp->ds_mutex);
return (0);
}
dmultp->ds_outcnt++;
bp = dmultp->ds_bp;
/* Pre-fetch the following request into ds_bp before dropping the lock. */
dmultp->ds_bp = QUE_DEL(que_objp);
if (dmultp->ds_kstat)
kstat_waitq_to_runq(KSTAT_IO_PTR(dmultp->ds_kstat));
mutex_exit(&dmultp->ds_mutex);
/* Transport runs without ds_mutex held. */
TGCOM_TRANSPORT(tgcom_objp, bp);
/* If the lock cannot be re-taken immediately, give up instead of blocking. */
if (!mutex_tryenter(&dmultp->ds_mutex))
return (0);
}
}
/*
 * Callback handed to TGCOM_PKT() by the dmult routines. Re-runs the
 * dispatch loop of dmult_deque() with no request to retire.
 *
 * Always returns -1.
 */
static int
dmult_restart(struct fc_data *dmultp)
{
	struct buf *no_bp = NULL;

	(void) dmult_deque(dmultp, no_bp);
	return (-1);
}
/*
* Duplexed Commands per Device: Read Queue and Write Queue
*/
/*
* Local Function Prototypes
*/
/* duplx_restart() is defined after duplx_deque(); it takes a struct duplx_data *. */
static int duplx_restart();
static int duplx_init(opaque_t queuep, opaque_t tgcom_objp, opaque_t que_objp,
void *lkarg);
static int duplx_free(struct flc_obj *flcobjp);
static int duplx_enque(opaque_t queuep, struct buf *bp);
static int duplx_deque(opaque_t queuep, struct buf *bp);
/*
* Operations vector for the duplexed (read queue + write queue) strategy.
* It has its own init/free/enque/deque routines but shares the fc_* kstat
* helpers. The last two slots are left zero.
*/
struct flc_objops duplx_ops = {
duplx_init,
duplx_free,
duplx_enque,
duplx_deque,
fc_start_kstat,
fc_stop_kstat,
0, 0
};
/*
 * Create a duplexed flow-control object.
 *
 * The flc_obj and its duplx_data are carved out of one zero-filled
 * KM_NOSLEEP allocation, with the duplx_data directly behind the object
 * header. A FIFO queue object is created here for the write side; the read
 * side queue is supplied later through duplx_init().
 *
 * Returns the new object, or NULL if either allocation fails (in which
 * case nothing is left allocated).
 */
struct flc_obj *
duplx_create()
{
	struct flc_obj *flcobjp;
	struct duplx_data *fcdp;

	flcobjp = kmem_zalloc((sizeof (*flcobjp) + sizeof (*fcdp)), KM_NOSLEEP);
	if (flcobjp == NULL)
		return (NULL);

	fcdp = (struct duplx_data *)(flcobjp + 1);
	flcobjp->flc_data = (opaque_t)fcdp;
	flcobjp->flc_ops = &duplx_ops;

	/*
	 * Create the write queue exactly once. Calling qfifo_create() twice
	 * and overwriting the pointer would leak the first queue object.
	 */
	fcdp->ds_writeq.fc_qobjp = qfifo_create();
	if (fcdp->ds_writeq.fc_qobjp == NULL) {
		kmem_free(flcobjp, (sizeof (*flcobjp) + sizeof (*fcdp)));
		return (NULL);
	}
	return (flcobjp);
}
static int
duplx_free(struct flc_obj *flcobjp)
{
struct duplx_data *fcdp;
fcdp = (struct duplx_data *)flcobjp->flc_data;
if (fcdp->ds_writeq.fc_qobjp) {
QUE_FREE(fcdp->ds_writeq.fc_qobjp);
}
if (fcdp->ds_readq.fc_qobjp)
QUE_FREE(fcdp->ds_readq.fc_qobjp);
if (fcdp->ds_tgcomobjp) {
TGCOM_FREE(fcdp->ds_tgcomobjp);
mutex_destroy(&fcdp->ds_mutex);
}
kmem_free(flcobjp, (sizeof (*flcobjp) + sizeof (*fcdp)));
return (0);
}
/*
 * Initialize the duplx_data behind queuep.
 *
 * Records the target-communication object, installs que_objp as the read
 * queue, initializes both queue objects and the target-communication
 * object, sets up ds_mutex (lkarg is passed through), gives each direction
 * a limit of DUPLX_MAXCNT, and links the two fc_que structures to each
 * other so the dispatch loops can alternate between them.
 *
 * Always returns DDI_SUCCESS.
 */
static int
duplx_init(opaque_t queuep, opaque_t tgcom_objp, opaque_t que_objp, void *lkarg)
{
	struct duplx_data *dxp = (struct duplx_data *)queuep;
	struct fc_que *rdq = &dxp->ds_readq;
	struct fc_que *wrq = &dxp->ds_writeq;

	dxp->ds_tgcomobjp = tgcom_objp;
	rdq->fc_qobjp = que_objp;

	QUE_INIT(que_objp, lkarg);
	QUE_INIT(wrq->fc_qobjp, lkarg);
	TGCOM_INIT(tgcom_objp);
	mutex_init(&dxp->ds_mutex, NULL, MUTEX_DRIVER, lkarg);

	wrq->fc_maxcnt = DUPLX_MAXCNT;
	rdq->fc_maxcnt = DUPLX_MAXCNT;

	/* Each queue's next pointer refers to the other one. */
	rdq->next = wrq;
	wrq->next = rdq;

	return (DDI_SUCCESS);
}
/*
* Queue a request on the read or write queue (chosen by B_READ) and then
* dispatch as many requests as possible, alternating between the queues.
*
* queuep: the duplx_data for this flow-control instance.
* in_bp: request to queue, or NULL (as passed by duplx_restart()) to
* run only the dispatch loop, starting with the read queue.
*
* Always returns 0.
*/
static int
duplx_enque(opaque_t queuep, struct buf *in_bp)
{
struct duplx_data *duplxp = (struct duplx_data *)queuep;
opaque_t tgcom_objp;
struct fc_que *activeq;
struct buf *bp;
mutex_enter(&duplxp->ds_mutex);
if (in_bp) {
if (duplxp->ds_kstat) {
kstat_waitq_enter(KSTAT_IO_PTR(duplxp->ds_kstat));
}
if (in_bp->b_flags & B_READ)
activeq = &duplxp->ds_readq;
else
activeq = &duplxp->ds_writeq;
QUE_ADD(activeq->fc_qobjp, in_bp);
} else {
activeq = &duplxp->ds_readq;
}
tgcom_objp = duplxp->ds_tgcomobjp;
for (;;) {
/* Use the request parked on this queue, else pull the next one. */
if (!activeq->fc_bp)
activeq->fc_bp = QUE_DEL(activeq->fc_qobjp);
/*
* This queue cannot dispatch if it is empty, TGCOM_PKT() fails (the
* request stays parked in fc_bp), or its outstanding count is at
* fc_maxcnt.
*/
if (!activeq->fc_bp ||
(TGCOM_PKT(tgcom_objp, activeq->fc_bp, duplx_restart,
(caddr_t)duplxp) != DDI_SUCCESS) ||
(activeq->fc_outcnt >= activeq->fc_maxcnt)) {
/* switch read/write queues */
activeq = activeq->next;
if (!activeq->fc_bp)
activeq->fc_bp = QUE_DEL(activeq->fc_qobjp);
/* Neither queue can dispatch: done. */
if (!activeq->fc_bp ||
(TGCOM_PKT(tgcom_objp, activeq->fc_bp,
duplx_restart, (caddr_t)duplxp) != DDI_SUCCESS) ||
(activeq->fc_outcnt >= activeq->fc_maxcnt)) {
mutex_exit(&duplxp->ds_mutex);
return (0);
}
}
activeq->fc_outcnt++;
bp = activeq->fc_bp;
activeq->fc_bp = NULL;
if (duplxp->ds_kstat)
kstat_waitq_to_runq(KSTAT_IO_PTR(duplxp->ds_kstat));
mutex_exit(&duplxp->ds_mutex);
/* Transport runs without ds_mutex held. */
TGCOM_TRANSPORT(tgcom_objp, bp);
/* If the lock cannot be re-taken immediately, give up instead of blocking. */
if (!mutex_tryenter(&duplxp->ds_mutex))
return (0);
/* Try the other queue first on the next pass. */
activeq = activeq->next;
}
}
/*
* Account for a request leaving the run queue and then dispatch further
* requests, alternating between the read and write queues.
*
* queuep: the duplx_data for this flow-control instance.
* in_bp: request being retired. It is dereferenced unconditionally, so
* unlike dsngl_deque()/dmult_deque() it must not be NULL.
*
* Always returns 0.
*/
static int
duplx_deque(opaque_t queuep, struct buf *in_bp)
{
struct duplx_data *duplxp = (struct duplx_data *)queuep;
opaque_t tgcom_objp;
struct fc_que *activeq;
struct buf *bp;
mutex_enter(&duplxp->ds_mutex);
tgcom_objp = duplxp->ds_tgcomobjp;
/* The retired request is charged to the queue matching its direction. */
if (in_bp->b_flags & B_READ)
activeq = &duplxp->ds_readq;
else
activeq = &duplxp->ds_writeq;
activeq->fc_outcnt--;
if (duplxp->ds_kstat) {
/* Bytes counted are the bytes actually moved: b_bcount - b_resid. */
if (in_bp->b_flags & B_READ) {
KSTAT_IO_PTR(duplxp->ds_kstat)->reads++;
KSTAT_IO_PTR(duplxp->ds_kstat)->nread +=
(in_bp->b_bcount - in_bp->b_resid);
} else {
KSTAT_IO_PTR(duplxp->ds_kstat)->writes++;
KSTAT_IO_PTR(duplxp->ds_kstat)->nwritten +=
(in_bp->b_bcount - in_bp->b_resid);
}
kstat_runq_exit(KSTAT_IO_PTR(duplxp->ds_kstat));
}
for (;;) {
/* if needed, try to pull request off a queue */
if (!activeq->fc_bp)
activeq->fc_bp = QUE_DEL(activeq->fc_qobjp);
/*
* This queue cannot dispatch if it is empty, TGCOM_PKT() fails (the
* request stays parked in fc_bp), or its outstanding count is at
* fc_maxcnt; in that case try the other queue.
*/
if (!activeq->fc_bp ||
(TGCOM_PKT(tgcom_objp, activeq->fc_bp, duplx_restart,
(caddr_t)duplxp) != DDI_SUCCESS) ||
(activeq->fc_outcnt >= activeq->fc_maxcnt)) {
activeq = activeq->next;
if (!activeq->fc_bp)
activeq->fc_bp = QUE_DEL(activeq->fc_qobjp);
/* Neither queue can dispatch: done. */
if (!activeq->fc_bp ||
(TGCOM_PKT(tgcom_objp, activeq->fc_bp,
duplx_restart, (caddr_t)duplxp) != DDI_SUCCESS) ||
(activeq->fc_outcnt >= activeq->fc_maxcnt)) {
mutex_exit(&duplxp->ds_mutex);
return (0);
}
}
activeq->fc_outcnt++;
bp = activeq->fc_bp;
activeq->fc_bp = NULL;
if (duplxp->ds_kstat)
kstat_waitq_to_runq(KSTAT_IO_PTR(duplxp->ds_kstat));
mutex_exit(&duplxp->ds_mutex);
/* Transport runs without ds_mutex held. */
TGCOM_TRANSPORT(tgcom_objp, bp);
/* If the lock cannot be re-taken immediately, give up instead of blocking. */
if (!mutex_tryenter(&duplxp->ds_mutex))
return (0);
/* Try the other queue first on the next pass. */
activeq = activeq->next;
}
}
/*
 * Callback handed to TGCOM_PKT() by the duplx routines. Re-runs the
 * dispatch loop of duplx_enque() without queueing a new request.
 *
 * Always returns -1.
 */
static int
duplx_restart(struct duplx_data *duplxp)
{
	struct buf *no_bp = NULL;

	(void) duplx_enque(duplxp, no_bp);
	return (-1);
}
/*
* Tagged queueing flow control
*/
/*
* Local Function Prototypes
*/
/*
* Operations vector for the tagged-queueing ("adapt") strategy. Its entries
* are the same functions as in dmult_ops: the shared fc_* helpers plus the
* dmult enque/deque routines. The last two slots are left zero.
*/
struct flc_objops adapt_ops = {
fc_init,
fc_free,
dmult_enque,
dmult_deque,
fc_start_kstat,
fc_stop_kstat,
0, 0
};
/*
 * Create a tagged-queueing flow-control object: an fc_create() object wired
 * to adapt_ops. Returns NULL when fc_create() fails.
 */
struct flc_obj *
adapt_create()
{
	struct flc_obj *objp;

	objp = fc_create(&adapt_ops);
	return (objp);
}
/*
* Common Queue functions
*/
/*
* Local static data
*/
/*
* State compiled in only when Q_DEBUG is defined: three bit flags and the
* que_debug mask built from them. Nothing in this file tests the mask --
* TODO confirm whether it is still used.
*/
#ifdef Q_DEBUG
#define DENT 0x0001
#define DERR 0x0002
#define DIO 0x0004
static int que_debug = DENT|DERR|DIO;
#endif /* Q_DEBUG */
/*
* Local Function Prototypes
*/
/* Helpers shared by the queue objects defined below. */
static struct que_obj *que_create(struct que_objops *qopsp);
static int que_init(struct que_data *qfp, void *lkarg);
static int que_free(struct que_obj *queobjp);
static struct buf *que_del(struct que_data *qfp);
static struct que_obj *
que_create(struct que_objops *qopsp)
{
struct que_data *qfp;
struct que_obj *queobjp;
queobjp = kmem_zalloc((sizeof (*queobjp) + sizeof (*qfp)), KM_NOSLEEP);
if (!queobjp)
return (NULL);
queobjp->que_ops = qopsp;
qfp = (struct que_data *)(queobjp+1);
queobjp->que_data = (opaque_t)qfp;
return ((opaque_t)queobjp);
}
/*
 * Initialize a queue's private data: sets up q_mutex as a MUTEX_DRIVER
 * mutex, passing lkarg through to mutex_init().
 *
 * Always returns DDI_SUCCESS.
 */
static int
que_init(struct que_data *qfp, void *lkarg)
{
	int rv = DDI_SUCCESS;

	mutex_init(&qfp->q_mutex, NULL, MUTEX_DRIVER, lkarg);
	return (rv);
}
/*
 * Destroy a queue object made by que_create(): q_mutex is destroyed and
 * the combined que_obj + que_data allocation is released.
 *
 * Always returns 0.
 */
static int
que_free(struct que_obj *queobjp)
{
	struct que_data *datap = (struct que_data *)queobjp->que_data;

	mutex_destroy(&datap->q_mutex);
	kmem_free(queobjp, sizeof (struct que_obj) + sizeof (struct que_data));
	return (0);
}
/*
 * Remove and return the buf at the head (b_actf) of a singly linked queue.
 *
 * When the removed buf was the only element, the tail pointer (b_actl) is
 * cleared as well. The returned buf's av_forw is reset.
 *
 * Returns NULL when the queue is empty.
 */
static struct buf *
que_del(struct que_data *qfp)
{
	struct buf *head = qfp->q_tab.b_actf;

	if (head == NULL)
		return (NULL);

	qfp->q_tab.b_actf = head->av_forw;
	if (qfp->q_tab.b_actf == NULL)
		qfp->q_tab.b_actl = NULL;
	head->av_forw = NULL;
	return (head);
}
/*
* Qmerge
* Local Function Prototypes
*/
/* Both are defined below: qmerge_add(qfp, bp) and qmerge_free(queobjp). */
static int qmerge_add(), qmerge_free();
static struct buf *qmerge_del(struct que_data *qfp);
/*
* Operations vector for the merging queue: the shared que_init() plus the
* qmerge free/add/del routines. The last two slots are left zero.
*/
struct que_objops qmerge_ops = {
que_init,
qmerge_free,
qmerge_add,
qmerge_del,
0, 0
};
/* fields in diskhd */
/*
* The qmerge code reuses existing diskhd members under these names:
* hd_cnt holds the sync-to-async countdown (see SYNC2ASYNC below),
* hd_private holds the block number that BP_LT_HD/BP_GT_HD compare against,
* hd_flags holds the QNEAR_* flags,
* hd_sync_next / hd_async_next point into the sync and async lists.
* hd_sync2async is not referenced elsewhere in this file.
*/
#define hd_cnt b_back
#define hd_private b_forw
#define hd_flags b_flags
#define hd_sync_next av_forw
#define hd_async_next av_back
#define hd_sync2async sync_async_ratio
/* Scan-state flags kept in hd_flags. */
#define QNEAR_FORWARD 0x01
#define QNEAR_BACKWARD 0x02
#define QNEAR_ASYNCONLY 0x04
#define QNEAR_ASYNCALSO 0x08
/* Sort key of a buf: its b_private value read as an unsigned long. */
#define DBLK(bp) ((unsigned long)(bp)->b_private)
/* Compare two bufs, or a buf against the header's hd_private, by sort key. */
#define BP_LT_BP(a, b) (DBLK(a) < DBLK(b))
#define BP_GT_BP(a, b) (DBLK(a) > DBLK(b))
#define BP_LT_HD(a, b) (DBLK(a) < (unsigned long)((b)->hd_private))
#define BP_GT_HD(a, b) (DBLK(a) > (unsigned long)((b)->hd_private))
#define QNEAR_ASYNC (QNEAR_ASYNCONLY|QNEAR_ASYNCALSO)
/* Countdown stored in the queue header; reloaded from qmerge_sync2async. */
#define SYNC2ASYNC(a) ((a)->q_tab.hd_cnt)
/*
* qmerge implements a two priority queue, the low priority queue holding ASYNC
* write requests, while the rest are queued in the high priority sync queue.
* Requests on the async queue would be merged if possible.
* By default qmerge2wayscan is 1, indicating an elevator algorithm. When
* this variable is set to zero, it has the following side effects.
* 1. We assume fairness is the number one issue.
* 2. The next request to be picked indicates current head position.
*
* qmerge_sync2async indicates the ratio of scans of high priority
* sync queue to low priority async queue.
*
* When qmerge variables have the following values it defaults to qsort
*
* qmerge1pri = 1, qmerge2wayscan = 0, qmerge_max_merge = 0
*
*/
/* Upper bound on the combined b_bcount accepted by qmerge_can_merge(). */
static int qmerge_max_merge = 128 * 1024;
/* Value loaded into the queue header's sync-to-async countdown. */
static intptr_t qmerge_sync2async = 4;
/* Non-zero lets qmerge_nextbp() switch to a backward scan. */
static int qmerge2wayscan = 1;
/* Non-zero makes que_insert() place every request on the sync list. */
static int qmerge1pri = 0;
/* Counter incremented by qmerge_mergesetup() on every merge. */
static int qmerge_merge = 0;
/*
* Local static data
*/
struct que_obj *
qmerge_create()
{
struct que_data *qfp;
struct que_obj *queobjp;
queobjp = kmem_zalloc((sizeof (*queobjp) + sizeof (*qfp)), KM_NOSLEEP);
if (!queobjp)
return (NULL);
queobjp->que_ops = &qmerge_ops;
qfp = (struct que_data *)(queobjp+1);
qfp->q_tab.hd_private = 0;
qfp->q_tab.hd_sync_next = qfp->q_tab.hd_async_next = NULL;
qfp->q_tab.hd_cnt = (void *)qmerge_sync2async;
queobjp->que_data = (opaque_t)qfp;
return ((opaque_t)queobjp);
}
/*
 * Destroy a merging queue object: q_mutex is destroyed and the combined
 * que_obj + que_data allocation is released.
 *
 * Always returns 0.
 */
static int
qmerge_free(struct que_obj *queobjp)
{
	struct que_data *datap = (struct que_data *)queobjp->que_data;

	mutex_destroy(&datap->q_mutex);
	kmem_free(queobjp, sizeof (struct que_obj) + sizeof (struct que_data));
	return (0);
}
/*
 * Decide whether two requests may be combined into one transfer.
 *
 * Both bufs must have a zero b_un.b_addr, must have B_PAGEIO, B_ASYNC and
 * B_WRITE set with B_REMAPPED clear, and must have a b_bcount with no bits
 * set in PAGEOFFSET. Their combined b_bcount must not exceed
 * qmerge_max_merge. Finally one request must end exactly where the other
 * begins, measured in DEV_BSIZE units from DBLK().
 *
 * Returns 1 when the requests can be merged, 0 otherwise.
 *
 * The definition uses a prototype instead of the obsolescent K&R form so
 * that the compiler checks the arguments at the call sites.
 */
static int
qmerge_can_merge(struct buf *bp1, struct buf *bp2)
{
	const int paw_flags = B_PAGEIO | B_ASYNC | B_WRITE;

	if ((bp1->b_un.b_addr != 0) || (bp2->b_un.b_addr != 0))
		return (0);

	if (((bp1->b_flags & (paw_flags | B_REMAPPED)) != paw_flags) ||
	    ((bp2->b_flags & (paw_flags | B_REMAPPED)) != paw_flags))
		return (0);

	if ((bp1->b_bcount & PAGEOFFSET) || (bp2->b_bcount & PAGEOFFSET))
		return (0);

	if (bp1->b_bcount + bp2->b_bcount > qmerge_max_merge)
		return (0);

	/* Adjacent in either order: bp2 directly before bp1, or after it. */
	if ((DBLK(bp2) + bp2->b_bcount / DEV_BSIZE == DBLK(bp1)) ||
	    (DBLK(bp1) + bp1->b_bcount / DEV_BSIZE == DBLK(bp2)))
		return (1);

	return (0);
}
/*
 * Fold bp into the merged request bp_merge.
 *
 * bp_merge's byte count grows by bp's, bp's page list is spliced into
 * bp_merge's circular page list, and bp is linked into the ring of
 * constituent bufs that hangs off bp_merge->b_forw. When bp lies below
 * bp_merge (by DBLK()), bp becomes the new first constituent and bp_merge
 * takes over its pages pointer and b_private.
 *
 * The global qmerge_merge counter is incremented on every call.
 *
 * The definition uses a prototype instead of the obsolescent K&R form; the
 * statements and their order are unchanged.
 */
static void
qmerge_mergesetup(struct buf *bp_merge, struct buf *bp)
{
	struct buf *bp1;
	struct page *pp, *pp_merge, *pp_merge_prev;
	int forward;

	qmerge_merge++;

	/* Must be evaluated before bp_merge->b_private can change below. */
	forward = DBLK(bp_merge) < DBLK(bp);

	bp_merge->b_bcount += bp->b_bcount;

	/* Join the two circular page lists. */
	pp = bp->b_pages;
	pp_merge = bp_merge->b_pages;
	pp_merge_prev = pp_merge->p_prev;
	pp_merge->p_prev->p_next = pp;
	pp_merge->p_prev = pp->p_prev;
	pp->p_prev->p_next = pp_merge;
	pp->p_prev = pp_merge_prev;

	/* Link bp into the ring of constituents, just before bp1. */
	bp1 = bp_merge->b_forw;
	bp1->av_back->av_forw = bp;
	bp->av_back = bp1->av_back;
	bp1->av_back = bp;
	bp->av_forw = bp1;

	if (!forward) {
		bp_merge->b_forw = bp;
		bp_merge->b_pages = pp;
		bp_merge->b_private = bp->b_private;
	}
}
/*
* Link bp into one of the two circular, doubly linked (av_forw/av_back)
* lists kept in the queue header.
*
* The async list is used when qmerge1pri is zero and the buf has B_ASYNC
* set with B_READ and B_FREE clear; every other buf goes on the sync list.
* The list is walked comparing DBLK() values to find the insertion point.
* Depending on hd_flags and on how bp compares with the header's
* hd_private, the list's "next" pointer may be redirected to bp.
*
* The caller is responsible for updating q_cnt (see qmerge_add()).
*/
static void
que_insert(struct que_data *qfp, struct buf *bp)
{
struct buf *bp1, *bp_start, *lowest_bp, *highest_bp;
uintptr_t highest_blk, lowest_blk;
struct buf **async_bpp, **sync_bpp, **bpp;
struct diskhd *dp = &qfp->q_tab;
sync_bpp = &dp->hd_sync_next;
async_bpp = &dp->hd_async_next;
/*
* The ioctl used by the format utility requires that bp->av_back be
* preserved.
*/
/* The value is parked in b_error; qmerge_nextbp() puts it back. */
if (bp->av_back)
bp->b_error = (intptr_t)bp->av_back;
/* Pick the list this request belongs on. */
if (!qmerge1pri &&
((bp->b_flags & (B_ASYNC|B_READ|B_FREE)) == B_ASYNC)) {
bpp = &dp->hd_async_next;
} else {
bpp = &dp->hd_sync_next;
}
/* Empty list: bp becomes a one-element ring and the flags are updated. */
if ((bp1 = *bpp) == NULL) {
*bpp = bp;
bp->av_forw = bp->av_back = bp;
if ((bpp == async_bpp) && (*sync_bpp == NULL)) {
dp->hd_flags |= QNEAR_ASYNCONLY;
} else if (bpp == sync_bpp) {
dp->hd_flags &= ~QNEAR_ASYNCONLY;
if (*async_bpp) {
dp->hd_flags |= QNEAR_ASYNCALSO;
}
}
return;
}
bp_start = bp1;
if (DBLK(bp) < DBLK(bp1)) {
/* bp sorts below the list's current entry: walk backwards. */
lowest_blk = DBLK(bp1);
lowest_bp = bp1;
do {
if (DBLK(bp) > DBLK(bp1)) {
/* Found an element below bp: insert bp right after it. */
bp->av_forw = bp1->av_forw;
bp1->av_forw->av_back = bp;
bp1->av_forw = bp;
bp->av_back = bp1;
if (((bpp == async_bpp) &&
(dp->hd_flags & QNEAR_ASYNC)) ||
(bpp == sync_bpp)) {
if (!(dp->hd_flags & QNEAR_BACKWARD) &&
BP_GT_HD(bp, dp)) {
*bpp = bp;
}
}
return;
} else if (DBLK(bp1) < lowest_blk) {
lowest_bp = bp1;
lowest_blk = DBLK(bp1);
}
} while ((DBLK(bp1->av_back) < DBLK(bp1)) &&
((bp1 = bp1->av_back) != bp_start));
/* Nothing below bp was found: insert it in front of the lowest element. */
bp->av_forw = lowest_bp;
lowest_bp->av_back->av_forw = bp;
bp->av_back = lowest_bp->av_back;
lowest_bp->av_back = bp;
if ((bpp == async_bpp) && !(dp->hd_flags & QNEAR_ASYNC)) {
*bpp = bp;
} else if (!(dp->hd_flags & QNEAR_BACKWARD) &&
BP_GT_HD(bp, dp)) {
*bpp = bp;
}
} else {
/* bp sorts at or above the list's current entry: walk forwards. */
highest_blk = DBLK(bp1);
highest_bp = bp1;
do {
if (DBLK(bp) < DBLK(bp1)) {
/* Found an element above bp: insert bp right before it. */
bp->av_forw = bp1;
bp1->av_back->av_forw = bp;
bp->av_back = bp1->av_back;
bp1->av_back = bp;
if (((bpp == async_bpp) &&
(dp->hd_flags & QNEAR_ASYNC)) ||
(bpp == sync_bpp)) {
if ((dp->hd_flags & QNEAR_BACKWARD) &&
BP_LT_HD(bp, dp)) {
*bpp = bp;
}
}
return;
} else if (DBLK(bp1) > highest_blk) {
highest_bp = bp1;
highest_blk = DBLK(bp1);
}
} while ((DBLK(bp1->av_forw) > DBLK(bp1)) &&
((bp1 = bp1->av_forw) != bp_start));
/* Nothing above bp was found: insert it after the highest element. */
bp->av_back = highest_bp;
highest_bp->av_forw->av_back = bp;
bp->av_forw = highest_bp->av_forw;
highest_bp->av_forw = bp;
if (((bpp == sync_bpp) ||
((bpp == async_bpp) && (dp->hd_flags & QNEAR_ASYNC))) &&
(dp->hd_flags & QNEAR_BACKWARD) && (BP_LT_HD(bp, dp)))
*bpp = bp;
}
}
/*
 * Add bp to the merging queue and return the new number of queued bufs.
 *
 * No lock is taken here: dmult_enque() calls in with dmultp->ds_mutex
 * held. If dmult_enque() ever changes, this function has to be revisited.
 */
static int
qmerge_add(struct que_data *qfp, struct buf *bp)
{
	que_insert(qfp, bp);
	qfp->q_cnt++;
	return (qfp->q_cnt);
}
/*
 * Completion routine installed on a merged buf by qmerge_del().
 *
 * The constituent bufs hang off bp->b_forw as a circular av_forw/av_back
 * ring. Each one is unlinked, gets its share of the joined page list split
 * back off, inherits the merged buf's error state if there is one, and is
 * completed with biodone(). The merged buf itself is then freed.
 *
 * Always returns 0.
 */
static int
qmerge_iodone(struct buf *bp)
{
	struct buf *bp1;
	struct page *pp, *pp1, *tmp_pp;

	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);

	bp1 = bp->b_forw;
	do {
		/* Unlink bp1 from the ring; its successor becomes the head. */
		bp->b_forw = bp1->av_forw;
		bp1->av_forw->av_back = bp1->av_back;
		bp1->av_back->av_forw = bp1->av_forw;

		/* Split bp1's pages off the joined circular page list. */
		pp = (page_t *)bp1->b_pages;
		pp1 = bp->b_forw->b_pages;
		tmp_pp = pp->p_prev;
		pp->p_prev = pp1->p_prev;
		pp->p_prev->p_next = pp;
		pp1->p_prev = tmp_pp;
		pp1->p_prev->p_next = pp1;

		if (bp->b_flags & B_ERROR) {
			bp1->b_error = bp->b_error;
			bp1->b_flags |= B_ERROR;
		}
		biodone(bp1);
	} while ((bp1 = bp->b_forw) != bp->b_forw->av_forw);

	/*
	 * bp1 is now the last constituent. It must see a failure of the
	 * merged transfer as well; otherwise it would be completed as
	 * successful even though the combined I/O reported an error.
	 */
	if (bp->b_flags & B_ERROR) {
		bp1->b_error = bp->b_error;
		bp1->b_flags |= B_ERROR;
	}
	biodone(bp1);

	kmem_free(bp, sizeof (*bp));
	return (0);
}
/*
* Remove and return the next buf from the merging queue.
*
* qfp: queue data.
* bp_merge: when non-NULL, the candidate is only taken if
* qmerge_can_merge(candidate, bp_merge) accepts it.
* can_merge: set to non-zero when the returned buf is not a read and is
* contiguous (in DEV_BSIZE units) with its neighbour in the
* current scan direction; set to 0 otherwise. It is left
* untouched when NULL is returned.
*
* Returns NULL when the queue is empty or the candidate cannot be merged
* with bp_merge. Three cases are handled, selected by the header flags:
* only the async list is active (QNEAR_ASYNCONLY), both lists are scanned
* together (QNEAR_ASYNCALSO), or only the sync list is served.
*
* Note that the two "goto begin_nextbp" paths modify the list pointers and
* header fields before restarting the selection.
*/
static struct buf *
qmerge_nextbp(struct que_data *qfp, struct buf *bp_merge, int *can_merge)
{
intptr_t private, cnt;
int flags;
struct buf *sync_bp, *async_bp, *bp;
struct buf **sync_bpp, **async_bpp, **bpp;
struct diskhd *dp = &qfp->q_tab;
if (qfp->q_cnt == 0) {
return (NULL);
}
/* Work on a local copy of the flags; it is written back at the end. */
flags = qfp->q_tab.hd_flags;
sync_bpp = &qfp->q_tab.hd_sync_next;
async_bpp = &qfp->q_tab.hd_async_next;
begin_nextbp:
if (flags & QNEAR_ASYNCONLY) {
/* Case 1: serve the async list only. */
bp = *async_bpp;
private = DBLK(bp);
if (bp_merge && !qmerge_can_merge(bp, bp_merge)) {
return (NULL);
} else if (bp->av_forw == bp) {
/* Last element of the ring: the list becomes empty. */
bp->av_forw = bp->av_back = NULL;
flags &= ~(QNEAR_ASYNCONLY | QNEAR_BACKWARD);
private = 0;
} else if (flags & QNEAR_BACKWARD) {
/* Backward scan reached the lowest element: turn around. */
if (DBLK(bp) < DBLK(bp->av_back)) {
flags &= ~QNEAR_BACKWARD;
private = 0;
}
} else if (DBLK(bp) > DBLK(bp->av_forw)) {
/* Forward scan reached the highest element. */
if (qmerge2wayscan) {
flags |= QNEAR_BACKWARD;
} else {
private = 0;
}
} else if (qmerge2wayscan == 0) {
private = DBLK(bp->av_forw);
}
bpp = async_bpp;
} else if (flags & QNEAR_ASYNCALSO) {
/* Case 2: choose between the heads of the sync and async lists. */
sync_bp = *sync_bpp;
async_bp = *async_bpp;
if (flags & QNEAR_BACKWARD) {
/* Both candidates lie above hd_private: end the combined backward pass. */
if (BP_GT_HD(sync_bp, dp) && BP_GT_HD(async_bp, dp)) {
flags &= ~(QNEAR_BACKWARD|QNEAR_ASYNCALSO);
*sync_bpp = sync_bp->av_forw;
*async_bpp = async_bp->av_forw;
SYNC2ASYNC(qfp) = (void *)qmerge_sync2async;
qfp->q_tab.hd_private = 0;
goto begin_nextbp;
}
/* Prefer the candidate below hd_private; of two, the higher one. */
if (BP_LT_HD(async_bp, dp) && BP_LT_HD(sync_bp, dp)) {
if (BP_GT_BP(async_bp, sync_bp)) {
bpp = async_bpp;
bp = *async_bpp;
} else {
bpp = sync_bpp;
bp = *sync_bpp;
}
} else if (BP_LT_HD(async_bp, dp)) {
bpp = async_bpp;
bp = *async_bpp;
} else {
bpp = sync_bpp;
bp = *sync_bpp;
}
} else {
/* Both candidates lie below hd_private: reverse or end the combined pass. */
if (BP_LT_HD(sync_bp, dp) && BP_LT_HD(async_bp, dp)) {
if (qmerge2wayscan) {
flags |= QNEAR_BACKWARD;
*sync_bpp = sync_bp->av_back;
*async_bpp = async_bp->av_back;
goto begin_nextbp;
} else {
flags &= ~QNEAR_ASYNCALSO;
SYNC2ASYNC(qfp) =
(void *)qmerge_sync2async;
qfp->q_tab.hd_private = 0;
goto begin_nextbp;
}
}
/* Prefer the candidate above hd_private; of two, the lower one. */
if (BP_GT_HD(async_bp, dp) && BP_GT_HD(sync_bp, dp)) {
if (BP_LT_BP(async_bp, sync_bp)) {
bpp = async_bpp;
bp = *async_bpp;
} else {
bpp = sync_bpp;
bp = *sync_bpp;
}
} else if (BP_GT_HD(async_bp, dp)) {
bpp = async_bpp;
bp = *async_bpp;
} else {
bpp = sync_bpp;
bp = *sync_bpp;
}
}
if (bp_merge && !qmerge_can_merge(bp, bp_merge)) {
return (NULL);
} else if (bp->av_forw == bp) {
/* The chosen list becomes empty: leave combined mode. */
bp->av_forw = bp->av_back = NULL;
flags &= ~QNEAR_ASYNCALSO;
if (bpp == async_bpp) {
SYNC2ASYNC(qfp) = (void *)qmerge_sync2async;
} else {
flags |= QNEAR_ASYNCONLY;
}
}
private = DBLK(bp);
} else {
/* Case 3: serve the sync list only. */
bp = *sync_bpp;
private = DBLK(bp);
if (bp_merge && !qmerge_can_merge(bp, bp_merge)) {
return (NULL);
} else if (bp->av_forw == bp) {
/* Last sync element: fall back to the async list if it has entries. */
private = 0;
SYNC2ASYNC(qfp) = (void *)qmerge_sync2async;
bp->av_forw = bp->av_back = NULL;
flags &= ~QNEAR_BACKWARD;
if (*async_bpp)
flags |= QNEAR_ASYNCONLY;
} else if (flags & QNEAR_BACKWARD) {
if (DBLK(bp) < DBLK(bp->av_back)) {
/*
* Backward scan finished. Count down the sync-to-async ratio;
* when it is used up, include the async list in the next pass.
*/
flags &= ~QNEAR_BACKWARD;
cnt = (intptr_t)SYNC2ASYNC(qfp);
if (cnt > 0) {
cnt--;
SYNC2ASYNC(qfp) = (void *)cnt;
} else {
if (*async_bpp)
flags |= QNEAR_ASYNCALSO;
SYNC2ASYNC(qfp) =
(void *)qmerge_sync2async;
}
private = 0;
}
} else if (DBLK(bp) > DBLK(bp->av_forw)) {
/* Forward scan reached the highest element. */
private = 0;
if (qmerge2wayscan) {
flags |= QNEAR_BACKWARD;
private = DBLK(bp);
} else {
cnt = (intptr_t)SYNC2ASYNC(qfp);
if (cnt > 0) {
cnt--;
SYNC2ASYNC(qfp) = (void *)cnt;
} else {
if (*async_bpp)
flags |= QNEAR_ASYNCALSO;
SYNC2ASYNC(qfp) =
(void *)qmerge_sync2async;
}
}
} else if (qmerge2wayscan == 0) {
private = DBLK(bp->av_forw);
}
bpp = sync_bpp;
}
if (bp->av_forw) {
/*
* bp is still linked: advance the list pointer in the scan direction,
* report whether the new neighbour is contiguous, then unlink bp.
*/
*can_merge = !(bp->b_flags & B_READ);
if (flags & QNEAR_BACKWARD) {
*bpp = bp->av_back;
if ((DBLK(bp->av_back) +
bp->av_back->b_bcount / DEV_BSIZE) != DBLK(bp))
*can_merge = 0;
} else {
*bpp = bp->av_forw;
if ((DBLK(bp) + bp->b_bcount / DEV_BSIZE) !=
DBLK(bp->av_forw))
*can_merge = 0;
}
bp->av_forw->av_back = bp->av_back;
bp->av_back->av_forw = bp->av_forw;
bp->av_forw = bp->av_back = NULL;
} else {
/* bp was the only element (links already cleared above). */
*bpp = NULL;
*can_merge = 0;
}
qfp->q_tab.hd_private = (void *)private;
qfp->q_cnt--;
qfp->q_tab.hd_flags = flags;
/* Restore the av_back value that que_insert() parked in b_error. */
if (bp->b_error) {
bp->av_back = (void *)(intptr_t)bp->b_error;
bp->b_error = 0;
}
return (bp);
}
/*
 * Take the next request off the merging queue, combining it with
 * following requests for as long as qmerge_nextbp() reports that merging
 * is possible.
 *
 * When at least one further request is merged, a new buf is allocated as a
 * copy of the first request, given qmerge_iodone() as its completion
 * routine, and returned in place of the originals; the constituent bufs
 * hang off its b_forw. Otherwise the dequeued request itself is returned.
 *
 * No lock is taken or released here; as with qmerge_add(), the caller is
 * expected to provide the locking.
 *
 * Returns NULL when the queue is empty.
 */
static struct buf *
qmerge_del(struct que_data *qfp)
{
	struct buf *bp, *next_bp, *bp_merge;
	int alloc_mergebp, merge;

	if (qfp->q_cnt == 0) {
		return (NULL);
	}

	bp_merge = bp = qmerge_nextbp(qfp, NULL, &merge);
	alloc_mergebp = 1;
	while (merge && (next_bp = qmerge_nextbp(qfp, bp_merge, &merge))) {
		if (alloc_mergebp) {
			bp_merge = kmem_alloc(sizeof (*bp_merge), KM_NOSLEEP);
			if (bp_merge == NULL) {
				/*
				 * No memory for the merge header. next_bp has
				 * already been taken off the queue, so put it
				 * back; otherwise it would never be issued.
				 * q_mutex is not held in this function and
				 * therefore must not be released here.
				 */
				(void) qmerge_add(qfp, next_bp);
				return (bp);
			}
			bcopy(bp, bp_merge, sizeof (*bp_merge));
			bp_merge->b_iodone = qmerge_iodone;
			bp_merge->b_forw = bp;
			bp_merge->b_back = (struct buf *)qfp;
			/* Start the ring of constituents with bp alone. */
			bp->av_forw = bp->av_back = bp;
			alloc_mergebp = 0;
		}
		qmerge_mergesetup(bp_merge, next_bp);
	}
	return (bp_merge);
}
/*
* FIFO Queue functions
*/
/*
* Local Function Prototypes
*/
/* Defined below as qfifo_add(qfp, bp). */
static int qfifo_add();
/*
* Operations vector for the FIFO queue: the shared que_init(), que_free()
* and que_del() helpers plus qfifo_add(). The last two slots are left zero.
*/
struct que_objops qfifo_ops = {
que_init,
que_free,
qfifo_add,
que_del,
0, 0
};
/*
* Local static data
*/
/*
 * Create a FIFO queue object: a que_create() object wired to qfifo_ops.
 * Returns NULL when que_create() fails.
 */
struct que_obj *
qfifo_create()
{
	struct que_obj *objp;

	objp = que_create(&qfifo_ops);
	return (objp);
}
static int
qfifo_add(struct que_data *qfp, struct buf *bp)
{
if (!qfp->q_tab.b_actf)
qfp->q_tab.b_actf = bp;
else
qfp->q_tab.b_actl->av_forw = bp;
qfp->q_tab.b_actl = bp;
bp->av_forw = NULL;
return (0);
}
/*
* One-Way-Scan Queue functions
*/
/*
* Local Function Prototypes
*/
/* Both are defined below: qsort_add(qfp, bp) and qsort_del(qfp). */
static int qsort_add();
static struct buf *qsort_del();
static void oneway_scan_binary(struct diskhd *dp, struct buf *bp);
/*
* Operations vector for the one-way-scan sorted queue: the shared
* que_init() and que_free() helpers plus the qsort add/del routines. The
* last two slots are left zero.
*/
struct que_objops qsort_ops = {
que_init,
que_free,
qsort_add,
qsort_del,
0, 0
};
/*
* Local static data
*/
/*
 * Create a one-way-scan sorted queue object: a que_create() object wired
 * to qsort_ops. Returns NULL when que_create() fails.
 */
struct que_obj *
qsort_create()
{
	struct que_obj *objp;

	objp = que_create(&qsort_ops);
	return (objp);
}
/*
 * Add bp to the sorted queue: the element count is incremented and the buf
 * is placed by oneway_scan_binary().
 *
 * Always returns 0.
 */
static int
qsort_add(struct que_data *qfp, struct buf *bp)
{
	struct diskhd *dp = &qfp->q_tab;

	qfp->q_cnt++;
	oneway_scan_binary(dp, bp);
	return (0);
}
#define b_pasf b_forw
#define b_pasl b_back

/*
 * Insert bp into a queue header that keeps two singly linked lists: the
 * active list (b_actf) and a second list (b_pasf).
 *
 * An empty active list simply receives bp. A request whose DBLK() is below
 * that of the first active request goes to the second list instead, at its
 * head when that list is empty or bp sorts before its first element.
 * In every other case the chosen list is walked and bp is linked in before
 * the first following element with a larger DBLK().
 */
static void
oneway_scan_binary(struct diskhd *dp, struct buf *bp)
{
	struct buf *cur = dp->b_actf;

	if (cur == NULL) {
		dp->b_actf = bp;
		bp->av_forw = NULL;
		return;
	}

	if (DBLK(bp) < DBLK(cur)) {
		/* Behind the active head: use the second list. */
		cur = dp->b_pasf;
		if ((cur == NULL) || (DBLK(bp) < DBLK(cur))) {
			dp->b_pasf = bp;
			bp->av_forw = cur;
			return;
		}
	}

	/* Advance until the following element sorts after bp. */
	for (; cur->av_forw != NULL; cur = cur->av_forw) {
		if (DBLK(bp) < DBLK(cur->av_forw))
			break;
	}

	bp->av_forw = cur->av_forw;
	cur->av_forw = bp;
}
static struct buf *
qsort_del(struct que_data *qfp)
{
struct buf *bp;
if (qfp->q_cnt == 0) {
return (NULL);
}
qfp->q_cnt--;
bp = qfp->q_tab.b_actf;
qfp->q_tab.b_actf = bp->av_forw;
bp->av_forw = 0;
if (!qfp->q_tab.b_actf && qfp->q_tab.b_pasf) {
qfp->q_tab.b_actf = qfp->q_tab.b_pasf;
qfp->q_tab.b_pasf = NULL;
}
return (bp);
}
/*
* Tagged queueing
*/
/*
* Local Function Prototypes
*/
/*
* Operations vector for the tagged-queueing queue. Its entries are the same
* functions as in qsort_ops. The last two slots are left zero.
*/
struct que_objops qtag_ops = {
que_init,
que_free,
qsort_add,
qsort_del,
0, 0
};
/*
* Local static data
*/
/*
 * Create a tagged-queueing queue object: a que_create() object wired to
 * qtag_ops. Returns NULL when que_create() fails.
 */
struct que_obj *
qtag_create()
{
	struct que_obj *objp;

	objp = que_create(&qtag_ops);
	return (objp);
}