/* logindmux.c revision 193974072f41a843678abf5f61979c748687e66b */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Description: logindmux.c
*
* The logindmux driver is used with login modules (like telmod/rlmod).
* This is a 1x1 cloning mux and two of these muxes are used. The lower link
* of one of the muxes receives input from net and the lower link of the
* other mux receives input from pseudo terminal subsystem.
*
* The logdmux_qexch_lock mutex manages the race between LOGDMX_IOC_QEXCHANGE,
* logdmuxunlink() and logdmuxclose(), so that the instance selected as a peer
* in LOGDMX_IOC_QEXCHANGE cannot be unlinked or closed until the qexchange
* is complete; see the inline comments in the code for details.
*
* The logdmux_peerq_lock mutex manages the race between logdmuxlwsrv() and
* logdmuxlrput() (when null'ing tmxp->peerq during LOGDMUX_UNLINK_REQ
* processing).
*
* The logdmux_minor_lock mutex serializes the growth of logdmux_minor_arena
* (the arena is grown gradually rather than allocated all at once so that
* minor numbers are recycled sooner; for simplicity it is never shrunk).
*
* The unlink operation is implemented using protocol messages that flow
* between the two logindmux peer instances. The instance processing the
* I_UNLINK ioctl will send a LOGDMUX_UNLINK_REQ protocol message to its
* peer to indicate that it wishes to unlink; the peer will process this
* message in its lrput, null its tmxp->peerq and then send a
* LOGDMUX_UNLINK_RESP protocol message in reply to indicate that the
* unlink can proceed; having received the reply in its lrput, the
* instance processing the I_UNLINK can then continue. To ensure that only
* one of the peer instances will be actively processing an I_UNLINK at
* any one time, a single structure (an unlinkinfo_t containing a mutex,
* state variable and pointer to an M_CTL mblk) is allocated during
* the processing of the LOGDMX_IOC_QEXCHANGE ioctl. The two instances, if
* trying to unlink simultaneously, will race to get control of this
* structure which contains the resources necessary to process the
* I_UNLINK. The instance that wins this race will be able to continue
* with the unlink whilst the other instance will be obliged to wait.
*/
#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/logindmux.h>
#include <sys/logindmux_impl.h>
#include <sys/stat.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/strsun.h>
#include <sys/sysmacros.h>
#include <sys/mkdev.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/termios.h>
#include <sys/cmn_err.h>
/*
 * STREAMS entry points for the upper (daemon-facing) and lower (linked
 * driver) queue pairs.
 */
static int logdmuxopen(queue_t *, dev_t *, int, int, cred_t *);
static int logdmuxclose(queue_t *, int, cred_t *);
static int logdmuxursrv(queue_t *);
static int logdmuxuwput(queue_t *, mblk_t *);
static int logdmuxlrput(queue_t *, mblk_t *);
static int logdmuxlrsrv(queue_t *);
static int logdmuxlwsrv(queue_t *);
static int logdmuxuwsrv(queue_t *);

/* Internal helpers for link/unlink processing and flow-control recovery. */
static int logdmux_alloc_unlinkinfo(struct tmx *, struct tmx *);
static void logdmuxlink(queue_t *, mblk_t *);
static void logdmuxunlink(queue_t *, mblk_t *);
static void logdmux_finish_unlink(queue_t *, mblk_t *);
static void logdmux_unlink_timer(void *arg);
static void recover(queue_t *, mblk_t *, size_t);
static void flushq_dataonly(queue_t *);

/*
 * Global locks; their roles are described in the block comment at the
 * top of this file.
 */
static kmutex_t logdmux_qexch_lock;
static kmutex_t logdmux_peerq_lock;
static kmutex_t logdmux_minor_lock;

/*
 * Minor number management: minors are handed out of logdmux_minor_arena,
 * which starts at logdmux_maxminor entries and is grown on demand in
 * logdmuxopen() (serialized by logdmux_minor_lock).
 */
static minor_t logdmux_maxminor = 256; /* grown as necessary */
static vmem_t *logdmux_minor_arena;

/* Per-instance soft state (struct tmx), indexed by minor number. */
static void *logdmux_statep;
/* Module information shared by all four queue-init structures. */
static struct module_info logdmuxm_info = {
	LOGDMX_ID,	/* module ID number */
	"logindmux",	/* module name */
	0,		/* minimum packet size */
	256,		/* maximum packet size */
	512,		/* high-water mark */
	256		/* low-water mark */
};

/* Upper read queue: no put procedure; data arrives via the lower queues. */
static struct qinit logdmuxurinit = {
	NULL,		/* put */
	logdmuxursrv,	/* service */
	logdmuxopen,	/* open */
	logdmuxclose,	/* close */
	NULL,		/* admin (unused) */
	&logdmuxm_info
};

/* Upper write queue: handles ioctls and data from the daemon. */
static struct qinit logdmuxuwinit = {
	logdmuxuwput,
	logdmuxuwsrv,
	NULL,
	NULL,
	NULL,
	&logdmuxm_info
};

/* Lower read queue: routes messages to the peer or upstream. */
static struct qinit logdmuxlrinit = {
	logdmuxlrput,
	logdmuxlrsrv,
	NULL,
	NULL,
	NULL,
	&logdmuxm_info
};

/* Lower write queue: service routine only, used for back-enabling. */
static struct qinit logdmuxlwinit = {
	NULL,
	logdmuxlwsrv,
	NULL,
	NULL,
	NULL,
	&logdmuxm_info
};

struct streamtab logdmuxinfo = {
	&logdmuxurinit,
	&logdmuxuwinit,
	&logdmuxlrinit,
	&logdmuxlwinit
};

/* DDI autoconfiguration entry points. */
static int logdmux_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int logdmux_attach(dev_info_t *, ddi_attach_cmd_t);
static int logdmux_detach(dev_info_t *, ddi_detach_cmd_t);

/* Cached dev_info pointer, set in logdmux_attach(). */
static dev_info_t *logdmux_dip;

DDI_DEFINE_STREAM_OPS(logdmux_ops, nulldev, nulldev, logdmux_attach,
    logdmux_detach, nulldev, logdmux_info, D_MP | D_MTPERQ, &logdmuxinfo,
    ddi_quiesce_not_needed);

static struct modldrv modldrv = {
	&mod_driverops,
	"logindmux driver",
	&logdmux_ops
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};
int
_init(void)
{
int ret;
mutex_init(&logdmux_peerq_lock, NULL, MUTEX_DRIVER, NULL);
mutex_init(&logdmux_qexch_lock, NULL, MUTEX_DRIVER, NULL);
if ((ret = mod_install(&modlinkage)) != 0) {
mutex_destroy(&logdmux_peerq_lock);
mutex_destroy(&logdmux_qexch_lock);
return (ret);
}
logdmux_minor_arena = vmem_create("logdmux_minor", (void *)1,
logdmux_maxminor, 1, NULL, NULL, NULL, 0,
VM_SLEEP | VMC_IDENTIFIER);
(void) ddi_soft_state_init(&logdmux_statep, sizeof (struct tmx), 1);
return (0);
}
int
_fini(void)
{
int ret;
if ((ret = mod_remove(&modlinkage)) == 0) {
mutex_destroy(&logdmux_peerq_lock);
mutex_destroy(&logdmux_qexch_lock);
ddi_soft_state_fini(&logdmux_statep);
vmem_destroy(logdmux_minor_arena);
logdmux_minor_arena = NULL;
}
return (ret);
}
/*
 * Report module information to the module framework (modinfo(1M)).
 */
int
_info(struct modinfo *modinfop)
{
	int rv;

	rv = mod_info(&modlinkage, modinfop);
	return (rv);
}
/*
 * Attach: create the clone minor node for the logindmux device and
 * cache the dev_info pointer for logdmux_info().
 */
static int
logdmux_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int rc;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	rc = ddi_create_minor_node(devi, "logindmux", S_IFCHR, 0,
	    DDI_PSEUDO, CLONE_DEV);
	if (rc == DDI_FAILURE)
		return (DDI_FAILURE);

	logdmux_dip = devi;
	return (DDI_SUCCESS);
}
/*
 * Detach: remove all minor nodes created at attach time.
 */
static int
logdmux_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	if (cmd == DDI_DETACH) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_SUCCESS);
	}
	return (DDI_FAILURE);
}
/*
 * getinfo(9E) entry point: map a dev_t to our dev_info or instance.
 * All minors belong to instance 0.
 */
/* ARGSUSED */
static int
logdmux_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
		if (logdmux_dip == NULL)
			return (DDI_FAILURE);
		*result = logdmux_dip;
		return (DDI_SUCCESS);
	}

	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
		*result = (void *)0;
		return (DDI_SUCCESS);
	}

	return (DDI_FAILURE);
}
/*
 * Logindmux open routine
 *
 * Clone opens only: allocate a minor number from logdmux_minor_arena
 * (growing the arena under logdmux_minor_lock if exhausted), allocate
 * the per-instance tmx soft state and wire it to both halves of the
 * upper queue pair.
 */
/*ARGSUSED*/
static int
logdmuxopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
{
	struct tmx *tmxp;
	minor_t minor, omaxminor;

	if (sflag != CLONEOPEN)
		return (EINVAL);

	mutex_enter(&logdmux_minor_lock);

	if (vmem_size(logdmux_minor_arena, VMEM_FREE) == 0) {
		/*
		 * The arena has been exhausted; grow by powers of two
		 * up to MAXMIN; bail if we've run out of minors.
		 */
		if (logdmux_maxminor == MAXMIN) {
			mutex_exit(&logdmux_minor_lock);
			return (ENOMEM);
		}

		omaxminor = logdmux_maxminor;
		logdmux_maxminor = MIN(logdmux_maxminor << 1, MAXMIN);

		(void) vmem_add(logdmux_minor_arena,
		    (void *)(uintptr_t)(omaxminor + 1),
		    logdmux_maxminor - omaxminor, VM_SLEEP);
	}

	/* VM_SLEEP cannot fail: the arena is guaranteed non-empty here. */
	minor = (minor_t)(uintptr_t)
	    vmem_alloc(logdmux_minor_arena, 1, VM_SLEEP);

	mutex_exit(&logdmux_minor_lock);

	if (ddi_soft_state_zalloc(logdmux_statep, minor) == DDI_FAILURE) {
		vmem_free(logdmux_minor_arena, (void *)(uintptr_t)minor, 1);
		return (ENOMEM);
	}

	/* Not yet I_LINKed and has no peer; see LOGDMX_IOC_QEXCHANGE. */
	tmxp = ddi_get_soft_state(logdmux_statep, minor);
	tmxp->rdq = q;
	tmxp->muxq = NULL;
	tmxp->peerq = NULL;
	tmxp->unlinkinfop = NULL;
	tmxp->dev0 = minor;

	*devp = makedevice(getmajor(*devp), tmxp->dev0);
	q->q_ptr = tmxp;
	WR(q)->q_ptr = tmxp;

	qprocson(q);

	return (0);
}
/*
 * Logindmux close routine gets called when telnet connection is closed
 *
 * The instance must already be unlinked (asserted below).  Cancels any
 * outstanding bufcall/timeout callbacks before releasing the soft state
 * and minor number.
 */
/*ARGSUSED*/
static int
logdmuxclose(queue_t *q, int flag, cred_t *crp)
{
	struct tmx *tmxp = q->q_ptr;
	minor_t minor = tmxp->dev0;

	ASSERT(tmxp->muxq == NULL);
	ASSERT(tmxp->peerq == NULL);

	qprocsoff(q);

	/* Cancel pending recover() bufcalls on either side. */
	if (tmxp->wbufcid != 0) {
		qunbufcall(q, tmxp->wbufcid);
		tmxp->wbufcid = 0;
	}
	if (tmxp->rbufcid != 0) {
		qunbufcall(q, tmxp->rbufcid);
		tmxp->rbufcid = 0;
	}

	/* Cancel pending recover() and unlink-poll timeouts. */
	if (tmxp->rtimoutid != 0) {
		(void) quntimeout(q, tmxp->rtimoutid);
		tmxp->rtimoutid = 0;
	}
	if (tmxp->wtimoutid != 0) {
		(void) quntimeout(q, tmxp->wtimoutid);
		tmxp->wtimoutid = 0;
	}
	if (tmxp->utimoutid != 0) {
		(void) quntimeout(q, tmxp->utimoutid);
		tmxp->utimoutid = 0;
	}

	/*
	 * Hold logdmux_qexch_lock to prevent another thread that might be
	 * in LOGDMX_IOC_QEXCHANGE from looking up our state while we're
	 * disposing of it.
	 */
	mutex_enter(&logdmux_qexch_lock);
	ddi_soft_state_free(logdmux_statep, minor);
	vmem_free(logdmux_minor_arena, (void *)(uintptr_t)minor, 1);
	mutex_exit(&logdmux_qexch_lock);

	q->q_ptr = NULL;
	WR(q)->q_ptr = NULL;

	return (0);
}
/*
 * Upper read service routine: when the upper read side is back-enabled,
 * kick the lower read queue so queued messages flow upstream again.
 */
static int
logdmuxursrv(queue_t *q)
{
	queue_t *lwq = ((struct tmx *)q->q_ptr)->muxq;

	if (lwq != NULL)
		qenable(RD(lwq));

	return (0);
}
/*
 * This routine gets called when telnet daemon sends data or ioctl messages
 * to upper mux queue.
 *
 * Handles I_LINK/I_UNLINK, the private LOGDMX_IOC_QEXCHANGE ioctl that
 * pairs two logindmux instances, and routes data/proto messages down to
 * the linked lower stream (with flow control).
 */
static int
logdmuxuwput(queue_t *q, mblk_t *mp)
{
	queue_t *qp;
	mblk_t *newmp;
	struct iocblk *ioc;
	minor_t minor;
	STRUCT_HANDLE(protocol_arg, protoh);
	struct tmx *tmxp, *tmxpeerp;
	int error;

	tmxp = q->q_ptr;

	switch (mp->b_datap->db_type) {

	case M_IOCTL:
		ASSERT(MBLKL(mp) == sizeof (struct iocblk));

		ioc = (struct iocblk *)mp->b_rptr;
		switch (ioc->ioc_cmd) {
		/*
		 * This is a special ioctl which exchanges q info
		 * of the two peers, connected to netf and ptmx.
		 */
		case LOGDMX_IOC_QEXCHANGE:
			error = miocpullup(mp,
			    SIZEOF_STRUCT(protocol_arg, ioc->ioc_flag));
			if (error != 0) {
				miocnak(q, mp, 0, error);
				break;
			}
			STRUCT_SET_HANDLE(protoh, ioc->ioc_flag,
			    (struct protocol_arg *)mp->b_cont->b_rptr);
#ifdef _SYSCALL32_IMPL
			/* 32-bit caller: expand the compressed dev_t. */
			if ((ioc->ioc_flag & DATAMODEL_MASK) ==
			    DATAMODEL_ILP32) {
				minor = getminor(expldev(
				    STRUCT_FGET(protoh, dev)));
			} else
#endif
			{
				minor = getminor(STRUCT_FGET(protoh, dev));
			}

			/*
			 * The second argument to ddi_get_soft_state() is
			 * interpreted as an `int', so prohibit negative
			 * values.
			 */
			if ((int)minor < 0) {
				miocnak(q, mp, 0, EINVAL);
				break;
			}

			/*
			 * We must hold logdmux_qexch_lock while looking up
			 * the proposed peer to prevent another thread from
			 * simultaneously I_UNLINKing or closing it.
			 */
			mutex_enter(&logdmux_qexch_lock);

			/*
			 * For LOGDMX_IOC_QEXCHANGE to succeed, our peer must
			 * exist (and not be us), and both we and our peer
			 * must be I_LINKed (i.e., muxq must not be NULL) and
			 * not already have a peer.
			 */
			tmxpeerp = ddi_get_soft_state(logdmux_statep, minor);
			if (tmxpeerp == NULL || tmxpeerp == tmxp ||
			    tmxpeerp->muxq == NULL || tmxpeerp->peerq != NULL ||
			    tmxp->muxq == NULL || tmxp->peerq != NULL) {
				mutex_exit(&logdmux_qexch_lock);
				miocnak(q, mp, 0, EINVAL);
				break;
			}

			/*
			 * If `flag' is set then exchange queues and assume
			 * tmxp refers to the ptmx stream.
			 */
			if (STRUCT_FGET(protoh, flag)) {
				/*
				 * Allocate and populate the structure we
				 * need when processing an I_UNLINK ioctl.
				 * Give both logindmux instances a pointer
				 * to it from their tmx structure.
				 */
				if ((error = logdmux_alloc_unlinkinfo(
				    tmxp, tmxpeerp)) != 0) {
					mutex_exit(&logdmux_qexch_lock);
					miocnak(q, mp, 0, error);
					break;
				}
				tmxp->peerq = tmxpeerp->muxq;
				tmxpeerp->peerq = tmxp->muxq;
				tmxp->isptm = B_TRUE;
			}
			mutex_exit(&logdmux_qexch_lock);
			miocack(q, mp, 0, 0);
			break;

		case I_LINK:
			ASSERT(MBLKL(mp->b_cont) == sizeof (struct linkblk));
			logdmuxlink(q, mp);
			break;

		case I_UNLINK:
			ASSERT(MBLKL(mp->b_cont) == sizeof (struct linkblk));
			logdmuxunlink(q, mp);
			break;

		default:
			/* Other ioctls only make sense once linked. */
			if (tmxp->muxq == NULL) {
				miocnak(q, mp, 0, EINVAL);
				return (0);
			}
			putnext(tmxp->muxq, mp);
			break;
		}

		break;

	case M_DATA:
		/*
		 * Data heading to the network side is wrapped in an M_CTL
		 * carrying M_CTL_MAGIC_NUMBER; the login module below
		 * recognizes this framing.  If the single byte cannot be
		 * allocated, recover() requeues and retries later.
		 */
		if (!tmxp->isptm) {
			if ((newmp = allocb(sizeof (char), BPRI_MED)) == NULL) {
				recover(q, mp, sizeof (char));
				return (0);
			}
			newmp->b_datap->db_type = M_CTL;
			*newmp->b_wptr++ = M_CTL_MAGIC_NUMBER;
			newmp->b_cont = mp;
			mp = newmp;
		}
		/* FALLTHRU */

	case M_PROTO:
	case M_PCPROTO:
		qp = tmxp->muxq;
		if (qp == NULL) {
			merror(q, mp, EINVAL);
			return (0);
		}

		/* Ordinary-priority messages honor downstream flow control. */
		if (queclass(mp) < QPCTL) {
			if (q->q_first != NULL || !canputnext(qp)) {
				(void) putq(q, mp);
				return (0);
			}
		}
		putnext(qp, mp);
		break;

	case M_FLUSH:
		if (*mp->b_rptr & FLUSHW)
			flushq(q, FLUSHALL);

		if (tmxp->muxq != NULL) {
			putnext(tmxp->muxq, mp);
			return (0);
		}

		/* Not linked: answer the read-side flush ourselves. */
		*mp->b_rptr &= ~FLUSHW;
		if (*mp->b_rptr & FLUSHR)
			qreply(q, mp);
		else
			freemsg(mp);
		break;

	default:
		cmn_err(CE_NOTE, "logdmuxuwput: received unexpected message"
		    " of type 0x%x", mp->b_datap->db_type);
		freemsg(mp);
	}
	return (0);
}
/*
 * Upper write service routine
 *
 * Drains messages queued by logdmuxuwput() while the lower stream was
 * flow-controlled, applying the same M_CTL wrapping to network-bound
 * M_DATA.  Stops (with putbq) as soon as the lower stream pushes back.
 */
static int
logdmuxuwsrv(queue_t *q)
{
	mblk_t *mp, *newmp;
	queue_t *qp;
	struct tmx *tmxp = q->q_ptr;

	while ((mp = getq(q)) != NULL) {
		switch (mp->b_datap->db_type) {
		case M_DATA:
			/* Same framing as in logdmuxuwput(). */
			if (!tmxp->isptm) {
				if ((newmp = allocb(sizeof (char), BPRI_MED)) ==
				    NULL) {
					recover(q, mp, sizeof (char));
					return (0);
				}
				newmp->b_datap->db_type = M_CTL;
				*newmp->b_wptr++ = M_CTL_MAGIC_NUMBER;
				newmp->b_cont = mp;
				mp = newmp;
			}
			/* FALLTHRU */

		case M_CTL:
		case M_PROTO:
			if (tmxp->muxq == NULL) {
				merror(q, mp, EIO);
				break;
			}
			qp = tmxp->muxq;
			if (!canputnext(qp)) {
				/* Still flow-controlled: requeue and wait. */
				(void) putbq(q, mp);
				return (0);
			}
			putnext(qp, mp);
			break;

		default:
			cmn_err(CE_NOTE, "logdmuxuwsrv: received unexpected"
			    " message of type 0x%x", mp->b_datap->db_type);
			freemsg(mp);
		}
	}
	return (0);
}
/*
 * Logindmux lower put routine detects from which of the two lower queues
 * the data needs to be read from and writes it out to its peer queue.
 * For protocol, it detects M_CTL and sends its data to the daemon. Also,
 * for ioctl and other types of messages, it lets the daemon handle it.
 *
 * This is also where the LOGDMUX_UNLINK_REQ/RESP protocol messages from
 * the peer instance are processed; see the block comment at the top of
 * the file.
 */
static int
logdmuxlrput(queue_t *q, mblk_t *mp)
{
	mblk_t *savemp;
	queue_t *qp;
	struct iocblk *ioc;
	struct tmx *tmxp = q->q_ptr;
	uchar_t flush;
	uint_t *messagep;
	unlinkinfo_t *unlinkinfop = tmxp->unlinkinfop;

	/* Unlinked (or never paired): nothing useful can be done. */
	if (tmxp->muxq == NULL || tmxp->peerq == NULL) {
		freemsg(mp);
		return (0);
	}

	/*
	 * If there's already a message on our queue and the incoming
	 * message is not of a high-priority, enqueue the message --
	 * but not if it's a logindmux protocol message.
	 */
	if ((q->q_first != NULL) && (queclass(mp) < QPCTL) &&
	    (!LOGDMUX_PROTO_MBLK(mp))) {
		(void) putq(q, mp);
		return (0);
	}

	switch (mp->b_datap->db_type) {

	case M_IOCTL:
		ioc = (struct iocblk *)mp->b_rptr;
		switch (ioc->ioc_cmd) {
		/*
		 * Terminal-related ioctls are forwarded to the peer
		 * (i.e., toward the pty side).
		 */
		case TIOCSWINSZ:
		case TCSETAF:
		case TCSETSF:
		case TCSETA:
		case TCSETAW:
		case TCSETS:
		case TCSETSW:
		case TCSBRK:
		case TIOCSTI:
			qp = tmxp->peerq;
			break;

		default:
			cmn_err(CE_NOTE, "logdmuxlrput: received unexpected"
			    " request for ioctl 0x%x", ioc->ioc_cmd);

			/* NAK unrecognized ioctl's. */
			miocnak(q, mp, 0, 0);
			return (0);
		}
		break;

	case M_DATA:
	case M_HANGUP:
		qp = tmxp->peerq;
		break;

	case M_CTL:
		/*
		 * The protocol messages that flow between the peers
		 * to implement the unlink functionality are M_CTLs
		 * which have the M_IOCTL/I_UNLINK mblk of the ioctl
		 * attached via b_cont.  LOGDMUX_PROTO_MBLK() uses
		 * this to determine whether a particular M_CTL is a
		 * peer protocol message.
		 */
		if (LOGDMUX_PROTO_MBLK(mp)) {
			messagep = (uint_t *)mp->b_rptr;

			switch (*messagep) {

			case LOGDMUX_UNLINK_REQ:
				/*
				 * We've received a message from our
				 * peer indicating that it wants to
				 * unlink.  Reuse the mblk as the RESP,
				 * null our peerq under the lock (see
				 * logdmuxlwsrv()) and bounce it back.
				 */
				*messagep = LOGDMUX_UNLINK_RESP;
				qp = tmxp->peerq;

				mutex_enter(&logdmux_peerq_lock);
				tmxp->peerq = NULL;
				mutex_exit(&logdmux_peerq_lock);

				put(RD(qp), mp);
				return (0);

			case LOGDMUX_UNLINK_RESP:
				/*
				 * We've received a positive response
				 * from our peer to an earlier
				 * LOGDMUX_UNLINK_REQ that we sent.
				 * We can now carry on with the unlink.
				 */
				qp = tmxp->rdq;

				mutex_enter(&unlinkinfop->state_lock);
				ASSERT(unlinkinfop->state ==
				    LOGDMUX_UNLINK_PENDING);
				unlinkinfop->state = LOGDMUX_UNLINKED;
				mutex_exit(&unlinkinfop->state_lock);

				/* b_cont carries the original M_IOCTL. */
				logdmux_finish_unlink(WR(qp), mp->b_cont);

				return (0);
			}
		}

		qp = tmxp->rdq;
		if (q->q_first != NULL || !canputnext(qp)) {
			(void) putq(q, mp);
			return (0);
		}
		/* Strip the M_CTL_MAGIC_NUMBER framing before sending up. */
		if ((MBLKL(mp) == 1) && (*mp->b_rptr == M_CTL_MAGIC_NUMBER)) {
			savemp = mp->b_cont;
			freeb(mp);
			mp = savemp;
		}
		putnext(qp, mp);
		return (0);

	case M_IOCACK:
	case M_IOCNAK:
	case M_PROTO:
	case M_PCPROTO:
	case M_PCSIG:
	case M_SETOPTS:
		qp = tmxp->rdq;
		break;

	case M_ERROR:
		if (tmxp->isptm) {
			/*
			 * This error is from ptm.  We could tell TCP to
			 * shutdown the connection, but it's easier to just
			 * wait for the daemon to get SIGCHLD and close from
			 * above.
			 */
			freemsg(mp);
			return (0);
		}
		/*
		 * This is from TCP.  Don't really know why we'd
		 * get this, but we have a pretty good idea what
		 * to do:  Send M_HANGUP to the pty.
		 */
		mp->b_datap->db_type = M_HANGUP;
		mp->b_wptr = mp->b_rptr;
		qp = tmxp->peerq;
		break;

	case M_FLUSH:
		if (*mp->b_rptr & FLUSHR)
			flushq_dataonly(q);

		if (mp->b_flag & MSGMARK) {
			/*
			 * This M_FLUSH has been marked by the module
			 * below as intended for the upper queue,
			 * not the peer queue.
			 */
			qp = tmxp->rdq;
			mp->b_flag &= ~MSGMARK;
		} else {
			/*
			 * Wrap this M_FLUSH through the mux.
			 * The FLUSHR and FLUSHW bits must be
			 * reversed.
			 */
			qp = tmxp->peerq;
			flush = *mp->b_rptr;
			*mp->b_rptr &= ~(FLUSHR | FLUSHW);
			if (flush & FLUSHW)
				*mp->b_rptr |= FLUSHR;
			if (flush & FLUSHR)
				*mp->b_rptr |= FLUSHW;
		}
		break;

	case M_START:
	case M_STOP:
	case M_STARTI:
	case M_STOPI:
		/* Start/stop flow-control messages are dropped. */
		freemsg(mp);
		return (0);

	default:
		cmn_err(CE_NOTE, "logdmuxlrput: received unexpected "
		    "message of type 0x%x", mp->b_datap->db_type);
		freemsg(mp);
		return (0);
	}

	/* Forward to the destination chosen above, honoring flow control. */
	if (queclass(mp) < QPCTL) {
		if (q->q_first != NULL || !canputnext(qp)) {
			(void) putq(q, mp);
			return (0);
		}
	}
	putnext(qp, mp);
	return (0);
}
/*
 * Lower read service routine
 *
 * Drains messages that logdmuxlrput() queued while the destination was
 * flow-controlled, using the same routing rules.  Protocol (unlink)
 * messages never land here: logdmuxlrput() handles them immediately.
 */
static int
logdmuxlrsrv(queue_t *q)
{
	mblk_t *mp, *savemp;
	queue_t *qp;
	struct iocblk *ioc;
	struct tmx *tmxp = q->q_ptr;

	while ((mp = getq(q)) != NULL) {
		/* Unlinked while queued: discard. */
		if (tmxp->muxq == NULL || tmxp->peerq == NULL) {
			freemsg(mp);
			continue;
		}

		switch (mp->b_datap->db_type) {

		case M_IOCTL:
			ioc = (struct iocblk *)mp->b_rptr;
			switch (ioc->ioc_cmd) {
			/* Terminal ioctls go to the peer (pty) side. */
			case TIOCSWINSZ:
			case TCSETAF:
			case TCSETSF:
			case TCSETA:
			case TCSETAW:
			case TCSETS:
			case TCSETSW:
			case TCSBRK:
			case TIOCSTI:
				qp = tmxp->peerq;
				break;

			default:
				cmn_err(CE_NOTE, "logdmuxlrsrv: received "
				    "unexpected request for ioctl 0x%x",
				    ioc->ioc_cmd);

				/* NAK unrecognized ioctl's. */
				miocnak(q, mp, 0, 0);
				continue;
			}
			break;

		case M_DATA:
		case M_HANGUP:
			qp = tmxp->peerq;
			break;

		case M_CTL:
			qp = tmxp->rdq;
			if (!canputnext(qp)) {
				(void) putbq(q, mp);
				return (0);
			}
			/* Strip the M_CTL_MAGIC_NUMBER framing. */
			if (MBLKL(mp) == 1 &&
			    (*mp->b_rptr == M_CTL_MAGIC_NUMBER)) {
				savemp = mp->b_cont;
				freeb(mp);
				mp = savemp;
			}
			putnext(qp, mp);
			continue;

		case M_PROTO:
		case M_SETOPTS:
			qp = tmxp->rdq;
			break;

		default:
			cmn_err(CE_NOTE, "logdmuxlrsrv: received unexpected "
			    "message of type 0x%x", mp->b_datap->db_type);
			freemsg(mp);
			continue;
		}

		/* Only ordinary-priority messages are ever queued here. */
		ASSERT(queclass(mp) < QPCTL);
		if (!canputnext(qp)) {
			(void) putbq(q, mp);
			return (0);
		}
		putnext(qp, mp);
	}
	return (0);
}
/*
 * Lower side write service procedure.  No messages are ever placed on
 * the write queue here, this just back-enables all of the upper side
 * write service procedures.
 */
static int
logdmuxlwsrv(queue_t *q)
{
	struct tmx *tmxp = q->q_ptr;

	/*
	 * Qenable upper write queue and find out which lower
	 * queue needs to be restarted with flow control.
	 * Qenable the peer queue so canputnext will
	 * succeed on next call to logdmuxlrput.
	 *
	 * peerq may be being nulled concurrently by the peer's
	 * LOGDMUX_UNLINK_REQ processing in logdmuxlrput(), hence the
	 * logdmux_peerq_lock (see the block comment at the top of the
	 * file).
	 */
	qenable(WR(tmxp->rdq));

	mutex_enter(&logdmux_peerq_lock);
	if (tmxp->peerq != NULL)
		qenable(RD(tmxp->peerq));
	mutex_exit(&logdmux_peerq_lock);

	return (0);
}
/*
 * This routine does I_LINK operation.
 *
 * Records the lower write queue from the linkblk in our tmx and points
 * both lower queues' q_ptr at our state so the lower put/service
 * routines can find it.
 */
static void
logdmuxlink(queue_t *q, mblk_t *mp)
{
	struct tmx *tmxp = q->q_ptr;
	struct linkblk *lp = (struct linkblk *)mp->b_cont->b_rptr;

	/*
	 * Fail if we're already linked.
	 */
	if (tmxp->muxq != NULL) {
		miocnak(q, mp, 0, EINVAL);
		return;
	}

	tmxp->muxq = lp->l_qbot;
	tmxp->muxq->q_ptr = tmxp;
	RD(tmxp->muxq)->q_ptr = tmxp;

	miocack(q, mp, 0, 0);
}
/*
 * logdmuxunlink() is called from logdmuxuwput() and is the first of two
 * functions which process an I_UNLINK ioctl.  logdmuxunlink() will determine
 * the state of logindmux peer linkage and, based on this, control when the
 * second function, logdmux_finish_unlink(), is called.  It's
 * logdmux_finish_unlink() that's sending the M_IOCACK upstream and
 * resetting the link state.
 */
static void
logdmuxunlink(queue_t *q, mblk_t *mp)
{
	struct tmx *tmxp = q->q_ptr;
	unlinkinfo_t *unlinkinfop;

	/*
	 * If we don't have a peer, just unlink.  Note that this check needs
	 * to be done under logdmux_qexch_lock to prevent racing with
	 * LOGDMX_IOC_QEXCHANGE, and we *must* set muxq to NULL prior to
	 * releasing the lock so that LOGDMX_IOC_QEXCHANGE will not consider
	 * us as a possible peer anymore (if it already considers us to be a
	 * peer, then unlinkinfop will not be NULL) -- NULLing muxq precludes
	 * use of logdmux_finish_unlink() here.
	 */
	mutex_enter(&logdmux_qexch_lock);
	unlinkinfop = tmxp->unlinkinfop;
	if (unlinkinfop == NULL) {
		ASSERT(tmxp->peerq == NULL);
		tmxp->muxq = NULL;
		mutex_exit(&logdmux_qexch_lock);
		miocack(q, mp, 0, 0);
		return;
	}
	mutex_exit(&logdmux_qexch_lock);

	mutex_enter(&unlinkinfop->state_lock);

	switch (unlinkinfop->state) {

	case LOGDMUX_LINKED:
		/*
		 * We're the first instance to process an I_UNLINK --
		 * ie, the peer instance is still there.  We'll change
		 * the state so that only one instance is executing an
		 * I_UNLINK at any one time.
		 */
		unlinkinfop->state = LOGDMUX_UNLINK_PENDING;
		mutex_exit(&unlinkinfop->state_lock);
		/*
		 * Attach the original M_IOCTL message to a
		 * LOGDMUX_UNLINK_REQ message and send it to our peer to
		 * tell it to unlink from us.  When it has completed the
		 * task, it will send us a LOGDMUX_UNLINK_RESP message
		 * with the original M_IOCTL still attached, which will be
		 * processed in our logdmuxlrput().  At that point, we will
		 * call logdmux_finish_unlink() to complete the unlink
		 * operation using the attached M_IOCTL.
		 */
		unlinkinfop->prot_mp->b_cont = mp;
		/*
		 * Put the M_CTL directly to the peer's lower RQ.
		 */
		put(RD(tmxp->peerq), unlinkinfop->prot_mp);
		break;

	case LOGDMUX_UNLINK_PENDING:
		mutex_exit(&unlinkinfop->state_lock);
		/*
		 * Our peer is actively processing an I_UNLINK itself.
		 * We have to wait for the peer to complete and we use
		 * qtimeout as a way to poll for its completion.
		 * We save a reference to our mblk so that we can send
		 * it upstream once our peer is done.
		 */
		tmxp->unlink_mp = mp;
		tmxp->utimoutid = qtimeout(q, logdmux_unlink_timer, q,
		    drv_usectohz(LOGDMUX_POLL_WAIT));
		break;

	case LOGDMUX_UNLINKED:
		/*
		 * Our peer is no longer linked so we can proceed.
		 * The shared unlinkinfo_t is no longer needed; destroy it.
		 */
		mutex_exit(&unlinkinfop->state_lock);
		mutex_destroy(&unlinkinfop->state_lock);
		freeb(unlinkinfop->prot_mp);
		kmem_free(unlinkinfop, sizeof (unlinkinfo_t));
		logdmux_finish_unlink(q, mp);
		break;

	default:
		mutex_exit(&unlinkinfop->state_lock);
		cmn_err(CE_PANIC,
		    "logdmuxunlink: peer linkage is in an unrecognized state");
		break;
	}
}
/*
 * Finish the unlink operation.  Note that no locks should be held since
 * this routine calls into other queues.
 *
 * Flushes any queued write-side data to the (still-attached) lower
 * stream, severs the linkage fields in our tmx, and acks the original
 * I_UNLINK M_IOCTL upstream.
 */
static void
logdmux_finish_unlink(queue_t *q, mblk_t *unlink_mp)
{
	struct tmx *tmxp = q->q_ptr;
	mblk_t *mp;

	/*
	 * Flush any write side data downstream.
	 */
	while ((mp = getq(WR(q))) != NULL)
		putnext(tmxp->muxq, mp);

	/*
	 * Note that we do not NULL out q_ptr since another thread (e.g., a
	 * STREAMS service thread) might call logdmuxlrput() between the time
	 * we exit the logindmux perimeter and the time the STREAMS framework
	 * resets q_ptr to stdata (since muxq is set to NULL, any messages
	 * will just be discarded).
	 */
	tmxp->muxq = NULL;
	tmxp->unlinkinfop = NULL;
	tmxp->peerq = NULL;

	miocack(q, unlink_mp, 0, 0);
}
/*
 * logdmux_unlink_timer() is executed by qtimeout().  This function will
 * check unlinkinfop->state to determine whether the peer has completed
 * its I_UNLINK.  If it hasn't, we use qtimeout() to initiate another poll.
 */
static void
logdmux_unlink_timer(void *arg)
{
	queue_t *q = arg;
	struct tmx *tmxp = q->q_ptr;
	unlinkinfo_t *unlinkinfop = tmxp->unlinkinfop;

	/* This timeout has fired; clear the id so close won't cancel it. */
	tmxp->utimoutid = 0;

	mutex_enter(&unlinkinfop->state_lock);

	if (unlinkinfop->state != LOGDMUX_UNLINKED) {
		ASSERT(unlinkinfop->state == LOGDMUX_UNLINK_PENDING);
		mutex_exit(&unlinkinfop->state_lock);
		/*
		 * We need to wait longer for our peer to complete.
		 */
		tmxp->utimoutid = qtimeout(q, logdmux_unlink_timer, q,
		    drv_usectohz(LOGDMUX_POLL_WAIT));
	} else {
		/*
		 * Our peer is no longer linked so we can proceed with
		 * the cleanup: destroy the shared unlinkinfo_t and send
		 * the saved I_UNLINK M_IOCTL upstream.
		 */
		mutex_exit(&unlinkinfop->state_lock);
		mutex_destroy(&unlinkinfop->state_lock);
		freeb(unlinkinfop->prot_mp);
		kmem_free(unlinkinfop, sizeof (unlinkinfo_t));
		logdmux_finish_unlink(q, tmxp->unlink_mp);
	}
}
static void
logdmux_timer(void *arg)
{
queue_t *q = arg;
struct tmx *tmxp = q->q_ptr;
ASSERT(tmxp != NULL);
if (q->q_flag & QREADR) {
ASSERT(tmxp->rtimoutid != 0);
tmxp->rtimoutid = 0;
} else {
ASSERT(tmxp->wtimoutid != 0);
tmxp->wtimoutid = 0;
}
enableok(q);
qenable(q);
}
static void
logdmux_buffer(void *arg)
{
queue_t *q = arg;
struct tmx *tmxp = q->q_ptr;
ASSERT(tmxp != NULL);
if (q->q_flag & QREADR) {
ASSERT(tmxp->rbufcid != 0);
tmxp->rbufcid = 0;
} else {
ASSERT(tmxp->wbufcid != 0);
tmxp->wbufcid = 0;
}
enableok(q);
qenable(q);
}
/*
 * Allocation-failure recovery: requeue `mp' with the queue disabled and
 * arrange (via qbufcall, or qtimeout if that also fails) to re-enable the
 * queue once `size' bytes may be allocatable again.  Only called from the
 * upper queues (asserted).
 */
static void
recover(queue_t *q, mblk_t *mp, size_t size)
{
	timeout_id_t tid;
	bufcall_id_t bid;
	struct tmx *tmxp = q->q_ptr;

	/*
	 * Avoid re-enabling the queue.
	 */
	ASSERT(queclass(mp) < QPCTL);
	ASSERT(WR(q)->q_next == NULL); /* Called from upper queue only */
	noenable(q);
	(void) putbq(q, mp);

	/*
	 * Make sure there is at most one outstanding request per queue.
	 */
	if (q->q_flag & QREADR) {
		if (tmxp->rtimoutid != 0 || tmxp->rbufcid != 0)
			return;
	} else {
		if (tmxp->wtimoutid != 0 || tmxp->wbufcid != 0)
			return;
	}

	/* Prefer a bufcall; fall back to polling with a timeout. */
	if (!(bid = qbufcall(RD(q), size, BPRI_MED, logdmux_buffer, q))) {
		tid = qtimeout(RD(q), logdmux_timer, q, drv_usectohz(SIMWAIT));
		if (q->q_flag & QREADR)
			tmxp->rtimoutid = tid;
		else
			tmxp->wtimoutid = tid;
	} else {
		if (q->q_flag & QREADR)
			tmxp->rbufcid = bid;
		else
			tmxp->wbufcid = bid;
	}
}
/*
 * Remove and free only the M_DATA messages on `q', leaving all other
 * message types queued.
 */
static void
flushq_dataonly(queue_t *q)
{
	mblk_t *mp, *next;

	/*
	 * Since we are already in the perimeter, and we are not a put-shared
	 * perimeter, we don't need to freeze the stream or anything to
	 * be ensured of exclusivity.
	 */
	for (mp = q->q_first; mp != NULL; mp = next) {
		next = mp->b_next;
		if (mp->b_datap->db_type == M_DATA) {
			rmvq(q, mp);
			freemsg(mp);
		}
	}
}
/*
* logdmux_alloc_unlinkinfo() is called from logdmuxuwput() during the
* processing of a LOGDMX_IOC_QEXCHANGE ioctl() to allocate the
* unlinkinfo_t which is needed during the processing of an I_UNLINK.
*/
static int
logdmux_alloc_unlinkinfo(struct tmx *t0, struct tmx *t1)
{
unlinkinfo_t *p;
uint_t *messagep;
if ((p = kmem_zalloc(sizeof (unlinkinfo_t), KM_NOSLEEP)) == NULL)
return (ENOSR);
if ((p->prot_mp = allocb(sizeof (uint_t), BPRI_MED)) == NULL) {
kmem_free(p, sizeof (unlinkinfo_t));
return (ENOSR);
}
DB_TYPE(p->prot_mp) = M_CTL;
messagep = (uint_t *)p->prot_mp->b_wptr;
*messagep = LOGDMUX_UNLINK_REQ;
p->prot_mp->b_wptr += sizeof (*messagep);
p->state = LOGDMUX_LINKED;
mutex_init(&p->state_lock, NULL, MUTEX_DRIVER, NULL);
t0->unlinkinfop = t1->unlinkinfop = p;
return (0);
}