stream.c revision 17169044f903cb92234f23d0ba0ce43449614a4d
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * CDDL HEADER START
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * The contents of this file are subject to the terms of the
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Common Development and Distribution License (the "License").
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * You may not use this file except in compliance with the License.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * See the License for the specific language governing permissions
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * and limitations under the License.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * When distributing Covered Code, include this CDDL HEADER in each
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * If applicable, add the following below this CDDL HEADER, with the
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * fields enclosed by brackets "[]" replaced with your own identifying
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * information: Portions Copyright [yyyy] [name of copyright owner]
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * CDDL HEADER END
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson/* All Rights Reserved */
9a686fbc186e8e2a64e9a5094d44c7d6fa0ea167Paul Dagnelie * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Use is subject to license terms.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson#pragma ident "%Z%%M% %I% %E% SMI"
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * This file contains all the STREAMS utility routines that may
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * be used by modules and drivers.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * STREAMS message allocator: principles of operation
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * The streams message allocator consists of all the routines that
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * allocate, dup and free streams messages: allocb(), [d]esballoc[a],
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * dupb(), freeb() and freemsg(). What follows is a high-level view
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * of how the allocator works.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Every streams message consists of one or more mblks, a dblk, and data.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * All mblks for all types of messages come from a common mblk_cache.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * The dblk and data come in several flavors, depending on how the
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * message is allocated:
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * fixed-size dblk/data caches. For message sizes that are multiples of
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * PAGESIZE, dblks are allocated separately from the buffer.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * The associated buffer is allocated by the constructor using kmem_alloc().
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * For all other message sizes, the dblk and its associated data are allocated
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * as a single contiguous chunk of memory.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Objects in these caches consist of a dblk plus its associated data.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * allocb() determines the nearest-size cache by table lookup:
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * the dblk_cache[] array provides the mapping from size to dblk cache.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * kmem_alloc()'ing a buffer for the data and supplying that
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * buffer to gesballoc(), described below.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * (3) The four flavors of [d]esballoc[a] are all implemented by a
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * common routine, gesballoc() ("generic esballoc"). gesballoc()
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * allocates a dblk from the global dblk_esb_cache and sets db_base,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * db_lim and db_frtnp to describe the caller-supplied buffer.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * While there are several routines to allocate messages, there is only
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * one routine to free messages: freeb(). freeb() simply invokes the
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * dblk's free method, dbp->db_free(), which is set at allocation time.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * dupb() creates a new reference to a message by allocating a new mblk,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * incrementing the dblk reference count and setting the dblk's free
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * method to dblk_decref(). The dblk's original free method is retained
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * in db_lastfree. dblk_decref() decrements the reference count on each
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * freeb(). If this is not the last reference it just frees the mblk;
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * if this *is* the last reference, it restores db_free to db_lastfree,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * sets db_mblk to the current mblk (see below), and invokes db_lastfree.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * The implementation makes aggressive use of kmem object caching for
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * maximum performance. This makes the code simple and compact, but
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * also a bit abstruse in some places. The invariants that constitute a
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * message's constructed state, described below, are more subtle than usual.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Every dblk has an "attached mblk" as part of its constructed state.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * The mblk is allocated by the dblk's constructor and remains attached
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * until the message is either dup'ed or pulled up. In the dupb() case
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * the mblk association doesn't matter until the last free, at which time
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * dblk_decref() attaches the last mblk to the dblk. pullupmsg() affects
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * the mblk association because it swaps the leading mblks of two messages,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * so it is responsible for swapping their db_mblk pointers accordingly.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * From a constructed-state viewpoint it doesn't matter that a dblk's
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * attached mblk can change while the message is allocated; all that
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * matters is that the dblk has *some* attached mblk when it's freed.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * The sizes of the allocb() small-message caches are not magical.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * They represent a good trade-off between internal and external
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * fragmentation for current workloads. They should be reevaluated
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * periodically, especially if allocations larger than DBLK_MAX_CACHE
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * become common. We use 64-byte alignment so that dblks don't
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * straddle cache lines unnecessarily.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson (8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson (8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson (((flags) | (ref - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson ((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson#define DBLK_RTFU_REF_MASK (DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson#define DBLK_RTFU_WORD(dbp) (*((uint32_t *)&(dbp)->db_ref))
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson#define MBLK_BAND_FLAG_WORD(mp) (*((uint32_t *)&(mp)->b_band))
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson 16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3920,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson 8192, 12112, 16384, 20304, 24576, 28496, 32768, 36688,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson 40960, 44880, 49152, 53072, 57344, 61264, 65536, 69456,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson 64, 128, 320, 576, 1088, 1536, 1984, 2624, 3968,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson 8192, 12160, 16384, 20352, 24576, 28544, 32768, 36736,
static int allocb_tryhard_fails;
int dblk_kmem_flags = 0;
int mblk_kmem_flags = 0;
streams_msg_init(void)
int offset;
< PAGESIZE);
mmd_init();
mblk_t *
if (size != 0) {
goto out;
index = 0;
goto out;
out:
return (mp);
mblk_t *
return (mp);
mblk_t *
return (mp);
mblk_t *
return (mp);
while (mp) {
mblk_t *
unsigned char *old_rptr;
return (mp);
return (NULL);
if (copy) {
return (mp1);
mblk_t *
goto out;
goto out;
out:
return (new_mp);
static mblk_t *
goto out;
out:
return (mp);
mblk_t *
if (!str_ftnever) {
return (mp);
mblk_t *
if (!str_ftnever) {
return (mp);
mblk_t *
if (!str_ftnever) {
return (mp);
mblk_t *
if (!str_ftnever) {
return (mp);
mblk_t *
if (!str_ftnever) {
return (mp);
bcache_t *
NULL) {
return (NULL);
return (bcp);
mblk_t *
goto out;
goto out;
out:
return (mp);
static mblk_t *
void *buf;
return (NULL);
return (mp);
mblk_t *
return (bp);
return (NULL);
* than strsubr.c) so that we don't have to expose all of the
mblk_t *
if (size != 0) {
return (mp);
index = 0;
return (NULL);
return (mp);
mblk_t *
return (NULL);
return (mp);
return (bc_id);
if (bcp) {
goto again;
if (pbcp)
mblk_t *
return (NULL);
return (NULL);
return (head);
mblk_t *
return (NULL);
return (NULL);
return (head);
* The alignment of rptr (w.r.t. word alignment) will be the same in the copy
mblk_t *
return (NULL);
return (nbp);
return (NULL);
return (nbp);
mblk_t *
return (NULL);
return (NULL);
return (head);
mblk_t *
return (bp1);
mblk_t *
if (lastp)
return (mp);
ssize_t n;
len -= n;
mblk_t *
ssize_t n;
return (NULL);
return (NULL);
return (NULL);
while (len > 0) {
len -= n;
return (NULL);
return (newmp);
unsigned char type;
ssize_t n;
int fromhead;
int first;
if (len < 0) {
fromhead = 0;
if (fromhead) {
while (len) {
len -= n;
first = 0;
while (len) {
len -= n;
return (count);
mblk_t *
return (bp);
return (bp);
mblk_t *
mblkcnt++;
return (bp);
int backenab = 0;
ASSERT(q);
if (band == 0) {
i = band;
if (backenab == 0) {
if (band != 0)
mblkcnt++;
int backenab = 0;
unsigned char bpri;
q->q_count = 0;
q->q_mblkcnt = 0;
while (mp) {
bpri++;
qbf[0] = 0;
if (backenab) {
if (qbf[0])
backenable(q, 0);
int band;
if (pri == 0) {
q->q_count = 0;
q->q_mblkcnt = 0;
while (mp) {
while (--band > 0)
if (flushed)
q = q->q_nfsrv;
q = q->q_nfsrv;
if (pri == 0) {
while (--pri)
while (*qbpp)
q->q_nband++;
if (!q->q_first) {
if (qbp) {
mblkcnt++;
if (qbp) {
qenable_locked(q);
while (*qbpp)
q->q_nband++;
if (tmp)
if (qbp) {
if (tmp) {
mblkcnt++;
if (qbp) {
qenable_locked(q);
goto badord;
if (emp) {
goto badord;
while (*qbpp)
q->q_nband++;
mblkcnt++;
qenable_locked(q);
queue_t *
q = _OTHERQ(q);
if (q->q_next) {
q = q->q_next;
return (_OTHERQ(q));
return (NULL);
qenable_locked(q);
int count = 0;
count++;
return (count);
int error = 0;
goto done;
if (pri != 0) {
while (*qbpp)
goto done;
q->q_nband++;
i = pri;
switch (what) {
case QHIWAT:
if (qbp)
case QLOWAT:
if (qbp)
case QMAXPSZ:
if (qbp)
if (strmsgsz != 0) {
case QMINPSZ:
if (qbp)
case QSTRUIOT:
if (qbp)
case QCOUNT:
case QFIRST:
case QLAST:
case QFLAG:
done:
return (error);
int error = 0;
goto done;
if (pri != 0) {
while (*qbpp)
goto done;
q->q_nband++;
i = pri;
switch (what) {
case QHIWAT:
if (qbp)
case QLOWAT:
if (qbp)
case QMAXPSZ:
if (qbp)
case QMINPSZ:
if (qbp)
case QCOUNT:
if (qbp)
case QFIRST:
if (qbp)
case QLAST:
if (qbp)
case QFLAG:
if (qbp)
case QSTRUIOT:
if (qbp)
done:
return (error);
* Note: for QWANTWSYNC/QWANTW and QWANTR, if no WSLEEPer or RSLEEPer then a
if (events)
unsigned char *ptr;
int error = 0;
if (stwrq)
switch (typ) {
case STRUIOT_STANDARD:
if (noblock) {
no_trap();
goto out;
if (noblock)
no_trap();
goto out;
if (noblock)
no_trap();
goto out;
out:
return (error);
static boolean_t
return (B_FALSE);
return (B_TRUE);
int (*proc)();
int isread;
int rval;
isread = 0;
return (EINVAL);
return (EINVAL);
return (EINVAL);
return (EINVAL);
goto out;
out:
return (rval);
return (rval);
int (*proc)();
int rval;
return (EINVAL);
return (rval);
return (rval);
#if defined(__sparc)
int disable_putlocks = 0;
q = _OTHERQ(q);
 * If the stream argument is 0, only create per-cpu sq_putlocks/sq_putcounts for
* the stream of q and per cpu sq_putlocks/sq_putcounts for all syncq's
q = _WR(q);
if (disable_putlocks != 0)
while (_SAMESTR(q)) {
if (stream == 0)
q = q->q_next;
KM_NOSLEEP))) {
str_ftnever++;
ix = 0;
goto cas_good;
* (as is logsubr.c at this comment writing).