wrsmd.c revision 3db86aab554edbb4244c8d1a1c90f152eee768af
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* DLPI driver for RSMPI
*
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/tnf_probe.h>
#ifdef DEBUG
#define DEBUG_WRSMD 1
#define DEBUG_PRINTF 1
#define DEBUG_LOG 1
#endif
#define WRSM_NAME "wrsm"
/*
* Lock hierarchy:
*
* ssp->ss_lock
* wrsmdp->wrsmd_lock
* wrsmddevlock
*
* rd->rd_lock
* rd->rd_xmit_lock
* rd->rd_net_lock
*
* wrsmd->wrsmd_dest_lock
* wrsmd->wrsmd_runq_lock
*
* wrsmdp->wrsmd_ipq_rwlock
* wrsmdp->event_lock;
* wrsmdstruplock
* rd->rd_nlb_lock -- currently never taken while another lock is held
* wrsmdattlock
* wrsmddbglock
*/
/*
* Defining DEBUG_WRSMD on the compile line (-DDEBUG_WRSMD) will compile
* debugging code into the driver. Whether any debug output actually gets
* printed depends on the value of wrsmddbg, which determines the class of
* messages that the user is interested in, and wrsmddbgmode, which
* determines how the user wants the messages to be produced.
*
* See the #defines for D1(), D2(), etc. below for which bits in wrsmddbg
* cause which messages to get printed.
*
* There are two ways debug output may be produced. The code to produce
* these various types is also conditionally compiled, using the following
* symbols:
*
* DEBUG_LOG If this is defined, support for an internal circular
* buffer of log entries is compiled in. The buffer may
* be dumped out by using the wrsmddumplog utility.
* This is currently the preferred trace method.
*
* DEBUG_PRINTF If this is defined, support for kernel debug printfs
* is compiled in. In many cases, this is not very
* useful since the sheer volume of tracing information
* overwhelms the console driver. In particular, if a
* problem causes a panic, you will very often not see
* the last few debugging messages produced before the
* panic, which are probably the ones you really wanted
* to see.
*
* The various types of output are controlled by bits in wrsmddbgmode, as
* follows. Multiple types of output may be used at once, if desired.
*
* (wrsmddbgmode & 1) Use debugging log.
* (wrsmddbgmode & 2) Use kernel printfs.
*/
#ifndef lint
#ifdef DEBUG_WRSMD
int wrsmddbg = 0x100;
int wrsmddbgmode = 0x3;
/* Always print -- at least for now */
#define D0 wrsmddebug
#define D1 \
if (wrsmddbg & 0x01) \
/* Additional function debugging. */
#define D2 \
if (wrsmddbg & 0x02) \
#define D4 \
if (wrsmddbg & 0x08) \
/* Latency timing output. */
#define D5 \
if (wrsmddbg & 0x10) \
/* Excessive debugging output */
#define D6 \
if (wrsmddbg & 0x20) \
/* outgoing packet tossed due to queue overflow */
#define DERR \
if (wrsmddbg & 0x100) \
/* Dumps of incoming packets */
#define D3D \
#else /* DEBUG_WRSMD */
#define D3D(a, b)
#endif /* DEBUG_WRSMD */
#else /* lint */
#ifdef DEBUG_WRSMD
int wrsmddbg;
int wrsmddbgmode;
#endif
#endif /* lint */
/*
* Function prototypes.
*/
static int wrsmdprobe(dev_info_t *);
static void wrsmddodetach(wrsmdstr_t *);
static void wrsmdfqetmo(void *);
static void wrsmdsconntmo(void *);
static void wrsmdacktmo(void *);
static void wrsmdaccepttmo(void * arg);
static void wrsmdfreedestevt(void *);
static void wrsmdteardown_tmo(void * arg);
static void wrsmd_event_thread(void *);
static void wrsmd_process_event(wrsmd_t *);
static void wrsmd_add_event(wrsmd_t *, int, void *);
static int wrsmdisstate(wrsmd_dest_t *, int);
static int wrsmdgetstate(wrsmd_dest_t *);
static void wrsmdsetstate(wrsmd_dest_t *, int);
static void wrsmdsetstate_nosrv(wrsmd_dest_t *, int);
ushort_t);
ushort_t);
/* LINTED: E_STATIC_FUNC_CALLD_NOT_DEFINED */
static void wrsmddebug(const char *, ...);
static void wrsmderror(dev_info_t *, const char *, ...);
static void wrsmdkstatinit(wrsmd_t *);
static void wrsmdtakedown(wrsmd_t *, int);
static void wrsmdsetipq(wrsmd_t *);
static void wrsmdfreebuf(wrsmdbuf_t *);
static void wrsmdputfqe(wrsmd_dest_t *, int);
static void wrsmdsyncfqe(wrsmd_dest_t *);
static void wrsmdsyncdqe(wrsmd_dest_t *);
static int wrsmdavailfqe(wrsmd_dest_t *);
static int wrsmdgetfqe(wrsmd_dest_t *, int *);
static void wrsmdungetfqe(wrsmd_dest_t *, int);
/* LINTED: E_STATIC_FUNC_CALLD_NOT_DEFINED */
#ifdef _DDICT
#endif
/*
* The wrsmd driver implements a reference count scheme for destination
* structures. The idea behind the scheme is to prevent the driver from
* deleting a destination structure while it is being used elsewhere, for
* example in a message handling routine. (Failures to protect against
* this occurrence have led to a fair array of baffling bugs over the
* lifetime of the driver.)
*
* The following set of macros implement the reference count scheme,
* translation from RSM address to destination structure, and removal of
* destinations from the run queue. All must be intertwined, since
* otherwise it would be possible to get a destination pointer from an RSM
* address, or from the run queue, but have some other part of the driver
* delete the destination before you could bump its reference count. The
* reference count scheme solves this race condition.
*/
/*
* FINDDEST attempts to find the destination with RSM address rsm_addr. If the
* destination exists, rd is set to point to it. If the destination exists,
* isdel is set to indicate whether the destination is currently being deleted
* (nonzero implies a delete is in progress). If the destination exists and
* is not being deleted, its reference count is increased by one.
*/
if (rd) \
D6("FINDDEST ctlr %d addr %ld refcnt++ is %d\n", \
} \
}
/*
* MAKEDEST attempts to find the destination with RSM address rsm_addr. If the
* destination exists, rd and isdel are set as in the description of FINDDEST,
* above. If the destination does not exist, a new destination structure is
* allocated and installed, rd is set to point to it, and isnew is set to 1.
*/
if (!(rd)) { \
(isnew) = 1; \
} \
if (rd) \
D6("MAKEDEST ctlr %d addr %ld refcnt++ is %d\n", \
} \
}
/*
* GETRUNQ attempts to return the destination which is at the head of wrsmd's
* run queue. If the run queue is non-empty, the head of the queue is removed,
* and rd is set to point to it; otherwise, rd is set to NULL. If rd is
* nonzero, isdel is set to 1 if the destination pointed to by rd is being
* deleted, or to 0 otherwise. Finally, if rd is nonzero, and isdel is zero,
* then rd's reference count is increased by one.
*/
if (rd) { \
D6("GETRUNQ ctlr %d addr %ld refcnt++ is %d\n", \
wrsmd->wrsmd_ctlr_id, \
(rd)->rd_rsm_addr, \
} \
} \
}
/*
* REFDEST checks to see if the destination pointed to by rd is currently being
* deleted. If so, isdel is set to a nonzero value; otherwise, it is set to
* zero, and the destination's reference count is incremented.
*/
D6("REFDEST ctlr %d addr %ld refcnt++ is %d\n", \
} \
}
/*
* UNREFDEST decrements the reference count of the destination pointed to by
* rd. If the reference count becomes zero, we start the deletion process for
* the destination.
*/
D6("UNREFDEST ctlr %d addr %ld refcnt-- is %d\n", \
} else \
}
/* Local Static def's */
/*
* Lock and variable to allow attach routines to initialize global mutexes
*/
/*
* Linked list of "wrsmd" structures - one per physical device.
*/
/* when we add a device to the list */
/*
* Linked list of active (inuse) driver Streams.
*/
/*
* Our DL_INFO_ACK template.
*/
/*
 * Our DL_INFO_ACK template; fixed fields are set here, per-stream fields
 * (e.g. dl_current_state, dl_max_sdu) are patched before replying.
 *
 * FIX: dl_info_ack_t has a dl_brdcst_addr_offset member between
 * dl_brdcst_addr_length and dl_growth; the initializer was one entry
 * short, so the trailing 0 intended for dl_growth was initializing
 * dl_brdcst_addr_offset instead.  The broadcast address is laid out
 * immediately after the DLSAP address, which itself directly follows
 * the fixed-size ack structure.
 */
static dl_info_ack_t wrsmdinfoack = {
DL_INFO_ACK, /* dl_primitive */
MEDIUM_MTU, /* dl_max_sdu */
0, /* dl_min_sdu */
WRSMD_DEVICE_ADDRL, /* dl_addr_length */
DL_ETHER, /* dl_mac_type */
0, /* dl_reserved */
0, /* dl_current_state */
-2, /* dl_sap_length - 2 bytes (short), */
/* second component in DLSAP address */
DL_CLDLS, /* dl_service_mode */
0, /* dl_qos_length */
0, /* dl_qos_offset */
0, /* dl_range_length */
0, /* dl_range_offset */
DL_STYLE2, /* dl_provider_style */
sizeof (dl_info_ack_t), /* dl_addr_offset */
DL_VERSION_2, /* dl_version */
WRSMD_BCAST_ADDRL, /* dl_brdcst_addr_length */
sizeof (dl_info_ack_t) + WRSMD_DEVICE_ADDRL,
/* dl_brdcst_addr_offset */
0 /* dl_growth */
};
/*
* use standard ethernet broadcast address - all 1's
*/
static struct ether_addr wrsmdbcastaddr = {
0xff, 0xff, 0xff, 0xff, 0xff, 0xff /* ff:ff:ff:ff:ff:ff */
};
/*
 * Recognizable filler address (0xB,0xAD repeated -- "BAD BAD BAD").
 * NOTE(review): callers are not visible in this chunk; presumably used
 * where an address is required but no valid one exists -- confirm
 * against uses of wrsmdbadaddr elsewhere in the file.
 */
static struct ether_addr wrsmdbadaddr = {
0xB, 0xAD, 0xB, 0xAD, 0xB, 0xAD
};
static void *wrsmd_state; /* opaque handle for soft state structs */
/*
* ****************************************************************
* *
* B E G I N BASIC MODULE BOILERPLATE *
* *
* ****************************************************************
*/
/* Standard Streams declarations */
/*
 * STREAMS module_info, shared by the read-side and write-side qinit
 * structures below.
 */
static struct module_info wrsmdminfo = {
WRSMDIDNUM, /* mi_idnum */
WRSMDNAME, /* mi_idname */
WRSMDMINPSZ, /* mi_minpsz */
WRSMDMAXPSZ, /* mi_maxpsz */
WRSMDHIWAT, /* mi_hiwat */
WRSMDLOWAT, /* mi_lowat */
};
/*
 * Read-side queue init: only open/close entry points are supplied;
 * there are no read-side put/service routines.
 */
static struct qinit wrsmdrinit = {
0, /* qi_putp */
0, /* qi_srvp */
wrsmdopen, /* qi_qopen */
wrsmdclose, /* qi_qclose */
0, /* qi_qadmin */
&wrsmdminfo, /* qi_minfo */
NULL, /* qi_mstat */
};
/*
 * Write-side queue init: downstream messages enter through wrsmdwput()
 * and queued work is drained by the service routine wrsmdwsrv().
 */
static struct qinit wrsmdwinit = {
wrsmdwput, /* qi_putp */
wrsmdwsrv, /* qi_srvp */
0, /* qi_qopen */
0, /* qi_qclose */
0, /* qi_qadmin */
&wrsmdminfo, /* qi_minfo */
NULL, /* qi_mstat */
};
/* streamtab tying the two qinits together; no multiplexor support. */
static struct streamtab wrsmd_info = {
&wrsmdrinit, /* st_rdinit */
&wrsmdwinit, /* st_wrinit */
NULL, /* st_muxrinit */
NULL, /* st_muxwrinit */
};
/*
* cb_ops contains the driver entry points and is roughly equivalent
* to the cdevsw and bdevsw structures in previous releases.
*
* dev_ops contains, in addition to the pointer to cb_ops, the routines
* that support loading and unloading our driver.
*
* Unsupported entry points are set to nodev, except for the poll
* routine, which is set to nochpoll(), a routine that returns ENXIO.
*/
/*
 * Character/block driver entry points.  All direct dev entry points are
 * nodev because access to this driver is exclusively through STREAMS
 * (cb_stream -> wrsmd_info); cb_chpoll is nochpoll, which returns ENXIO.
 */
static struct cb_ops wrsmd_cb_ops = {
nodev, /* cb_open */
nodev, /* cb_close */
nodev, /* cb_strategy */
nodev, /* cb_print */
nodev, /* cb_dump */
nodev, /* cb_read */
nodev, /* cb_write */
nodev, /* cb_ioctl */
nodev, /* cb_devmap */
nodev, /* cb_mmap */
nodev, /* cb_segmap */
nochpoll, /* cb_chpoll */
ddi_prop_op, /* cb_prop_op */
&wrsmd_info, /* cb_stream */
D_MP, /* cb_flag */
};
DEVO_REV, /* devo_rev */
0, /* devo_refcnt */
ddi_no_info, /* devo_getinfo */
nulldev, /* devo_identify */
wrsmdprobe, /* devo_probe */
wrsmdattach, /* devo_attach */
wrsmddetach, /* devo_detach */
nodev, /* devo_reset */
&wrsmd_cb_ops, /* devo_cb_ops */
};
/*
* Module linkage information for the kernel.
*/
&mod_driverops, /* Type of module. This one is a driver */
"RSMPI DLPI %I% %E%", /* Description */
&wrsmd_ops, /* driver ops */
};
static struct modlinkage modlinkage = {
};
/*
* Module Loading and Installation Routines.
*/
/*
* Module Installation
* Install the driver, initialize soft state system, initialize wrsmdattlock
*/
int
_init(void)
{
int status;
if (status != 0) {
"wrsmd:_init - soft_state_init failed: 0x%x\n", status);
return (status);
}
/* initialize global locks here */
if (status != DDI_SUCCESS) {
}
return (status);
}
/*
* Module Removal
*/
int
_fini(void)
{
int status;
/* LINTED possibly invalid annotation name */
return (status);
}
return (status);
}
/*
* Return Module Info.
*/
int
{
}
/*
* Autoconfiguration Routines
*/
/*
* Probe to see if device exists.
*/
static int
{
return (DDI_PROBE_SUCCESS);
}
/*
* Attach the device, create and fill in the device-specific structure.
*/
static int
{
int instance;
int progress = 0;
if (cmd != DDI_ATTACH) {
"wrsmdattach end; failure 'cmd != DDI_ATTACH'",
return (DDI_FAILURE);
}
/*
* Allocate soft data structure
*/
D1("wrsmdattach: bad state zalloc, returning DDI_FAILURE");
"wrsmdattach end; failure 'ddi_soft_state_zalloc'",
return (DDI_FAILURE);
}
"wrsmdattach end; failure get_soft_state",
return (DDI_FAILURE);
}
/*
* Stuff private info into dip.
*/
/*
* Get device parameters from the device tree and save them in our
* per-device structure for later use.
*/
/*
* Initialize mutexes for this device.
*/
/*
* Initialize kernel statistics.
*/
/*
* Create the filesystem device node.
*/
D1("wrsmdattach: bad create_minor_node, returning "
"DDI_FAILURE");
"wrsmdattach end; failure 'ddi_create_minor_node'",
return (DDI_FAILURE);
}
/*
* Link this per-device structure in with the rest.
*/
/*
* Update our idea of the smallest buffer size seen so far. We do this
* because many clients do a get_info request before they've attached
* to a particular piece of hardware (ie, PPA). We need to have
* something to give them for the MTU, and giving them a value that's
* bigger than the one used by the device they eventually attach to
* causes problems.
*/
/*
* Start up event thread for this wrsmd device.
* This seems like as good a place as any...
*/
D1("wrsmdattach: returning DDI_SUCCESS");
return (DDI_SUCCESS);
}
/*
* Detach - Free resources allocated in attach
*/
/*ARGSUSED*/
static int
{
int instance;
if (cmd != DDI_DETACH) {
"wrsmddetach end; failure 'cmd != DDI_DETACH'",
return (DDI_FAILURE);
}
"wrsmddetach end; failure get_soft_state",
return (DDI_FAILURE);
}
/*
* The teardown timeout now reschedules itself, so we
* have to go to great lengths to kill it.
*/
while (tmoid) {
/*
* A timeout is scheduled to teardown the
* device. Cancel, as we intend to do this now.
*/
/*
* untimeout guarantees that either the function was
* cancelled, or it has completed. If the timeout was
* cancelled before the function ran, the timeout id will
* not have changed.
*/
wrsmdp->wrsmd_teardown_tmo_id = 0;
}
/*
* If we can't release all destination and RSMPI resources, we can't
* detach. The user will have to try later to unload the driver.
*/
if (wrsmduninit(wrsmdp) != 0) {
"wrsmddettach end; failure 'wrsmduninit'",
if (tmoid_orig) {
/* restart cancelled timeout */
}
return (DDI_FAILURE);
}
/*
* Release all our resources. At this point, all attachment
* setup must have completed, so must all be torn down.
*/
return (DDI_SUCCESS);
}
/*
* Undo tasks done by wrsmdattach(), either because we're detaching or because
* attach() got partly done then failed. progress is a bitmap that tells
* us what has been done so far.
*/
static void
int progress) /* Mask of RSMPI_ATT_xxx values */
{
int instance;
D1("wrsmdtakedown: wrsmdp 0x%p (ctlr %d), progress 0x%x",
if (progress & WRSMD_ATT_EVT_THREAD) {
}
if (progress & WRSMD_ATT_LINKED) {
else {
break;
}
}
progress &= ~WRSMD_ATT_LINKED;
}
if (progress & WRSMD_ATT_KSTAT) {
progress &= ~WRSMD_ATT_KSTAT;
}
if (progress & WRSMD_ATT_MINOR) {
progress &= ~WRSMD_ATT_MINOR;
}
if (progress & WRSMD_ATT_MUTEX) {
progress &= ~WRSMD_ATT_MUTEX;
}
D1("wrsmdtakedown: returning DDI_SUCCESS");
}
/*
* Determine the device ipq pointer after a state change. The device ipq
* pointer is basically a performance hack; it is set to one of our attached
* queues if, and only if, (a) that queue is the only one which has bound to
* IP's SAP, i.e., has expressed interest in getting IP packets; and (b) there
* is no stream attached to us which has gone into any sort of promiscuous mode,
* i.e., has expressed interest in getting all packets. The performance win
* comes when ipq is set; if it is, we can just send all incoming IP packets
* to that queue without having to traverse the entire list of queues attached
* to us.
*/
static void
{
/*
* Take ipq writer lock to prevent the fastpath from using the
* wrong ipq. Note: must take prior to taking struplock.
*/
break;
break;
} else {
break;
}
}
}
}
/*
* Hook a new stream onto the driver. We create a wrsmdstr structure for the
* new stream, and, if this is a clone open, allocate an unused minor device
* number for it.
*/
/*ARGSUSED*/
static int
{
int rc = 0;
D1("wrsmdopen: rq 0x%p, *dev 0x%lx, flag %d, sflag %d",
/*
* Serialize all driver open and closes.
*/
/*
* Determine minor device number.
*/
prevssp = &wrsmdstrup;
minordev = 0;
break;
minordev++;
}
} else
/*
* Link new entry into the list of active entries.
*/
}
return (rc);
}
/*
* Unhook a stream from the driver. If it was attached to a specific physical
* device, detach it from the device, then remove it from our list of streams.
*/
/*ARGSUSED*/
static int
{
/* Detach Stream from interface */
/* Unlink the per-Stream entry from the active list and free it */
break;
D1("wrsmdclose: returning 0");
return (0);
}
/*
* ****************************************************************
* *
* E N D BASIC MODULE BOILERPLATE *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* B E G I N STATUS REPORTING STUFF *
* *
* ****************************************************************
*/
/*
* This routine makes the data in our kernel statistics structure reflect
* the current state of the device; it's called whenever a user requests
* the kstat data. Basically, all we do is copy the stats from the RSMPI
* controller structure, where they're maintained, to the kstat's data
* portion.
*/
static int
int rw) /* Indicates read or write (we don't support write) */
{
if (rw == KSTAT_WRITE)
return (EACCES);
return (0);
}
/*
* This routine initializes the kernel statistics structures for an
* WRSMD device.
*/
static void
{
/*
* We create a kstat for the device, then create a whole bunch of
* named stats inside that first kstat.
*/
sizeof (kstat_named_t), 0)) == NULL) {
return;
}
/*
* The first five named stats we create have well-known names, and are
* used by standard SunOS utilities (e.g., netstat). (There is actually
* a sixth well-known stat, called "queue", which we don't support.)
*/
/*
* MIB II kstat variables
*/
/*
* PSARC 1997/198
*/
/*
* The remainder of the named stats are specific to our driver, and
* are extracted using the kstat utility.
*/
}
/*
* This routine removes any kstats we might have created.
*/
static void
{
}
static void
{
}
static void
{
}
/* Dump detailed information about the destination entry */
static void
{
int found;
volatile wrsmd_fqe_t *fqep;
volatile wrsmd_dqe_t *dqep;
int isdel;
found = 0;
found = 1;
D0("rsmaddr %ld\n",
D0("wrsmd_runq: ");
D0("sd 0x%p (%ld, %ld): state (%d, 0x%x, %d) ref %d "
"nlb %d nlb_del %d\n",
D0(" numlbufs %d nlb_del %d rbuflen %d "
"numrbuf %d, queueh %lx tail %lx\n",
if (isdel) continue; /* No need to UNREFDEST */
D0(" XMT: queue_len %d (max %d), tmo_int %d tmo_tot %d "
"(max %d) tmo_id %lx\n",
D0(" FQR: cached %d fqr_seq %04x size %d, fqr f/l/n "
"= %lx %lx %lx\n",
do {
D0("* ");
}
D0("\n shadow DQ: dqeq f/l/i/o = %lx %lx %lx %lx\n",
do {
D0("* ");
}
D0("\n remote DQ: dqw_seq %04x size %d, "
"dqw f_off = %lx\n",
D0("\n\n RCV: rd_rbufoff %08lx, rbuflen %d, numrbuf %d\n",
D0(" DQR: dqr_seq = %04x size = %d, dqr f/l/n = "
"%lx %lx %lx\n",
do {
D0("* ");
}
D0("\n shadow FQE: shdwfqw f/l/i/o = %lx %lx %lx %lx\n",
do {
D0("* ");
}
D0("\n remote FQ: fqw_seq %04x size %d, fqw f = %lx\n",
}
rsm_addr);
}
/* Dump summary information about all destination entries */
static void
dump_ioctl(void)
{
int dest;
D0("..head of wrsmd structure list, wrsmddev: 0x%lx\n",
D0(" next wrsmd pointer, wrsmd_nextp: 0x%lx\n",
D0(" dev info pointer, wrsmd_dip: 0x%lx, ipq 0x%lx\n",
D0(" rsmaddr %ld\n",
continue;
D0(" rd 0x%p (%d, %ld): state (%d, "
"0x%x, %d) ref %d nlb %d nlb_del %d\n",
}
}
}
/*
* Print an error message to the console.
*/
static void
const char *fmt, /* Format of output */
...) /* Parameters for output */
{
char name[16];
char buf[1024];
if (dip) {
} else {
}
}
#ifdef DEBUG_WRSMD
#ifdef DEBUG_LOG
/*
* The following variables support the debug log buffer scheme.
*/
int wrsmddbgnext; /* Next byte to write in buffer (note */
/* this is an index, not a pointer */
int wrsmddbginit = 0; /* Nonzero if wrsmddbglock's inited */
/*
* Add the string str to the end of the debug log, followed by a newline.
*/
static void
wrsmddbglog(char *str)
{
/*
* If this is the first time we've written to the log, initialize it.
*/
if (!wrsmddbginit) {
if (!wrsmddbginit) {
NULL);
wrsmddbgnext = 0;
wrsmddbginit = 1;
}
}
/*
* Note the log is circular; if this string would run over the end,
* we copy the first piece to the end and then the last piece to
* the beginning of the log.
*/
if (remlen)
wrsmddbgnext = 0;
}
wrsmddbgnext += length;
if (wrsmddbgnext >= sizeof (wrsmddbgbuf))
wrsmddbgnext = 0;
}
#endif
/*
* Add a printf-style message to whichever debug logs we're currently using.
*/
static void
wrsmddebug(const char *fmt, ...)
{
char buf[512];
#ifdef DEBUG_LOG
if (wrsmddbgmode & 0x1)
#endif
#ifdef DEBUG_PRINTF
if (wrsmddbgmode & 0x2)
#endif
}
/*
* Debugging routine, dumps data in hex.
*/
static void
int length) /* Bytes to dump */
{
int bytesonline;
int offset;
char *lineptr;
char line[80];
bytesonline = 0;
offset = 0;
while (length) {
if (bytesonline == 0) {
}
length--;
lineptr += 3;
bytesonline++;
if (bytesonline >= 16) {
*lineptr = '\0';
bytesonline = 0;
offset += 16;
}
}
if (bytesonline) {
*lineptr = '\0';
}
}
#endif
/*
* ****************************************************************
* *
* E N D STATUS REPORTING STUFF *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* B E G I N BASIC STREAMS OPERATIONS *
* *
* ****************************************************************
*/
/*
* Process a new message being sent down one of our streams.
*/
static int
{
case M_DATA:
/*
* This message is a raw data item. Most messages
* end up in this case.
*/
/*
* It is possible that an interrupt thread handling
* incoming packets has taken wrsmdstruplock or
* ipq_rwlock, sent a packet upstream (usually to
* ar), then looped back down to here. Meanwhile,
* a separate thread could be attempting to modify
* the ipq shortcut, which first takes
* ssp->ss_lock, then takes wrsmdstriplock. This
* causes a deadlock. Avoid this by enqueueing the
* message if the ssp->ss_lock can't be taken
* immediately.
*/
break;
}
/* If we're not supposed to get raw data, toss it. */
(WRSMD_SLRAW | WRSMD_SLFAST)) == 0) ||
"wrsmdwput end; type M_DATA",
break;
}
/*
* If any msgs already enqueued or the interface will
* loop back up the message (due to wrsmd_promisc),
* then enqueue the msg. (Can't handle promiscuous
* here because it takes wrsmdstruplock, which might
* cause a recursive rw_enter.) Otherwise just xmit it
* directly.
*/
} else {
sap, 1);
}
"wrsmdwput end; type M_DATA",
break;
case M_PROTO:
case M_PCPROTO:
/*
* This message is a DLPI control message. In
* almost all cases, we just put this on the queue
* for the service routine to process. Why?
* Basically, because processing of some of the
* internal locks that are also held across
* upstream putnext calls. For instance,
* wrsmdread() holds wrsmdstruplock and may
* hold wrsmdp->wrsmd_ipq_rwlock when it
* calls putnext(). In some cases, IP's or
* TCP's put routine, which was called from
* putnext() could immediately loop back, do a
* downward putnext() of a M_PROTO message, and end
* up right here. If we were then to try and
* process that message, we could try to obtain
* wrsmdstruplock or wrsmd_ipq_rwlock, which we
* already have, thus leading to a recursive
* mutex_enter panic.
*
* To prevent this, we put the M_PROTO message on
* the service routine's queue. When the service
* routine runs, it will be in a different context
* which can safely acquire the appropriate locks.
*/
"wrsmdwput end; type M_PROTO",
break;
case M_IOCTL:
/*
* ARP may do a downward putnext() of an M_IOCTL
* stream in response to an ack sent upstream
* by this module while holding internal locks.
* As described above, we avoid a recursive mutex
* enter by handling it in the service routine.
* We do an immediate nak for unrecognized ioctls.
*/
"wrsmdwput end; type M_IOCTL",
break;
case M_FLUSH:
/*
* This message is asking us to flush our queues,
* probably in preparation for taking down the
* stream.
*/
}
else
"wrsmdwput end; type M_FLUSH",
break;
default:
"wrsmdwput end; type unknown",
break;
}
D1("wrsmdwput: returning 0");
return (0);
}
/*
* Write service routine. This routine processes any messages put on the queue
* via a putq() in the write put routine. It also handles any destinations put
* on the destination run queue.
*/
static int
{
int isdel;
/*
* Process message queue.
*/
case M_DATA:
D5("wrsmdwsrv: got data time 0x%llx",
gethrtime());
if (wrsmdp) {
} else
"wrsmdwsrv qcount; type M_DATA",
break;
case M_PROTO:
case M_PCPROTO:
D5("wrsmdwsrv: got proto time 0x%llx",
gethrtime());
"wrsmdwsrv qcount; type M_PROTO",
break;
case M_IOCTL:
/*
* This message is an ioctl.
* We do not hold locks around the whole ioctl
* processing, as the holding of locks across a
* qreply() of ack or nak is a violation of the
*/
"wrsmdwsrv msg; type M_IOCTL",
break;
default: /* nothing is working at ths point */
"wrsmdwsrv qcount; type unknown",
ASSERT(0);
break;
}
}
/*
* Traverse list of scheduled destinations, looking for work to do
*/
D1("wrsmdwsrv: wrsmdp NULL, returning 0");
return (0);
}
/*
* rd's refcnt is incremented by GETRUNQ
*/
while (rd) {
if (isdel) {
D2("wrsmdwsrv: dest 0x%p being deleted, ignored",
(void *)rd);
continue;
}
delete = 0;
D5("wrsmdwsrv: running state %s time 0x%llx",
switch (oldstate) {
case WRSMD_STATE_S_XFER: {
if (rd->rd_queue_h)
else
break;
}
case WRSMD_STATE_S_REQ_CONNECT: {
delete = 1;
}
break;
}
case WRSMD_STATE_S_NEWCONN: {
delete = 1;
}
break;
}
case WRSMD_STATE_S_CONNXFER_ACCEPT: {
delete = 1;
}
break;
}
case WRSMD_STATE_S_CONNXFER_ACK: {
delete = 1;
}
break;
}
/*
* Delete this connection. This causes a message
* to be sent to the remote side when RSM_SENDQ_DESTROY
* is called, so there is no need to send an additional
* message.
*/
case WRSMD_STATE_S_DELETE: {
delete = 1;
break;
}
/*
* Retry the SCONN.
*/
case WRSMD_STATE_S_SCONN: {
delete = 1;
}
break;
}
default:
D1("wrsmd: bad state %s in wsrv "
break;
}
if (delete)
}
D1("wrsmdwsrv: returning 0");
return (0);
}
/*
* Discard all messages queued for output to this destination, updating
* error statistics as appropriate.
*/
static void
{
D1("wrsmddumpqueue: wrsmdp 0x%p (ctlr %d), rd 0x%p (addr %ld)",
(void *)wrsmdp,
"wrsmddumpqueue start",
rd->rd_queue_len = 0;
while (mp) {
wrsmdp->wrsmd_oerrors++;
}
D1("wrsmddumpqueue: done");
}
/*
* Execute an ioctl request from the service routine.
*/
static void
{
case DLIOCRAW: /* raw M_DATA mode */
D1("wrsmdioctl: DLIOCRAW");
break;
case DL_IOC_HDR_INFO: /* M_DATA "fastpath" info request */
D1("wrsmdioctl: DL_IOC_HDR_INFO");
break;
case WRSMD_DUMP_IOCTL:
dump_ioctl();
break;
case WRSMD_DUMP_DEST:
sizeof (dest))) {
break;
}
break;
default:
break;
}
D1("wrsmdioctl: done");
}
/*
* Execute an immediate ioctl request from the put routine.
* Does not take any locks. Returns FALSE if not handled immediately.
*/
static int
{
case DLIOCRAW: /* raw M_DATA mode */
case DL_IOC_HDR_INFO: /* M_DATA "fastpath" info request */
case WRSMD_DUMP_IOCTL:
case WRSMD_DUMP_DEST:
/* handle from the service routine */
return (B_FALSE);
default:
break;
}
D1("wrsmdioctlimmediate: done");
return (B_TRUE);
}
/*
* M_DATA "fastpath" info request.
* Following the M_IOCTL mblk should come a DL_UNITDATA_REQ mblk. We ack with
* an M_IOCACK pointing to the original DL_UNITDATA_REQ mblk, followed by an
* mblk containing the raw medium header corresponding to the destination
* address. Subsequently, we may receive M_DATA msgs which start with this
* header and may send up M_DATA msgs containing the network-layer data.
* This is all selectable on a per-Stream basis.
*/
static void
{
struct ether_header *headerp;
int minsize;
/*
* Sanity check the request.
*/
DL_UNITDATA_REQ) ||
D1("wrsmd_dl_ioc_hdr_info: bad req, done");
return;
}
/*
* Sanity check the DL_UNITDATA_REQ destination address
* offset and length values.
*/
D1("wrsmd_dl_ioc_hdr_info: bad addr, done");
return;
}
/*
* Allocate a new mblk to hold the medium header.
*/
== NULL) {
D1("wrsmd_dl_ioc_hdr_info: ENOMEM, done");
return;
}
/*
* Fill in the medium header.
*/
&(headerp->ether_shost));
/*
* Link new mblk in after the "request" mblks.
*/
D1("wrsmd_dl_ioc_hdr_info: done");
}
/*
* ****************************************************************
* *
* E N D BASIC STREAMS OPERATIONS *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* B E G I N DLPI OPERATIONS *
* *
* ****************************************************************
*/
/*
* Parse and execute a DLPI request.
*/
static void
{
union DL_primitives *dlp;
/* Make sure we at least have dlp->dl_primitive */
return;
}
switch (prim) {
case DL_UNITDATA_REQ:
"wrsmdproto prim",
break;
case DL_ATTACH_REQ:
"wrsmdproto prim",
break;
case DL_DETACH_REQ:
"wrsmdproto prim",
break;
case DL_ENABMULTI_REQ:
"wrsmdproto prim",
/* Accept enable-multicast-request */
break;
case DL_DISABMULTI_REQ:
"wrsmdproto prim",
/* Accept disable-multicast-request */
break;
case DL_BIND_REQ:
"wrsmdproto prim",
break;
case DL_UNBIND_REQ:
"wrsmdproto prim",
break;
case DL_INFO_REQ:
"wrsmdproto prim",
break;
case DL_PROMISCON_REQ:
"wrsmdproto prim",
break;
case DL_PROMISCOFF_REQ:
"wrsmdproto prim",
break;
case DL_PHYS_ADDR_REQ:
"wrsmdproto prim",
break;
default:
"wrsmdproto prim",
break;
}
D1("wrsmdproto: done");
}
/*
* START OF GENERIC DLPI INTERFACE ROUTINES
*/
/*
* DLPI attach request (attach stream to physical device)
*
* The PPA is the RSM controller id, which equals the DLPI device instance
* number.
*/
static void
{
union DL_primitives *dlp;
D1("wrsmdareq: bad size, done");
return;
}
D1("wrsmdareq: bad state, done");
return;
}
/*
* Valid ppa?
*/
D1("wrsmdareq: bad ppa, done");
return;
}
break;
}
}
/* when qassociate() succeeds, ppa must be present */
/*
* The teardown timeout now reschedules itself, so we
* have to go to great lengths to kill it.
*/
while (tmoid) {
/*
* A timeout is scheduled to teardown the device -
* cancel it, as device is once again in use.
*/
/*
* untimeout guarantees that either the function was
* cancelled, or it has completed. If the timeout was
* cancelled before the function ran, the timeout id will
* not have changed.
*/
wrsmdp->wrsmd_teardown_tmo_id = 0;
}
/*
* Has WRSMD device (RSM controller) been initialized? Do so if
* necessary.
*/
DL_INITFAILED, 0);
D1("wrsmdareq: init failed, done");
/* dissociate on failure */
return;
}
}
wrsmdp->wrsmd_promisc++;
/*
* Save pointer to this queue if this destination doesn't already
* have one
*/
/*
* Set link to WRSMD device (RSM controller) and update our state.
*/
D1("wrsmdareq: done");
}
/*
* DLPI detach request (detach stream from physical device)
*/
static void
{
D1("wrsmddreq: bad size, done");
return;
}
D1("wrsmddreq: bad state, done");
return;
}
D1("wrsmddreq: done");
}
/*
* Detach a Stream from an interface.
*/
static void
{
/*
* Need to protect this assignment with wrsmd_lock mutex in case
* of concurrent execution of detach for different streams, to avoid
* detaching device structure until all streams detached.
*/
wrsmdp->wrsmd_promisc--;
/*
* Detach from device structure.
* Uninit the device when no other streams are attached to it.
*/
break;
if (tslp)
else
/* Make sure teardown only scheduled once. */
if (wrsmdp->wrsmd_attached_streams == 0) {
/*
* Schedule a teardown. This allows queues to destinations
* through this controller to drain, and keeps the data
* structures around in case a new connection to this
* device is about to occur.
*/
}
D1("wrsmddodetach: done");
}
/*
* DLPI bind request (register interest in a particular address & SAP)
*/
static void
{
union DL_primitives *dlp;
D1("wrsmdbreq: bad size, done");
return;
}
D1("wrsmdbreq: bad state, done");
return;
}
if (xidtest) {
D1("wrsmdbreq: bad xidtest, done");
return;
}
return;
}
D1("wrsmdbreq: bad sap, done");
return;
}
/*
* Save SAP value for this Stream and change state.
*/
WRSMD_DEVICE_ADDRL, 0, 0);
D1("wrsmdbreq: done");
}
/*
* DLPI unbind request (cancel interest in a particular local address & SAP)
*/
static void
{
D1("wrsmdubreq: bad size, done");
return;
}
D1("wrsmdubreq: bad state, done");
return;
}
D1("wrsmdubreq: done");
}
/*
* DLPI device information request
*/
static void
{
void *dlbcastap;
D1("wrsmdireq: bad size, done");
return;
}
/*
* Exchange current msg for a DL_INFO_ACK.
*/
D1("wrsmdireq: bad mexchange, done");
return;
}
/*
* Fill in the DL_INFO_ACK fields and reply.
*/
*dlip = wrsmdinfoack;
/*
* fill in the local DLSAP address, if connected to a controller
*/
dlip->dl_max_sdu =
} else {
ASSERT(wrsmdminbuflen != 0);
}
/*
* fill in the broadcast address; it's at least short aligned
*/
D1("wrsmdireq: done");
}
/*
* DLPI enable promiscuous mode request
*
* We only snoop and deliver messages that are generated by this node
* or received by this node. Unlike promiscuous mode on a bus-based
* network, we do not see (and therefore cannot deliver) messages
* destined for other nodes.
*/
static void
{
D1("wrsmdponreq: bad size, done");
return;
}
case DL_PROMISC_PHYS:
}
}
break;
case DL_PROMISC_SAP:
break;
default:
DL_NOTSUPPORTED, 0);
D1("wrsmdponreq: option not supported, done");
return;
}
D1("wrsmdponreq: done");
}
/*
* DLPI disable promiscuous mode request
*/
static void
{
int flag;
D1("wrsmdpoffreq: bad size, done");
return;
}
case DL_PROMISC_PHYS:
break;
case DL_PROMISC_SAP:
break;
default:
DL_NOTSUPPORTED, 0);
D1("wrsmdpoffreq: option not supported, done");
return;
}
D1("wrsmdpoffreq: mode not on, done");
return;
}
}
D1("wrsmdpoffreq: done");
}
/*
* DLPI get physical address request
*
* Return the PPA (RSM hardware address) of the WRSMD device (RSM controller)
* to which this stream is attached.
*/
/*
 * NOTE(review): fragment of wrsmdpareq() (DL_PHYS_ADDR_REQ handler);
 * the signature line and several statements are elided in this view.
 * Fix: the default: branch's debug message was copy-pasted from
 * wrsmdpoffreq(); it must name this routine, wrsmdpareq.
 */
static void
{
union DL_primitives *dlp;
D1("wrsmdpareq: bad size, done");
return;
}
D1("wrsmdpareq: bad state, done");
return;
}
switch (type) {
case DL_FACT_PHYS_ADDR:
case DL_CURR_PHYS_ADDR:
D1("wrsmdpareq: done");
return;
default:
/* unknown address type: NAK the request with DL_NOTSUPPORTED */
DL_NOTSUPPORTED, 0);
D1("wrsmdpareq: option not supported, done");
return;
}
}
/*
* DLPI unit data send request
*/
static void
{
register dl_unitdata_req_t *dludp;
D1("wrsmdudreq: bad state, done");
return;
}
/*
* Validate destination address format.
*/
DL_BADADDR, 0);
#ifdef DEBUG_WRSMD
D2("wrsmdudreq bad addr: ADDRL %ld addr len %d, rsm addr 0x%lx "
"sap 0x%x",
D1("wrsmdudreq: bad addr, done");
#endif
return;
}
/*
* Error if no M_DATA follows.
*/
DL_BADDATA, 0);
D1("wrsmdudreq: bad data, done");
return;
}
/* Extract address information. */
/* Discard DLPI header. */
/*
* Transmit message.
*/
D1("wrsmdudreq: done");
}
/*
* ****************************************************************
* *
* E N D DLPI OPERATIONS *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* B E G I N HIGH LEVEL PROTOCOL INTERFACE AND UTILITIES *
* *
* ****************************************************************
*/
/*
* An outgoing raw packet has a header at the beginning, telling us the
* destination address and SAP. Since this header is not used by the
* Wildcat hardware in this form, we call it the "fake hardware header".
*
* This routine parses the fake hardware header at the start of mp, strips
* the header from mp then returns address and sap. It returns a pointer
* to the stripped mblk, which may or may not be the same pointer which was
* passed in, or NULL if there's no data to send.
*/
static mblk_t *
{
/*
* Parse header; it's at least short aligned.
*/
/* Strip off header */
/*
* If there's nothing left in this mblk, and there are more
* following, get rid of it. If there's nothing left, and
* there aren't more following, we have a zero-length
* message; return an error. (A following mblk might
* conceivably be empty, giving us a zero-length message as
* well; we don't check for this.)
*/
D1("wrsmdstrip: returning 1");
return (nmp);
} else {
D1("wrsmdstrip: returning 0");
return (NULL);
}
} else {
D1("wrsmdstrip: returning 1");
return (mp);
}
} else {
D1("wrsmdstrip: returning 0");
return (NULL);
}
}
/*
* Queue the message to the proper destination structure, creating the
* destination if necessary. Discard the message if required.
* If from_put is true, and there are no other messages queued to this
* destination or being transmitted, start this message transmitting.
* Schedule destination for service, if necessary. The function returns
* true if the message was successfully queued.
*/
{
int isdel = 0;
int isnew = 0;
if (copy) {
if (!mp)
return (B_FALSE);
} else {
}
/* Find destination structure for this message */
if (isdel) {
wrsmdp->wrsmd_oerrors++;
if (copy) {
}
DERR("wrsmdqueuemsg: TOSS!!! dest being deleted, "
"toss packet, done");
"wrsmdqueuemsg end; failure destbeingdel",
return (B_FALSE);
} else if (isnew) {
wrsmdp->wrsmd_oerrors++;
if (copy) {
}
DERR("wrsmdqueuemsg: TOSS!!! can't mkdest, "
"toss packet, done");
"wrsmdqueuemsg end; failure cantmkdest",
return (B_FALSE);
}
}
/* if state was new, move to req_connect */
/* Make sure we don't have too many queued already */
if (rd->rd_queue_len >=
if (copy) {
}
wrsmdp->wrsmd_oerrors++;
DERR("wrsmdqueuemsg: TOSS!!! too many queued (%d), "
"toss packet, done",
rd->rd_queue_len);
"wrsmdqueuemsg end; failure 2manyqueued",
return (B_FALSE);
}
else
rd->rd_queue_len++;
/*
* Since we're making a singly-linked list of mblks hanging off the
* destination structure, we can get away with stashing the destination
* SAP in the b_prev pointer of the mblk. This is pretty disgusting,
* but is much more efficient than the alternative: allocating a new
* structure with space for the SAP and a pointer to the mblk, and
* making a list of those instead.
*/
wrsmdp->wrsmd_starts++;
if (from_put) {
} else
if (wrsmdavailfqe(rd)) {
rd->rd_fqe_tmo_id = 0;
rd->rd_tmo_int = 0;
if (tmoid)
if (from_put) {
} else
} else {
/*
* no FQEs available, return to waiting state
*/
}
}
return (B_TRUE);
}
/*
* Verify whether this is a valid message. Determine whether this is a
* broadcast message; send message to each recipient. Handle promiscuous
* mode.
*/
static void
int from_put)
{
int i;
D1("wrsmdstart: wrsmdp 0x%p (cltr %d), mp 0x%p, rsmaddr %ld, sap 0x%x, "
/* Make sure we're not sending to ourselves */
wrsmdp->wrsmd_oerrors++;
DERR("wrsmdstart: TOSS!!! sending to ourselves, toss packet, "
"done");
"wrsmdstart end; failure sendtoself",
return;
}
/* Make sure message is contiguous in memory */
wrsmdp->wrsmd_oerrors++;
DERR("wrsmdstart: TOSS!!! can't pullup message, "
"toss packet, "
"done");
return;
}
wrsmdp->wrsmd_pullup++;
}
/* Make sure message isn't too big */
wrsmdp->wrsmd_oerrors++;
DERR("wrsmdstart: TOSS!!! message too big, toss packet, done");
"wrsmdstart end; failure msgtoobig",
return;
}
/*
* Send message to each addressee (normally there is just one).
*/
/* Loop message back up if we're in promiscuous mode */
if (wrsmdp->wrsmd_promisc) {
}
/*
* handle broadcast and multicast messages
*/
D1("wrsmdstart: broadcast message; collect peers");
D1("wrsmdstart: cannot collect list of peers "
"for broadcast");
wrsmdp->wrsmd_oerrors++;
return;
}
/*
* Make a copy of the message for all but the last
* recipient. Don't broadcast to the local node.
*/
for (i = 0; i < num_addrs; i++) {
continue;
/*
* If this is the last node or if this is the
* second to last and the last is ourselves,
* don't make a copy of the message.
*/
if ((i == (num_addrs - 1)) ||
((i == (num_addrs - 2)) &&
} else {
}
}
}
} else {
}
}
D1("wrsmdstart: done");
}
/*
* These macros are used in several places in wrsmdsendup().
*/
/*
* Return 1 if the stream pointed to by wrsmdstr is connected to wrsmdp and
* interested in this sap.
*/
/*
* Return 1 if the stream pointed to by wrsmdstr is connected to wrsmdp,
* interested in all saps, and in physical promiscuous mode.
*/
/*
* Do appropriate processing to send message msg up stream wrsmdstr.
* "name" is the name of the calling routine, used for debugging messages;
* to, from and sap are the packet's to address, from address, and SAP.
*/
(msg)); \
} \
}
/*
* Send packet upstream.
*/
static void
{
D1("wrsmdsendup: wrsmdp 0x%p (cltr %d), mp 0x%p, to %ld, from %ld, "
/*
* While holding a reader lock on the linked list of streams
* structures, attempt to match the address criteria for each stream
* and pass up the DL_UNITDATA_IND.
*/
/*
* This is pretty tricky. If there are multiple streams that want
* this packet, we have to make a new copy for all but one of them
* (we can send the original packet up one of the streams). However,
* if we do things the straightforward way:
*
* while (stream wants packet)
* newmsg = copy (msg);
* send newmsg up stream
* go to next stream
* free oldmsg
*
* we end up always doing a copy, even if (as is usually the case)
* the packet only goes to one stream. This is bad. Thus what we do
* is the following:
*
* Find a stream that wants this packet. If there aren't any,
* we're done.
* For each other stream that wants this packet, make a copy of
* it and send it up.
* Finally, send the original packet up the first stream we found.
*/
break;
}
if (ssp) {
D1("wrsmdsendup nssp->ss_sap 0x%x flags 0x%x",
}
}
/*
* Do the last one.
*/
sap);
"");
} else
} else
D1("wrsmdsendup: done");
}
/*
* Send outgoing packet upstream to promiscuous mode readers. This routine
* is an exact duplicate of wrsmdsendup(), above, except that we use
* WRSMDPROMMATCH instead of WRSMDSAPMATCH. (The difference is that
* the latter only selects streams which are in promiscuous mode; this
* keeps IP from getting its own packets back, since we don't check the
* destination addresses when sending packets upstream.)
*/
static void
{
D1("wrsmdpromsendup: wrsmdp 0x%p (cltr %d), mp 0x%p, to %ld, from %ld, "
/*
* While holding a reader lock on the linked list of streams structures,
* attempt to match the address criteria for each stream
* and pass up the DL_UNITDATA_IND.
*/
/*
* See explanation above of why this is somewhat less than
* straightforward.
*/
break;
}
if (ssp) {
/*
* Do the last one.
*/
} else
} else
D1("wrsmdpromsendup: done");
}
/*
* Prefix msg with a DL_UNITDATA_IND mblk and return the new msg. If we
* can't, free the msg and return NULL.
*/
static mblk_t *
{
D1("wrsmdaddudind: wrsmdp 0x%p (cltr %d), mp 0x%p, to %ld, from %ld, "
/*
* Allocate an M_PROTO mblk for the DL_UNITDATA_IND.
* own headers as well.
*/
wrsmdp->wrsmd_ierrors++;
D1("wrsmdaddudind: bad allocb, returning NULL");
return (NULL);
}
/*
* Construct a DL_UNITDATA_IND primitive.
*/
dludindp->dl_group_address = 0;
/* plug in dest addr */
/* plug in src addr */
dlap = (wrsmddladdr_t *)
/*
* Link the M_PROTO and M_DATA together.
*/
D1("wrsmdaddudind: new header follows");
return (nmp);
}
/*
* Prefix msg with a "fake hardware header" (either in-place, or in a
* separate mblk)and return the new msg. If we can't, free the msg and
* return NULL.
*/
static mblk_t *
{
struct ether_header *headerp;
D1("wrsmdaddhdr: wrsmdp 0x%p (cltr %d), mp 0x%p, to %ld, from %ld, "
/*
* Create link-level header by either prepending it onto the
* data if possible, or allocating a new mblk if not.
*/
} else {
/* Allocate an M_DATA mblk for the header. */
wrsmdp->wrsmd_ierrors++;
D1("wrsmdaddhdr: bad allocb, returning NULL");
return (NULL);
}
}
/*
* Fill in header. It is at least short aligned.
*/
return (mp);
}
/*
* Callback routine, called when a desballoc'ed buffer is eventually freed.
*/
/*
 * NOTE(review): fragment of wrsmdfreebuf() -- the signature and most of
 * the body are elided in this view; delflg/zerflg are computed on lines
 * not visible here. Only what the remaining lines show is documented.
 */
static void
{
/*
 * Find out if this is the last outstanding buffer, and whether we're
 * being deleted.
 */
/*
 * If we're being deleted, we don't put this buffer on the free queue.
 * Also, if we're being deleted, and this was the last outstanding
 * buffer, we do an UNREF. Otherwise we send this buffer to the other
 * system for reuse.
 */
/* delflg: destination being deleted; zerflg: last loaned buffer out */
if (delflg) {
if (zerflg)
} else {
}
D1("wrsmdfreebuf: done");
}
/*
* wrsmdread() takes the packet described by the arguments and sends it
* upstream.
*/
static int
int bufnum, /* Index of buffer containing packet */
int offset, /* Offset of packet within buffer */
int length, /* Length of packet */
{
int buffree = 0;
D1("wrsmdread: rd 0x%p, bufnum %d, offset %d, length %d, sap 0x%x",
/* Figure out if we can loan this buffer up or not */
}
if (canloan) {
/*
* We make the mblk cover the whole buffer in case anybody
*/
buffree = 1;
wrsmdp->wrsmd_ierrors++;
D1("wrsmdread: can't desballoc, done");
"wrsmdread end; failure desballoc",
return (1);
}
wrsmdp->wrsmd_lbufs++;
} else {
/*
* We make the destination (within the new mblk) have the
* same address mod 64 as our source, so that the kernel
* bcopy is as efficient as possible. (This is a sun4u
*/
if (mp) {
if (dstoffset < 0)
dstoffset += 0x40;
length);
wrsmdp->wrsmd_nlbufs++;
buffree = 1;
} else {
buffree = 1;
wrsmdp->wrsmd_ierrors++;
D1("wrsmdread: can't allocb, done");
"wrsmdread end; failure allocb",
return (1);
}
}
wrsmdp->wrsmd_ipackets++;
/*
* IP shortcut
*/
if (canputnext(ipq)) {
} else
} else
sap);
return (buffree);
}
/*
* ****************************************************************
* *
* E N D HIGH LEVEL PROTOCOL INTERFACE AND UTILITIES *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* *
* ****************************************************************
*/
/*
* Initialize WRSMD resources. Return 0 on success, nonzero on error.
*/
static int
{
int stat;
/* LINTED: E_TRUE_LOGICAL_EXPR */
wrsmdp->wrsmd_flags = 0;
/*
* Preceding teardown may not have released controller.
* If so, do so now to avoid multiple reference counts.
*/
D1("wrsmdinit: controller still held, "
"call rsm_release_controller()");
&(wrsmdp->wrsmd_ctlr));
}
return (1);
}
&(wrsmdp->wrsmd_ctlr));
return (1);
}
/*
* We only support RSM addresses that fit into 6 bytes. This
* will always be the case on Wildcat. (Address range is 0-255.)
*/
(rsm_addr_t)0xffffffffffffLL);
/*
* Since this address is locally generated, turn on the locally
* administered bit in the ethernet address to comply with IEEE 802.
*/
RSM_SUCCESS) {
&(wrsmdp->wrsmd_ctlr));
return (1);
}
/*
* Clear any leftover junk from run queue. (We could have destinations
* here if the user detached from a device, then reattached before
* the destinations got deleted.) These destination structures will
* eventually be freed when the deletion process completes.
*/
D1("wrsmdinit: returning 0");
return (0);
}
/*
* Un-initialize WRSMD resources. Returns 0 if completely successful.
* Returns -1 if not in a state where uninitialize makes sense. Returns >0
* if uninitialize was started, but hasn't completed because not all
* connections have been torn down yet.
*/
/*
 * NOTE(review): fragment of wrsmduninit() -- signature and several
 * statements are elided in this view. Per the comment above: returns 0
 * on success, -1 when uninit is not sensible (streams still attached),
 * and >0 when teardown started but destinations remain.
 */
static int
{
int dests_not_cleaned_up;
int i;
if (wrsmdp->wrsmd_attached_streams) {
/*
 * don't uninitialize device while streams are attached to it
 */
D1("wrsmduninit: %d streams still attached, failing",
return (-1);
}
/*
 * Must release the mutex here to avoid a potential deadlock.
 * The wrsm_unregister_handler() code acquires the wrsm
 * service->handler_mutex. If an inbound interrupt occurs,
 * the wrsm service_callback grabs the service->handler_mutex,
 * and calls back into the wrsmd_rsm_intr_handler() routine,
 * which attempts to grab the wrsmdp->wrsmd_lock. If at the
 * time we invoke RSM_UNREGISTER_HANDLER() while holding the
 * wrsmdp->wrsmd_lock, we get a circular lock deadlock.
 */
}
/* start teardown of every possible destination */
for (i = 0; i < RSM_MAX_DESTADDR; i++)
wrsmdfreedest(wrsmdp, i);
(dests_not_cleaned_up == 0)) {
/*
 * there will be no more RSMPI calls, so
 * it's safe to release the controller
 */
D1("wrsmduninit: call rsm_release_controller()");
&(wrsmdp->wrsmd_ctlr));
}
D1("wrsmduninit: returning %d",
/* nonzero => caller must wait for remaining dests to tear down */
return (dests_not_cleaned_up);
}
/*
* Get all the wrsmd parameters out of the device tree and store them in a
* WRSMD device (RSM controller) structure.
*/
static void
{
/* Get parameters */
"wrsmd-buffers", WRSMD_BUFFERS_DFLT);
"wrsmd-buffer-size", WRSMD_BUFFER_SIZE_DFLT);
"wrsmd-queue-size", WRSMD_QUEUE_SIZE_DFLT);
"wrsmd-buffers-retained", WRSMD_BUFFERS_RETAINED_DFLT);
"wrsmd-idle-reclaim-time", WRSMD_IDLE_RECLAIM_TIME_DFLT);
"wrsmd-err-retries", WRSMD_ERR_RETRIES_DFLT);
"wrsmd-max-queued-pkts", WRSMD_MAX_QUEUED_PKTS_DFLT);
"wrsmd-nobuf-init-tmo", WRSMD_NOBUF_INIT_TMO_DFLT);
"wrsmd-nobuf-max-tmo", WRSMD_NOBUF_MAX_TMO_DFLT);
"wrsmd-nobuf-drop-tmo", WRSMD_NOBUF_DROP_TMO_DFLT);
"wrsmd-msg-init-tmo", WRSMD_MSG_INIT_TMO_DFLT);
"wrsmd-msg-max-tmo", WRSMD_MSG_MAX_TMO_DFLT);
"wrsmd-msg-drop-tmo", WRSMD_MSG_DROP_TMO_DFLT);
"wrsmd-ack-tmo", WRSMD_ACK_TMO_DFLT);
"wrsmd-sync-tmo", WRSMD_SYNC_TMO_DFLT);
"wrsmd-teardown-tmo", WRSMD_TEARDOWN_TMO_DFLT);
"wrsmd-train-size", WRSMD_TRAIN_SIZE_DFLT);
"wrsmd-fqe-sync-size", WRSMD_FQE_SYNC_SIZE_DFLT);
/*
* Sanity check parameters, modify if needed. Note that we mainly
* check to make sure parameters won't make the driver malfunction;
* we don't necessarily prevent them from being stupid.
*/
/* Need to have at least one buffer. */
if (sp->wrsmd_buffers == 0)
#ifdef RESTRICT_MAX_BUFFER_SIZE
/* Can't put more than 64K in a buffer (IP max packet length). */
}
#endif
/*
* Have to be able to send at least a 576-byte packet (IP reqmnt).
* Add 2 cachelines so that packet will fit no matter how it is
* aligned.
*/
if (sp->wrsmd_buffer_size <
}
/* Buffer length must be multiple of 64 (0x40). */
}
if (modified_bufsize) {
}
/*
* Must have at least one more queue element than the number of
* buffers. This is so that we can track when all queue elements
* need to be flushed to remote.
*/
}
/* Can't retain more buffers than we have. */
}
/* Have to be able to send at least 1 packet at a time. */
}
/* Have to be able to queue at least 1 packet. */
"value from wrsmd.conf to 0x%x",
}
/*
* Convert timeout parameters in milliseconds to
* absolute clock ticks, depending on clock hertz.
*/
/* Can't sleep for less than 1 tick. */
"value from wrsmd.conf to 0x%x",
}
"value from wrsmd.conf to 0x%x",
}
"value from wrsmd.conf to 0x%x",
}
"value from wrsmd.conf to 0x%x",
}
"value from wrsmd.conf to 0x%x",
sp->wrsmd_ack_tmo);
}
"value from wrsmd.conf to 0x%x",
sp->wrsmd_sync_tmo);
}
}
/*
* ****************************************************************
* *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* B E G I N CONNECTION DATA STRUCTURE MANAGEMENT *
* *
* ****************************************************************
*/
/*
* Create the indicated destination structure, and return a pointer to it.
* NOTE: this should never be called directly; use the MAKEDEST macro
* instead. The macro checks that the destination structure does not yet
* exist before calling this function.
*/
static wrsmd_dest_t *
{
/* Is the destination reasonable? */
if (rsm_addr >= RSM_MAX_DESTADDR) {
D1("wrsmdmkdest: too big, returning NULL");
return (NULL);
}
return (rd);
}
D1("wrsmdmkdest: can't alloc, returning NULL");
return (NULL);
}
/*
* Use the time to generate a pseudo-random initial sequence
* number.
*/
wrsmdp->wrsmd_numdest++;
return (rd);
}
/*
* Destination deletion
*
* As mentioned above (way above), we maintain a reference count on all
* destinations, which is incremented and decremented around uses of the
* destination structure. When this reference count goes to zero, we delete
* the destination.
*
* Because of the possibility of other threads trying to use the destination
* while we're deleting it, deletion is actually a multiple-step process,
* which works as follows.
*
* 1. When a destination is created, its dstate (deletion state) is set to
* zero, and its reference count is set to one.
*
* 2. When the service routine or some other routine decides that a destination
* should be deleted, it calls wrsmdfreedest(). That routine sets dstate
* to 1 and cancels any pending sync timeouts. It then decrements the
* destination's reference count. This deletes the reference set in
* wrsmdmkdest. (Note that since dstate is now 1, the FINDDEST and REFDEST
* macros will now note that the destination is being deleted; thus, any
* interrupt referring to the destination will no longer modify the
* reference count.)
*
* 3. Soon after this, wrsmddest_refcnt_0 is called. (This may either be
* directly from wrsmdfreedest(), or perhaps from another routine if it
* was running concurrently with freedest() and its UNREF happened last).
* This routine sees that dstate is 1, and immediately queues an event
* which will execute wrsmdfreedestevt(). (This is necessary because we
* may not be able to do everything in the phase 1 deletion from the routine
* that we're currently in.)
*
* 4. wrsmdfreedestevt() runs, it checks if there are any outstanding
* loaned-up buffers. If so, it sets a flag to cause the loan returning
* code to decrement the refcnt, and returns without performing cleanup.
* When all loaned buffers are returned and the refcnt is decremented, we
* go back to step 3, above. When wrsmdfreedestevt() finally runs with
* no loaned buffers, gets rid of most of the WRSMD resources attached
* to the destination. It also throws away any queued packets, gets
* rid of any allocated DVMA resources. It changes dstate to 2, takes
* this destination structure out of the base-ID => destination table.
* It then decrements the reference count that had been added by
* wrsmddest_refcnt_0().
*
* 5. When the reference count becomes 0, wrsmddest_refcnt_0 is again called.
* It notices that dstate is 2, and frees the destination structure.
*/
/*
* A destination's reference count went to 0, deal with it.
*/
static boolean_t
{
D1("wrsmddest_refcnt_0: rd 0x%p (addr %ld ctlr %d), refcnt %d, "
"dstate %d",
/*
* We may be called from a routine that can't actually do the
* work that needs to be done, so we schedule an event
* to do the rest of the work. This can not be a timeout.
*/
/* Destroy all the mutexes */
/*
* Free any allocated memory hanging off the dest structure.
*/
if (rd->rd_cached_fqr) {
}
if (rd->rd_shdwfqw_f_addr) {
}
if (rd->rd_shdwdqw_f_addr) {
}
if (rd->rd_bufbase) {
sizeof (*rd->rd_bufbase));
}
if (rd->rd_rawmem_base_addr) {
}
/* Finally free the dest structure */
wrsmdp->wrsmd_numdest--;
D1("wrsmddest_refcnt_0: freed rd data structures");
}
D1("wrsmddest_refcnt_0: done");
return (freed);
}
/*
* Do deletion work.
*/
static void
wrsmdfreedestevt(void * arg)
{
int err;
/* Get rid of any queued outgoing buffers */
while (mp) {
wrsmdp->wrsmd_oerrors++;
}
rd->rd_queue_len = 0;
/*
* See if there are any more outstanding loaned buffers. If so,
* set flag so that freebuf will eventually do an UNREF when it
* frees the last buffer. This removes the reference added in
* wrsmddest_refcnt_0(), causing the count to again go to 0.
* wrsmddest_refcnt_0() will again be called, increment the refcnt
* and cause this routine to be called to complete cleanup.
*/
DERR("wrsmdfreedestevt: loaned buffers outstanding %d, dest "
return;
}
/*
* Retry for up to 10 times to clean up, pausing slightly each
* iteration. This gives the remote side a chance to clean up
* in the case of unpublish, and allows us to catch other errors
* now as well.
*/
/*
* Perform the sendq destroy first -- this notifies the
* remote side that the connection is going away, so
* it can immediately start cleaning up. This helps
* to avoid a situation where a segment is unpublished
* while there is still a connection to it (which is legal,
* but causes overhead in the Wildcat RSM driver).
*/
D1("wrsmdfreedestevt: destroying sendq\n");
if (err) {
} else {
}
}
D1("wrsmdfreedestevt: disconn from remote segment\n");
rd->rd_rxferhand);
if (err) {
} else {
}
}
D1("wrsmdfreedestevt: unpublishing local segment\n");
rd->rd_lxferhand);
if (err) {
} else {
}
}
D1("wrsmdfreedestevt: destroying local segment\n");
rd->rd_lxferhand);
if (err) {
} else {
}
}
count++;
D1("freedestevt: Pass %d, (sstate & mask)=0x%x\n",
/* Busy wait for a few microseconds */
drv_usecwait(5000);
}
}
if (count >= 10) {
D1("freedestevt: sstate&mask !0 after %d tries. 0x%x\n",
D0("freedestevt: Clearing state but status != 0, stat=%x\n",
}
/* Take out of desttbl */
/* Make sure dest isn't on service queue */
else {
while (lastrd) {
break;
}
}
}
/*
* Removes the reference added in wrsmddest_refcnt_0().
*/
D1("wrsmdfreedestevt: done");
}
/*
* Start the deletion process for a destination.
*/
/*
 * NOTE(review): fragment of wrsmdfreedest() -- the signature and the
 * dstate/refcnt manipulation lines are elided from this view.
 */
static void
{
D1("wrsmdfreedest: ctlr %d remote rsmaddr %ld",
return;
}
/*
 * Turn off any timeouts. The sync timeout reschedules itself, so we
 * have to go to great lengths to kill it.
 */
/* clear the id first so a concurrently-running timeout sees the cancel */
rd->rd_fqe_tmo_id = 0;
if (tmoid)
if (fqe_tmoid)
/*
 * Flush any outstanding events from the event thread. Since the
 * freedestevt() will be queued after any pending syncs, we
 * should be OK; but will start the ball rolling just in case.
 */
D1("wrsmdfreedest: done");
/* remove reference added in wrsmdmkdest() */
}
/*
* ****************************************************************
* *
* E N D CONNECTION DATA STRUCTURE MANAGEMENT *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* B E G I N MAIN STATE MACHINE *
* *
* ****************************************************************
*/
/*
* We change a destination's state in a number of routines; we define these
* macros to make sure it gets done the same way every time.
*/
if (WRSMD_SCHED_STATE(newstate)) { \
} \
} \
if (WRSMD_SCHED_STATE(newstate)) { \
} \
/*
* This routine processes a notification that a destination has become
* unreachable. Delete our record of it, so that when it comes back up we
* will re-establish our association. We do this by changing its state to
* S_DELETE; the service routine will then start the deletion
* process.
*
* Since other parts of the driver may have operations in progress that
* involve this destination, most of the time we cannot just whack the
* state to the new value. Instead, we record (in rd_estate) that the
* connection was lost. The next time someone else attempts to change the
* state, the state change routines recognize that there is a pending event
* and change the state to the one we wanted instead. (There are
* exceptions in cases where the new state indicates that we've enabled
* some sort of timeout; in this case, we may wait until the following
* state change to take note of the event.)
*/
static void
{
/* LINTED: E_CONSTANT_CONDITION */
} else {
}
D1("wrsmd_lostconn: state now %s, estate now %s",
/*
* Stop trying to flush queue entries to the other side.
*
* stopq doesn't really need a lock to protect its state, as the
* only thing that happens to it is that it is set to true just
* prior to deleting rd, and the only purpose of this is to avoid
* unnecessary work. Other threads can read the state of this
* variable at any time, without taking a special lock.
*
* Note that rd itself is protected from going away from the
*/
D1("wrsmd_lostconn: done");
}
/*
* Figure out what state transition should actually occur after an event
* has happened.
*/
static int
{
/*
* If we're going to a state where we've just set a timeout, don't
* mess with the state. When the timeout happens, it will change
* state again, and we'll nab 'em there. If we're about to delete
* rd, don't bother worrying about the event.
*/
switch (newstate) {
case WRSMD_STATE_W_SCONNTMO:
case WRSMD_STATE_W_ACCEPT:
case WRSMD_STATE_W_ACK:
case WRSMD_STATE_W_FQE:
case WRSMD_STATE_DELETING:
case WRSMD_STATE_S_DELETE:
return (retval);
}
}
return (retval);
}
/*
* If this destination's state is equal to state, set its state to INPROGRESS
* and return 1, otherwise return 0.
*/
/*
 * NOTE(review): fragment of wrsmdisstate() -- the first parameter line
 * and the locked comparison are elided from this view. Per the comment
 * above: if the destination's state equals 'state', set it to
 * INPROGRESS and return 1; otherwise return 0.
 */
static int
int state) /* State to check for */
{
int retval;
/*
 * We check first without the lock to save time in a common case,
 * namely, we're called from the wrsmdmsghdlr_syncdqe() routine and
 * we want to know if we're waiting for an FQE.
 */
/* unlocked fast-path check hidden from lock_lint's analysis */
#ifndef __lock_lint
D1("wrsmdisstate: state was %s, returning 0",
return (0);
}
#endif /* __lock_lint */
retval = 1;
D1("wrsmdisstate: returning 1");
} else {
retval = 0;
D1("wrsmdisstate: state was %s, returning 0",
}
return (retval);
}
/*
* Return destination's state, then set its state to INPROGRESS.
*/
static int
{
int state;
return (state);
}
/*
* Set destination's state; must be preceded by a getstate call. (i.e.,
* destination's current state must be INPROGRESS.)
*/
/*
 * NOTE(review): fragment of wrsmdsetstate() -- the first parameter line
 * and the INPROGRESS check are elided from this view. Must be paired
 * with a prior getstate call (state must currently be INPROGRESS).
 */
static void
int newstate) /* State to set */
{
} else {
/* caller violated the getstate/setstate protocol */
D1("wrsmd: setstate without getstate");
}
D1("wrsmdsetstate: done");
}
/*
* Special case of wrsmdsetstate, designed to be called from the service
* routine. Does everything wrsmdsetstate does _except_ qenable the service
* routine.
*/
/*
 * NOTE(review): fragment of wrsmdsetstate_nosrv() -- the first
 * parameter line and the INPROGRESS check are elided from this view.
 * Same as wrsmdsetstate() except it does not qenable the service
 * routine (see the comment above); intended for use from the service
 * routine itself.
 */
static void
int newstate) /* State to set */
{
newstate);
} else {
/* caller violated the getstate/setstate protocol */
D1("wrsmd: setstate without getstate");
}
D1("wrsmdsetstate_nosrv: done");
}
/*
* Set state to newstate iff state is oldstate. Return 1 if move happened,
* else 0.
*/
/*
 * NOTE(review): fragment of wrsmdmovestate() -- the first parameter
 * line and the compare-and-set itself are elided from this view.
 * Per the comment above: atomically move state from oldstate to
 * newstate; return 1 if the move happened, 0 otherwise.
 */
static int
int oldstate, /* State to check against */
int newstate) /* State to set if check succeeds */
{
int retval;
D1("wrsmdmovestate: rd 0x%p, oldstate %s, newstate %s",
retval = 1;
D1("wrsmdmovestate: state changed, returning 1");
} else {
retval = 0;
D1("wrsmdmovestate: oldstate really %s, returning 0",
}
return (retval);
}
/*
* ****************************************************************
* *
* E N D MAIN STATE MACHINE *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* B E G I N HANDLERS FOR INCOMING RSM MESSAGES *
* *
* ****************************************************************
*/
/*
* Handlers for the various messages that may arrive. All of these happen
* during interrupt handling, and will not actually use RSMPI calls.
* Rather, they will schedule actions to happen.
*/
/*
* Received CONNECT REQUEST message. Cause this side to set up
* connection to xfer segment and send back an ACCEPT message.
*
* We must have everything set up before sending the ACCEPT.
* However, we must not transmit any data until we receive the ACK
* of the ACCEPT.
*/
static void
{
D1("wrsmdmsghdlr_req_connect: rd 0x%p (addr %ld ctlr %d)",
/*
* xmit lock guarantees that timeout has really been set
* for any wait conditions.
*/
if (rd->rd_segid_valid) {
/*
* Another connect message - is it a duplicate?
* If so, just ignore. Otherwise, there is a
* problem, so force a connection teardown.
*/
/* Not the same connect request, drop connection */
}
return;
}
/* remember the message sequence number of this connection request */
/*
* Crossed connection requests. If we're the higher
* numbered address, cancel the ACCEPT timeout and accept
* the remote request. If we're the lower numbered
* address, ignore this request because the remote side
* will accept ours. If the W_ACCEPT timeout expires prior
* to cancelling the timeout, the timeout function will
* notice the state is no longer W_ACCEPT, and will not
* cause the connection to be torn down. If the timeout
* has already occurred (and the rd state is S_DELETE),
* we're out of luck, and will have to wait for a new
* connection request from the remote side.
*/
/* LINTED: E_CONSTANT_CONDITION */
rd->rd_tmo_int = 0;
}
} else {
/*
* Save away the connection information. If possible,
* change the state to cause the request to be immediately
* acted upon. If the state is currently INPROGRESS
* in the early stages of connection (during crexfer
* or the start of sconn), then this request will
* eventually be noticed when sconn() is called. The
* sconn() function will notice that the segid is valid,
* and perform the CONNXER_ACCEPT tasks instead.
*
* If this rd's state was in a later stage of the
* connection dance (or after a connection exists), a
* previous connection request should have been received,
* the new connection request will not be expected, and
* this will have been caught by noticing the segid was
* already valid, and cause a failure, above.
*/
/*
* No connection was in progress. Start a new
* connection setup process.
*/
/* LINTED: E_CONSTANT_CONDITION */
/*
* Accept this request instead of resending our
* connect request. Cancel the timeout. If the
* SCONNTMO timeout function is called prior to
* cancelling the timeout, it will notice the state
* is no longer W_SCONNTMO, and will not cause a
* new connection request to be sent. If the
* timeout already occurred (and rd is in the
* S_SCONN state), the sconn() function will notice
* that the segid is valid, and perform the
* CONNXER_ACCEPT tasks instead.
*/
/* LINTED: E_CONSTANT_CONDITION */
rd->rd_tmo_int = 0;
}
}
if (utmo)
}
/*
* Received ACCEPT message. Cause this side to set up a connection
* to the remote transfer segment and send back an ACK message.
*/
/*
 * NOTE(review): wrsmdmsghdlr_con_accept fragment.  The declarator line
 * (function name/arguments), the body of the D1() call, and the timeout
 * logic around "if (utmo)" appear to be missing from this revision --
 * verify against the original wrsmd.c before relying on this text.
 */
static void
{
D1("wrsmdmsghdlr_con_accept: rd 0x%p (addr %ld ctlr %d)",
/*
 * xmit lock protects segid field
 */
/* LINTED: E_CONSTANT_CONDITION */
if (utmo)
} else {
return;
}
}
/*
* Received ACK message. Now ok to proceed with DLPI data transfer.
*/
/*
 * NOTE(review): wrsmdmsghdlr_con_ack fragment.  Declarator, D1()
 * arguments and the utmo/timeout handling are missing lines in this
 * revision -- confirm against the original wrsmd.c.
 */
static void
{
D1("wrsmdmsghdlr_con_ack: rd 0x%p (addr %ld ctlr %d)",
/* LINTED: E_CONSTANT_CONDITION */
if (utmo)
} else {
return;
}
}
/*
* Remote side has just sync'ed up the local DQE with its copy, so there
* may be buffers to deliver.
*/
/*
 * NOTE(review): wrsmdmsghdlr_syncdqe fragment.  Per the comment above,
 * this handles a SYNC_DQE message: it sanity-checks the message
 * (rcv_segid), and, because RSM_PUT cannot be issued from interrupt
 * context, defers the actual DQE processing to the event thread.
 * Several statement lines are missing from this revision.
 */
static void
{
D1("wrsmdmsghdlr_syncdqe: rd 0x%p (addr %ld ctlr %d)",
"wrsmdmsghdlr_syncdqe start",
/*
 * message sanity check
 */
D1("wrsmdmsghdlr_syncdqe: bad rcv_segid");
"wrsmdmsghdlr_syncdqe end; failure bad msg",
return;
}
/*
 * Since we'll eventually call RSM_PUT, and we're
 * in interrupt context, we need to process this
 * from the event thread
 */
}
/*
 * NOTE(review): unnamed fragment (declarator line missing).  Judging by
 * the D1() strings it is the event-thread side of SYNC_DQE handling:
 * it walks the valid DQEs, delivers non-empty packets, counts
 * zero-length entries as freed buffers, and then decides whether to
 * cancel or re-arm the FQE timeout.  Many statement lines are missing
 * from this revision -- verify against the original wrsmd.c.
 */
static void
{
int freebufs = 0;
/* Loop through all valid DQE's and process their packets. */
/* Don't try to send up DQE with zero length */
if (length)
else {
freebufs++;
}
if (freebufs ==
freebufs = 0;
}
}
if (freebufs) {
}
/*
 * We hold xmit_lock to keep wrsmdfqetmo() from running while
 * we're deciding what to do. In the case where we're waiting
 * for FQE's but don't have any, if we let fqetmo run before we
 * set the state back to W_FQE, it won't do anything and we
 * could hang in that state until another packet came in
 * (which could be forever).
 */
if (avail) {
rd->rd_fqe_tmo_id = 0;
rd->rd_tmo_int = 0;
/*
 * Note: since the fqetmo gets xmit_lock, we have
 * to release it before we call untimeout() to prevent
 * a deadlock from occurring.
 */
} else {
}
} else {
}
D1("wrsmdmsghdlr_syncdqe: success");
"wrsmd_msg_hdlr_syncdqe end: success");
}
/*
 * NOTE(review): unnamed empty function; its declarator line is missing
 * from this revision, so its identity and purpose cannot be determined
 * from this fragment.
 */
static void
{
}
/*
* Handler for connection-related RSMPI messages from remote WRSMD drivers
*/
/*
 * NOTE(review): RSMPI interrupt handler fragment (wrsmd_rsm_intr_handler,
 * per the D1() strings).  Dispatches on the queue operation: CREATE makes
 * a dest structure, CONFIGURE is ignored, DROP/DESTROY tears the local
 * side down, RECEIVE falls through to the message-type switch below.
 * Returns RSM_INTR_HAND_UNCLAIMED for interrupts that are not ours,
 * CLAIMED_EXCLUSIVE otherwise.  The declarator, several condition
 * expressions and call statements are missing lines in this revision.
 */
/* ARGSUSED */
static rsm_intr_hand_ret_t
void *data,
{
int isdel = 0;
/* LINTED E_FUNC_SET_NOT_USED */
int isnew = 0;
/*
 * We only handle RSM addresses that fit in 48 bits.
 * This is no problem for Wildcat.
 */
D1("wrsmd_intr_handle: wrsmdp 0x%p (cltr %d) sender-addr %ld",
(void *)wrsmdp,
/* Is this our interrupt? */
D1("wrsmd_intr_handle: bad controller handle");
return (RSM_INTR_HAND_UNCLAIMED);
}
/*
 * We don't really care about anything but a received packet
 * or a queue destroy
 */
switch (operation) {
case RSM_INTR_Q_OP_CREATE: {
/*
 * Create a dest structure, on the assumption that
 * somebody's about to communicate with us.
 */
"wrsmdintrhdlr end; failure cantfindormkdest",
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
}
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
}
case RSM_INTR_Q_OP_CONFIGURE:
/* ignore configure messages */
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
case RSM_INTR_Q_OP_DROP:
case RSM_INTR_Q_OP_DESTROY: {
/*
 * The remote side has shut down the connection. We need
 * to shut local side of the connection down as well.
 */
"wrsmdintrhdlr end; failure cantfinddest",
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
}
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
}
case RSM_INTR_Q_OP_RECEIVE:
/*
 * A DLPI message from the remote node. Handle in the main
 * body.
 */
break;
default:
/* ignore */
"wrsmdintrhdlr end; unknown message type");
return (RSM_INTR_HAND_UNCLAIMED);
}
/*
 * Dest should already exist, having been created by the
 * RSM_INTR_Q_OP_CREATE, above.
 */
if (isdel) {
"wrsmdintrhdlr end; failure dest deleting",
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
D1("wrsmd_rsm_intr_handler: can't finddest");
"wrsmdintrhdlr end; failure cantfinddest",
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
}
/*
 * Non-matching driver version!
 * Toss message.
 */
"non-matching wrsmd version (%d) in "
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
}
/* Dispatch on DLPI message type (switch header missing in this revision). */
case WRSMD_MSG_REQ_CONNECT:
break;
case WRSMD_MSG_CON_ACCEPT:
break;
case WRSMD_MSG_CON_ACK:
break;
/*
 * Maybe scan the incoming queue at this time?
 */
case WRSMD_MSG_SYNC_DQE:
break;
default:
break;
}
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
}
/*
* ****************************************************************
* *
* E N D HANDLERS FOR INCOMING RSM MESSAGES *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* B E G I N CONNECTION MANAGEMENT *
* *
* ****************************************************************
*/
/*
* Create and initialize a transfer segment for the remote destination. If
* successful, return 0, else 1. The destination's state must be
* INPROGRESS. It remains INPROGRESS during this function.
*/
/*
 * NOTE(review): wrsmdcrexfer fragment.  Creates and initializes the
 * local transfer segment: finds a supported transport page size,
 * creates the send queue, allocates and aligns the segment memory,
 * lays out the FQ/DQ/buffer regions, fills in the segment header,
 * creates and publishes the RSM segment.  Returns 0 on success, 1 on
 * any failure.  Many expression/statement lines are missing from this
 * revision -- verify against the original wrsmd.c.
 */
static int
{
volatile wrsmd_xfer_hdr_t *xfer;
volatile wrsmd_fqe_t *fqep;
volatile wrsmd_dqe_t *dqep;
int i, stat;
size_t transport_pgsize = 0;
D1("wrsmdcrexfer: rd 0x%p (addr %ld ctlr %d)",
/* Pick the smallest page size the transport supports (scan bits). */
for (i = 0; i < (sizeof (size_t) * 8); i++) {
((size_t)1 << i)) {
transport_pgsize = 1024 << i;
break;
}
}
if (transport_pgsize == 0) {
"page sizes (attr_page_size is 0x%lx)",
return (1);
}
/*
 * Make sure the remote side is responding before setting
 * up the local xfer segment.
 */
if (stat != RSM_SUCCESS) {
D1("wrsmdcrexfer: can't create send queue, stat 0x%x, "
"returning 1", stat);
"wrsmdcrexfer end; failure RSM_SENDQ_CREATE",
"stat 0x%x", stat);
return (1);
}
/*
 * Allocate memory for segment. Allow for alignment of DQE list
 * and FQE list. Also allow buffers to be aligned on
 * RSM-page-sized boundaries.
 */
+ 64 +
+ 64 +
+ (transport_pgsize -1);
if (!xfer_start) {
D1("wrsmdcrexfer: can't allocate memory, returning 1");
"wrsmdcrexfer end; failure kmem_alloc",
return (1);
}
/*
 * Round up memory pointer and round down size to allow alignment
 * within the transport's supported page size.
 */
(transport_pgsize -1));
if (roundup != transport_pgsize) {
xfer_start += roundup;
}
D2("wrsmdcrexfer: rawsize 0x%lx rawmem 0x%p xfersize 0x%lx "
"xfermem 0x%p pgsize 0x%lx\n",
(void *)rd->rd_rawmem_base_addr,
(void *)xfer_start,
/* Force FQ to start on a 64-byte boundary. */
fq_offset = sizeof (struct wrsmd_xfer_hdr);
/* Force DQ to start on a 64-byte boundary. */
/* Force buffers to start on a 64-byte boundary. */
/*
 * Note that while we set the _f and _n queue pointers and the
 * queue lengths here, the _l pointers will be set (and the lengths
 * may be adjusted) when we connect to the remote xfer segment (see
 * connxfer).
 */
/*
 * Initialize the delivery and free queues: elements in the free
 * queue are valid, and elements in the delivery queue are invalid
 * (seqno == 0).
 */
}
/*
 * Allocate and init our structures to describe loaned-up buffers.
 */
D1("wrsmdcrexfer: can't alloc rbp structs, returning 1");
"wrsmdcrexfer end; failure kmem_zalloc bufbase",
return (1);
}
for (i = 0; i < rd->rd_numlbufs; i++) {
rbp++;
}
/*
 * Set everything in the header of the segment.
 */
D1("wrsmdcrexfer: rx_buf_offset 0x%x fq_offset 0x%x dq_offset 0x%x "
"rd_numlbufs 0x%x rd_lbuflen 0x%x rd_num_fqrs 0x%x "
"rd_num_dqrs 0x%x\n",
rd->rd_lbuflen,
rd->rd_num_dqrs);
/*
 * Local xfer segment is now initialized; make it available to the
 * remote node.
 */
if (stat != RSM_SUCCESS) {
D1("wrsmdcrexfer: can't create RSM segment, stat 0x%x, "
"return 1", stat);
"wrsmdcrexfer end; failure RSM_SEG_CREATE",
return (1);
}
/*
 * Publish this segment. First try using an id that is likely
 * to be unique.
 */
}
if (stat == RSMERR_SEGID_IN_USE) {
/* Couldn't use default id; try other ids in allowed range */
rd->rd_lxfersegid++;
}
if (stat != RSM_SUCCESS) {
D1("wrsmdcrexfer: can't publish, stat 0x%x, returning 1",
stat);
"wrsmdcrexfer end; failure wrsmd_export_segment",
return (1);
}
D1("wrsmdcrexfer: returning 0");
return (0);
}
/*
* Send a connect request to the remote.
*
* If we've received a Connect message from the destination, connect to the
* remote transfer segment. Otherwise, send them a Connect Request
* message. On success, return 0. If the connect fails return 1. A
* failure in sending a Connect Request message will result in a retry
* timeout being scheduled, but will not return 1 unless the total timeout
* period has expired. Destination's state must be INPROGRESS when called.
* Destination's state is set to a new state prior to returning.
*/
/*
 * NOTE(review): wrsmdsconn fragment.  If a Connect Request already
 * arrived (rd_segid_valid), connects to the remote segment instead of
 * sending our own request; otherwise sends (or on retry, resends) a
 * Connect Request and arms either the W_ACCEPT timeout (on send
 * success) or an exponential-backoff retry timeout (on send failure).
 * Returns 0 on success/retry-scheduled, 1 once the total timeout
 * budget is exhausted.  Condition and call lines are missing from this
 * revision.
 */
static int
int fromtmo) /* 0 if this is our first attempt; nonzero if this */
/* is a retry, requested by a timeout routine. */
{
int stat;
D1("wrsmdsconn: rd 0x%p (addr %ld ctlr %d)",
if (rd->rd_segid_valid) {
/*
 * We've gotten a Connect Request from the remote side
 * while in INPROGRESS state. Don't send our request;
 * instead, connect to the remote transfer segment.
 */
return (stat);
}
}
/*
 * We haven't gotten a Connect Request from them, so we
 * need to send one of our own.
 */
/*
 * If this is a timeout retry, send the same Connect Request
 * message we sent the first time.
 */
if (fromtmo) {
} else {
}
/*
 * xmit lock guarantees new state and timeout setup both occur
 * without an intervening state change. See
 * wrsmdmsghdlr_req_connect().
 */
/*
 * Set up a timeout to remind us if an ACCEPT never
 * shows up. This is only a 1-time timeout, no
 * backoff is needed.
 */
rd->rd_tmo_tot = 0;
rd->rd_tmo_int);
} else {
/*
 * We couldn't send the message, set up a timeout to
 * try again a little later.
 */
if (!fromtmo) {
rd->rd_tmo_int =
rd->rd_tmo_tot = 0;
D2("wrsmdsconn: !fromtmo, tmo_int %d, "
rd->rd_tmo_tot);
} else {
/* Do exponential backoff */
/* If we've waited too long, fail */
if (rd->rd_tmo_tot >=
"wrsmdsconn end; failure timeout",
(void) wrsmdgetstate(rd);
D1("wrsmdsconn: tmo limit reached, "
"returning 1");
return (1);
}
/* Clip timeout to maximum */
if (rd->rd_tmo_int >
rd->rd_tmo_int =
D2("wrsmdsconn: tmo_int %d, tmo_tot %d",
}
rd->rd_tmo_int);
}
D1("wrsmdsconn: returning 0");
return (0);
}
/*
* Connect to the transfer segment on the destination machine. If an error
* occurs, return 1. Destination state must be INPROGRESS. It remains
* INPROGRESS during this function.
*/
/*
 * NOTE(review): wrsmdconnxfer fragment.  Connects to the remote node's
 * transfer segment, copies and validates its header (cookie, offsets,
 * element counts all bounds-checked against segsize), shrinks local
 * queue sizes to match the remote side, allocates the shadow FQ/DQ,
 * and initializes shadow entries.  Returns 0 on success, 1 on any
 * validation or allocation failure.  Condition expressions and several
 * statements are missing from this revision.
 */
static int
{
int stat;
int i;
volatile wrsmd_fqe_t *fqep;
volatile wrsmd_dqe_t *dqep;
D1("wrsmdconnxfer: rd 0x%p (addr %ld ctlr %d)",
if (stat != RSM_SUCCESS) {
D1("wrsmdconnxfer: can't connxfer, stat 0x%x, returning 1",
stat);
"wrsmdconnxfer end; failure RSM_CONNECT",
return (1);
}
/*
 * Copy entire header struct into local memory
 */
if (stat != RSM_SUCCESS) {
D1("wrsmdconnxfer: can't read xfer header, returning 1");
"wrsmdconnxfer end; failure timeout",
stat);
return (1);
}
/*
 * Validate header structure, extract some values from it
 */
D1("wrsmdconnxfer: badxfer, cookie 0x%x, returning 1",
"wrsmdconnxfer end; failure 'bad xfer'",
return (1);
}
D1("wrsmdconnxfer: remote buf_offset 0x%x fq_offset 0x%x "
"dq_offset 0x%x rd_numbufs 0x%x rd_lbuflen 0x%x "
"rd_numfqes 0x%x rd_numdqes 0x%x\n",
/*
 * Must be at least one more element in queue than the
 * number of buffers, so that we can track when all queue
 * elements need to be flushed to remote side.
 */
D1("wrsmd: badxfer, rbufoff too big");
return (1);
}
> segsize) {
D1("wrsmd: badxfer, fqw_f_off too big");
return (1);
}
> segsize) {
D1("wrsmd: badxfer, dqw_f_off too big");
return (1);
}
/*
 * Now that we know the number of remote buffers and queue elements,
 * shrink everything to fit and calculate the ends of all queues.
 */
D1("rd_numlbufs 0x%x rd_num_fqws 0x%x rd_num_dqrs 0x%x\n",
rd->rd_numlbufs =
D1("rd_numrbuf 0x%x rd_num_fqrs 0x%x rd_num_dqws 0x%x\n",
rd->rd_numrbuf =
/* mark last entry of free queue as invalid */
D1("wrsmdconnxfer: num_fqrs 0x%x num_fqws 0x%x num_dqws 0x%x",
D1("wrsmd: badxfer, bufsize * num buf too big");
return (1);
}
> segsize) {
D1("wrsmd: badxfer, fqesize * num fqe too big");
return (1);
}
> segsize) {
D1("wrsmd: badxfer, dqesize * num dqe too big");
return (1);
}
/*
 * Make sure any local queue that will be transferred or sync'd is
 * WRSMD_CACHLINE_SIZE'd aligned. This means that when the data is
 * loaded into the FPU registers for transfer, it is already aligned.
 * This is a minor optimisation. For FireLink, only the remote
 * (destination) side needs to be aligned for interconnect
 * performance.
 */
D1("wrsmdconnxfer: can't alloc memory for shadow queues, "
"returning 1");
"wrsmdconnxfer end; failure kmem_alloc",
return (1);
}
/*
 * Initialize the shadow delivery and free queues: all elements in
 * the free queue are valid except the last entry, and all elements in
 * the delivery queue are invalid. It is necessary to initialize
 * because when we do an wrsmdsyncfqe() or wrsmdsyncdqe(), we may do
 * an RSM_PUT of more than the newly changed entries (because we
 */
/* still in first round, on last element */
i = 0;
}
/* last entry is not valid */
rd->rd_shdwfqw_errflag = 0;
D1("wrsmdconnxfer: initialized %d fqe shadow entries", i);
/* in first round, on first element */
}
rd->rd_shdwdqw_errflag = 0;
D1("wrsmdconnxfer: returning 0");
return (0);
}
/*
* Send an ACCEPT message to the destination.
* Return 1 if this send fails, else set state to W_ACK and return 0.
* Destination's state must be INPROGRESS.
* Destination's state is set to a new state on success.
*/
/*
 * NOTE(review): wrsmdsaccept fragment.  Sends an ACCEPT message; on
 * success arms the W_ACK timeout and returns 0, on send failure
 * returns 1.  The send call and state-change lines are missing from
 * this revision.
 */
static int
{
int stat;
int retval = 0;
D1("wrsmdsaccept: rd 0x%p (addr %ld ctlr %d)",
rd->rd_tmo_int =
rd->rd_tmo_tot = 0;
rd->rd_tmo_int);
} else { /* Failure */
retval = 1;
}
return (retval);
}
/*
* Send an ACK response to the destination.
* Return 1 if this send fails, else set state to READY and return 0.
* Destination's state must be INPROGRESS.
* Destination's state is set to a new state on success.
*/
/*
 * NOTE(review): wrsmdsack fragment.  Sends an ACK; on success moves to
 * READY (apparently restarting the transmit queue via rd_queue_h) and
 * returns 0, on send failure returns 1.  The send call and the
 * statement bodies of the if/else are missing from this revision.
 */
static int
{
int stat;
int retval = 0;
D1("wrsmdsack: rd 0x%p (addr %ld ctlr %d)",
if (rd->rd_queue_h)
else
} else { /* Failure */
retval = 1;
}
return (retval);
}
/*
* ****************************************************************
* *
* E N D CONNECTION MANAGEMENT *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* *
* ****************************************************************
*/
/*
* Queue an FQE with the specified buffer number onto the shadow FQ for
* transmission to the remote system.
*/
/*
 * NOTE(review): wrsmdputfqe fragment.  Queues an FQE for buffer
 * `bufnum` onto the shadow free queue (rd_shdwfqw_*) for later
 * transmission by wrsmdsyncfqe().  The declarator and the actual
 * enqueue statements are missing from this revision.
 */
static void
int bufnum) /* Number of free buffer */
{
D2("wrsmdputfqe: start shdwfqw_i 0x%p (index %ld) shdwfqw_f 0x%p "
"shdwfqw_l 0x%p",
(void *)rd->rd_shdwfqw_i,
/ sizeof (fqe),
D1("wrsmdputfqe: done");
D2("wrsmdputfqe: end shdwfqw_i 0x%p shdwfqw_f 0x%p shdwfqw_l 0x%p",
(void *)rd->rd_shdwfqw_l);
}
/*
* Flush any queued FQEs (free queue entries) from the local shadow copy to
* the remote system's copy.
*/
/*
 * NOTE(review): wrsmdsyncfqe fragment.  Flushes queued shadow FQEs to
 * the remote copy one cacheline-sized element at a time (so each
 * RSM_PUT is atomic and retryable); on RSMERR_CONN_ABORTED treats the
 * connection as lost, on other errors schedules an event-thread retry
 * (RSM_PUT cannot be called from a callback).  Loop header, sequence
 * assignments and PUT call lines are missing from this revision.
 */
static void
{
int stat = RSM_SUCCESS;
D1("wrsmdsyncfqe: rd 0x%p (addr %ld ctlr %d) index %ld to %ld",
(void *)rd,
sizeof (*(rd->rd_shdwfqw_o)),
sizeof (*(rd->rd_shdwfqw_i)));
D2("wrsmdsyncfqe: shdwfqw_i 0x%p shdwfqw_f 0x%p shdwfqw_l 0x%p",
(void *)rd->rd_shdwfqw_l);
/* If nothing's queued, nothing to do */
D1("wrsmdsyncfqe: no work, done");
return;
}
/* If network down, nothing to do either */
D1("wrsmdsyncfqe: stopq on, done");
return;
}
/*
 * Send each element in the queue separately. Since each
 * element is WRSMD_CACHELINE_SIZE, then we know that
 * each PUT is atomic, and we won't corrupt the remote
 * side's free queue if we get a transient error and retry.
 *
 * If we try to put more then one element at a time, then it is
 * possible for some elements to succeed while others fail.
 * If this is the case then the remote side could possibly consume
 * those new buffers before the local side retries the put, and
 * thus the FQ could get corrupted (buffer listed as free when
 * it really isn't).
 *
 */
/* Set the sequence number for this FQE */
/*
 * Set the offset to transfer one fqe at a time.
 *
 * NOTE: We already know we are aligned because
 * we allocated the remote buffer on a WRSMD_CACHELINE_SIZE'd
 * boundary, and each fqe is WRSMD_CACHELINE_SIZE long.
 */
(char *)rd->rd_shdwfqw_f;
/* Push FQE to remote side */
if (stat == RSM_SUCCESS) {
/* Write was successful */
/*
 * Wrap, and update sequence number if
 * this is the last fqe
 */
rd->rd_fqw_seq++;
if (rd->rd_fqw_seq == 0)
rd->rd_fqw_seq++;
} else {
rd->rd_shdwfqw_o ++;
}
rd->rd_shdwfqw_errflag = 0;
} else {
wrsmdp->wrsmd_errs++;
if (stat == RSMERR_CONN_ABORTED) {
/* permanent connection loss */
} else {
/*
 * Schedule an event to retry. Can't do a
 * timeout here because it would require
 * calling RSM_PUT from a callback.
 */
(void *)rd);
}
return;
}
}
D1("wrsmdsyncfqe: done");
D2("wrsmdsyncfqe: shdwfqw_i 0x%p shdwfqw_f 0x%p shdwfqw_l 0x%p",
(void *)rd->rd_shdwfqw_l);
}
/*
* Queue a DQE with the specified buffer description onto the shadow DQ for
* transmission to the remote system.
*/
/*
 * NOTE(review): wrsmdputdqe fragment.  Queues a DQE (buffer number,
 * packet offset, length, and apparently the SAP) onto the shadow
 * delivery queue (rd_shdwdqw_*) for later transmission by
 * wrsmdsyncdqe().  The declarator and the enqueue statements are
 * missing from this revision.
 */
static void
int bufnum, /* Number of full buffer */
int offset, /* Offset of packet from start of buffer */
{
D1("wrsmdputdqe: rd 0x%p (ctlr %d addr %ld), bufnum %d, offset %d, "
"length %d, sap 0x%x index %ld",
sizeof (dqe));
D2("wrsmdputdqe: start shdwdqw_i 0x%p (index %ld) shdwdqw_f 0x%p "
"shdwdqw_l 0x%p",
(void *)rd->rd_shdwdqw_i,
sizeof (dqe),
(void *)rd->rd_shdwdqw_f,
(void *)rd->rd_shdwdqw_l);
D1("wrsmdputdqe: done");
D2("wrsmdputdqe: end shdwdqw_i 0x%p shdwdqw_o 0x%p shdwdqw_f 0x%p "
"shdwdqw_l 0x%p",
(void *)rd->rd_shdwdqw_i,
(void *)rd->rd_shdwdqw_o,
(void *)rd->rd_shdwdqw_f,
(void *)rd->rd_shdwdqw_l);
}
/*
* Flush any queued DQEs from local shadow copy to the remote system.
*/
/*
 * NOTE(review): wrsmdsyncdqe fragment.  Flushes queued shadow DQEs to
 * the remote copy one cacheline-sized element at a time, then (if
 * anything was transferred, or a previous attempt left the error flag
 * set) sends the SYNC_DQE interrupt; on RSMERR_CONN_ABORTED treats the
 * connection as lost, on other errors schedules an event-thread retry.
 * Loop header, sequence assignments and PUT/send lines are missing
 * from this revision.
 */
static void
{
int stat = RSM_SUCCESS;
int any_transfers = 0;
int old_error_flag;
D1("wrsmdsyncdqe: rd 0x%p (addr %ld ctlr %d) index %ld to %ld",
sizeof (*(rd->rd_shdwdqw_o)),
sizeof (*(rd->rd_shdwdqw_i)));
D2("wrsmdsyncdqe: shdwdqw_i 0x%p shdwdqw_o 0x%p shdwdqw_f 0x%p "
"shdwdqw_l 0x%p",
(void *)rd->rd_shdwdqw_i,
(void *)rd->rd_shdwdqw_o,
(void *)rd->rd_shdwdqw_f,
(void *)rd->rd_shdwdqw_l);
/* If nothing's queued, nothing to do */
D1("wrsmdsyncdqe: no work, done");
return;
}
/* If network down, nothing to do either */
D1("wrsmdsyncdqe: stopq on, done");
return;
}
/*
 * Send each element in the queue separately. Since each
 * element is WRSMD_CACHELINE_SIZE, then we know that
 * each PUT is atomic, and we won't corrupt the remote
 * side's free queue if we get a transient error and retry.
 *
 * (see explanation in wrsmdsyncfqe() )
 *
 * This is needed on the DQ as well as the FQ because even though
 * DQ sends an interrupt when it's complete, there is a small window
 * of opportunity if the local side adds more DQ's with the same
 * sequence number while the remote side is still consuming them.
 *
 */
/* Set the sequence number for this DQE */
/*
 * Set the offset to transfer one dqe at a time.
 *
 * NOTE: We already know we are aligned because
 * we allocated the remote buffer on a WRSMD_CACHELINE_SIZE'd
 * boundary, and each dqe is WRSMD_CACHELINE_SIZE long.
 */
(char *)rd->rd_shdwdqw_f;
/* Push FQE to remote side */
if (stat == RSM_SUCCESS) {
/* Write was successful */
/*
 * Wrap, and update sequence number if
 * this is the last dqe
 */
rd->rd_dqw_seq++;
if (rd->rd_dqw_seq == 0)
rd->rd_dqw_seq++;
} else {
rd->rd_shdwdqw_o ++;
}
rd->rd_shdwdqw_errflag = 0;
} else {
wrsmdp->wrsmd_errs++;
if (stat == RSMERR_CONN_ABORTED) {
/* permanent connection loss */
} else {
/*
 * Schedule an event to retry. Can't do a
 * timeout here because it would require
 * calling RSM_PUT from a callback.
 */
(void *)rd);
}
if (!any_transfers)
return;
}
}
/* If error flag was previously set, retry the interrupt */
if (any_transfers || old_error_flag) {
/* send failed */
wrsmdp->wrsmd_errs++;
if (stat == RSMERR_CONN_ABORTED) {
/* permanent connection loss */
} else {
/*
 * Schedule an event to retry. Can't do a
 * timeout here because it would require
 * calling RSM_PUT from a callback.
 */
(void *)rd);
}
} else {
wrsmdp->wrsmd_syncdqes++;
}
}
D1("wrsmdsyncdqe: done");
D2("wrsmdsyncdqe: shdwdqw_i 0x%p shdwdqw_o 0x%p shdwdqw_f 0x%p "
"shdwdqw_l 0x%p",
(void *)rd->rd_shdwdqw_i,
(void *)rd->rd_shdwdqw_o,
(void *)rd->rd_shdwdqw_f,
(void *)rd->rd_shdwdqw_l);
}
/*
* Determine whether there are any available FQEs. If so, return 1; if none
* are available, return 0.
*/
/*
 * NOTE(review): wrsmdavailfqe fragment.  Returns 1 if a cached FQE is
 * available or the next FQE's sequence number matches the expected
 * one, else 0.  The declarator and the sequence-number comparison are
 * missing from this revision.
 */
static int
{
if (rd->rd_cached_fqr_cnt) {
D1("wrsmdavailfqe: (cached) returning 1");
return (1);
}
D1("wrsmdavailfqe: returning 1");
return (1);
} else {
D1("wrsmdavailfqe: seq %d, expecting %d, returning 0",
return (0);
}
}
/*
* Attempt to retrieve the next available FQE from the queue. If successful,
* return 1; if none are available, return 0.
*/
/*
 * NOTE(review): wrsmdgetfqe fragment.  Pops the next available FQE:
 * first from the local cache, otherwise from the receive free queue
 * (advancing rd_fqr pointers and wrapping the non-zero sequence
 * number), with a sanity check on the buffer number.  Returns 1 with
 * *bufnum set, or 0 if none available / bogus entry.  Declarator and
 * several statements are missing from this revision.
 */
static int
int *bufnum) /* Set to number of free buffer, if we got one */
{
/* If we have FQE's cached, return one of those */
if (rd->rd_cached_fqr_cnt) {
D1("wrsmdgetfqe: (cached) returning 1, *bufnum %d",
*bufnum);
return (1);
}
D2("wrsmdgetfqe: start fqr_n 0x%p (index %ld) fqr_f 0x%p fqr_l 0x%p "
"fqr_seq %d",
/* Get next FQE */
/* Is it valid? */
/* Yup, return number */
/* Bump pointer, wrap if needed */
rd->rd_fqr_seq++;
if (rd->rd_fqr_seq == 0)
rd->rd_fqr_seq++;
} else
/* Exercise some paranoia */
D1("wrsmdgetfqe: bogus buffer %d in FQE "
"at 0x%lx (max %d)", *bufnum,
"wrsmdgetfqe: bogus buffer %d in FQE "
"at 0x%lx (max %d)", *bufnum,
return (0);
}
D2("wrsmdgetfqe: end fqr_n 0x%p fqr_f 0x%p fqr_l 0x%p "
return (1);
} else {
D1("wrsmdgetfqe: seq %d, expecting %d, returning 0",
D2("wrsmdgetfqe: end fqr_n 0x%p fqr_f 0x%p fqr_l 0x%p "
return (0);
}
}
/*
* Unget an FQE, making it available to be gotten again. Unlike the C
* ungetc, there is guaranteed to be enough buffering to unget all of the
* FQE's that the remote system can have available. (We do this if we
* get an FQE and then later find that we can't transmit the packet we wanted
* to use it for; for example, if we can't get DMA resources.)
*/
/*
 * NOTE(review): wrsmdungetfqe fragment.  Returns a previously-gotten
 * FQE to the cache so it can be re-gotten later (per the comment
 * above).  The declarator and the caching statement are missing from
 * this revision.
 */
static void
int bufnum) /* Number of buffer we want to save for later */
{
D1("wrsmdungetfqe: done");
}
/*
* Attempt to retrieve the next available DQE from the queue. If successful,
* return 1; if none are available, return 0.
*/
/*
 * NOTE(review): wrsmdgetdqe fragment.  Pops the next available DQE,
 * sanity-checking buffer number, offset (<= WRSMD_CACHELINE_OFFSET)
 * and offset+length (<= rd_lbuflen), advancing pointers and wrapping
 * the non-zero sequence number.  Returns 1 with *bufnum/*offset/
 * *length set, or 0 if none available / bogus entry.  Declarator and
 * several condition lines are missing from this revision.
 */
static int
int *bufnum, /* Buffer number, set if we find a DQE */
int *offset, /* Packet offset, set if we find a DQE */
int *length, /* Packet length, set if we find a DQE */
{
D2("wrsmdgetdqe: dqr_n 0x%p (index %ld) dqr_f 0x%p dqr_l 0x%p seq %d",
/* Get next DQE */
/* Is it valid? */
/* Exercise some paranoia */
D1("wrsmdgetdqe: bogus buffer %d "
"in DQE at 0x%p (max %d)", *bufnum,
"in DQE at 0x%p (max %d)", *bufnum,
return (0);
}
if (*offset > WRSMD_CACHELINE_OFFSET) {
D1("wrsmdgetdqe: bogus offset %d "
"in DQE at 0x%lx (max %d)", *offset,
"in DQE at 0x%lx (max %d)", *offset,
return (0);
}
D1("wrsmdgetdqe: bogus "
"offset+length %d+%d in DQE at 0x%lx (max %d)",
rd->rd_lbuflen);
"offset+length %d+%d in DQE at 0x%lx (max %d)",
rd->rd_lbuflen);
return (0);
}
rd->rd_dqr_seq++;
if (rd->rd_dqr_seq == 0)
rd->rd_dqr_seq++;
} else
D1("wrsmdgetdqe: returning 1, *bufnum %d, *offset %d, "
return (1);
} else {
D1("wrsmdgetdqe: seq %d, expecting %d, returning 0",
return (0);
}
}
/*
* We've tried to get an FQE and failed. Set this destination up to retry
* on a timeout. Destination's state must be INPROGRESS.
*/
/*
 * NOTE(review): unnamed fragment (declarator line missing).  Per the
 * comment above: after a failed FQE get, arms an exponential-backoff
 * retry timeout; if the total wait budget is exceeded, dumps the queue
 * and drops the connection.  Many statement lines are missing from
 * this revision.
 */
static void
{
/* Do exponential backoff */
if (rd->rd_tmo_int <= 0) {
rd->rd_tmo_int =
rd->rd_tmo_tot = 0;
} else {
}
/* If we've waited too long, dump queue and drop connection */
/*
 * If lostconn() couldn't go to S_DELETE directly,
 * then this movestate call will get it noticed.
 */
return;
}
/* Clip timeout to maximum */
/*
 * Only schedule a timeout if there isn't one already.
 * We hold rd_xmit_lock, so if the timeout has fired,
 * it's blocked on the lock.
 */
if (rd->rd_fqe_tmo_id == 0)
}
/*
* ****************************************************************
* *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* B E G I N COMMUNICATION *
* *
* ****************************************************************
*/
/*
* Attempt to send one or more packets, currently queued on a destination
* structure, to their actual destination. Destination's state must be
* INPROGRESS.
*
* This function gets called holding rd_xmit_lock.
* This code assumes implicit barriers for puts and gets.
*/
/*
 * NOTE(review): wrsmdxfer fragment.  Drains the destination's packet
 * queue: for each packet, gets an FQE (arming a retry timeout if none
 * and nothing has been sent yet), aligns the copy to a 64-byte
 * boundary when it fits, RSM_PUTs the packet with a bounded retry
 * loop, then queues a DQE and periodically syncs.  Finishes by
 * flushing queued DQEs and retrying/rescheduling leftovers.  Loop
 * headers, the put call and many expressions are missing from this
 * revision.
 */
static void
{
int bufnum;
int write_err;
int pkts_queued = 0;
D1("wrsmdxfer: rd 0x%p (addr %ld ctlr %d)",
do {
int retries;
/*
 * Try to get an FQE. If we can't, and this is the
 * first packet we've tried to send on this
 * invocation of wrsmdxfer then set up a timeout to
 * try again. If we've successfully prepped some
 * packets for sending, then go ahead and finish
 * the job, on the theory that when they're done
 * there may be more FQEs available.
 */
if (pkts_queued == 0) {
D1("wrsmdxfer: no FQEs, start timeout, "
"done");
"wrsmdxfer end; failure noFQEs",
return;
} else
break;
}
/* Take packet off the queue. */
rd->rd_queue_len--;
/*
 * Adjust the start pointer and packet length so
 * we're copying to and from a 64 byte aligned
 * address, if it'll fit in the buffer that way.
 * (Note -- this means we may actually be copying
 * data that doesn't belong to us!!!)
 */
start_offset = (uint_t)
end_offset = (uint_t)
rd->rd_rbuflen) {
start_offset = 0;
}
rd->rd_rbuflen);
D6("wrsmdxfer: srcaddr 0x%p endaddr 0x%p "
"start_offset 0x%x end_offset 0x%x pktlen 0x%x",
end_offset, pktlen);
"wrsmdxfer XFERstart",
/* Do the packet copy, check for errors. */
WRSMD_CACHELINE_OFFSET) == 0);
write_err = ~RSM_SUCCESS;
retries--) {
D6("wrsmdxfer: put 0x%x bytes at "
"segoffset 0x%lx from addr 0x%p",
(void *)(srcaddr - start_offset));
if (write_err != RSM_SUCCESS)
wrsmdp->wrsmd_errs++;
if (write_err == RSMERR_CONN_ABORTED)
break;
}
if (write_err != RSM_SUCCESS) {
wrsmdp->wrsmd_oerrors++;
"RSMPI", "wrsmdxfer XFERend",
D1("wrsmdxfer: RSM_PUT failed error %d",
if (write_err == RSMERR_CONN_ABORTED) {
return;
}
} else {
/*
 * Ditch the spent packet, send a DQE,
 * adjust stats.
 */
sap);
pkts_queued++;
if (pkts_queued ==
pkts_queued = 0;
}
wrsmdp->wrsmd_opackets++;
"wrsmdxfer XFERend", tnf_string,
completed, "");
}
}
/*
 * We've prepped all the packets we're going to, now finish
 * up.
 */
if (pkts_queued) {
}
/*
 * If there are more packets to send, and FQE's have become
 * available during wrsmdsyncdqe(), try sending them now.
 */
/*
 * We weren't able to send all packets.
 * Schedule a timeout to retry sending them.
 */
D1("wrsmdxfer: no FQEs, start timeout, done");
"wrsmdxfer end; failure noFQEs",
} else {
}
wrsmdp->wrsmd_xfers++;
D1("wrsmdxfer: done");
}
/*
* Send a message to a remote system. Returns the sequence number of the
* message if one was successfully sent, or -1 if the caller needs to retry
* later. The special message type WRSMD_REXMIT causes us to retransmit the
* last message we sent (unsuccessfully or successfully), without
* incrementing the sequence number. This cannot be called from an
* interrupt.
*/
/*
 * NOTE(review): wrsmdsendmsg fragment.  Per the comment above: sends a
 * message to the remote node; WRSMD_REXMIT retransmits the saved
 * previous message without bumping the (wrapping ushort) sequence
 * number, returning -2 if no previous message is saved.  The declarator,
 * the copy into rsm_previous_msg and the actual send call are missing
 * from this revision.
 */
static int
{
int status;
if (msg_type == WRSMD_REXMIT) {
if (rd->rsm_previous_msg_valid) {
} else {
return (-2);
}
} else {
sizeof (rd->rsm_previous_msg));
/* rd_nseq is a ushort, and will wrap when it gets too big. */
}
/*
 * Send fails immediately if message can't be queued.
 * On Wildcat, message is sent as soon as it is queued, so
 * network failures are reported immediately.
 */
#ifdef DEBUG
if (status != RSM_SUCCESS) {
} else {
D2("wrsmdsendmsg: succeeded\n");
}
#endif
return (status);
}
/*
* ****************************************************************
* *
* E N D COMMUNICATION *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* B E G I N TIMEOUT-FUNCTIONS *
* *
* ****************************************************************
*/
/*
* Timeout functions
*/
/*
* FQE timeout expired. Try sending any packets that were waiting
* for Free Queue Entries.
*/
/*
 * NOTE(review): wrsmdfqetmo fragment -- FQE retry timeout callback.
 * The parameter list, lock acquisition and the retransmit call are
 * missing lines in this revision; only the id-clearing and the mutex
 * rationale remain.
 */
static void
wrsmdfqetmo(void * arg)
{
/*
 * This mutex doesn't really protect a particular data item in this
 * case, it just keeps the movestate from running while we have the
 * state messed up in wrsmdmsghdlr_syncdqe(). See that routine for
 * more explanation.
 */
rd->rd_fqe_tmo_id = 0;
D1("wrsmdfqetmo: done");
}
/*
* Connect retransmit backoff timer has expired. Retransmit the connect
* request.
*/
/*
 * NOTE(review): wrsmdsconntmo fragment -- connect-retransmit backoff
 * timeout callback; the movestate call is missing from this revision.
 */
static void
wrsmdsconntmo(void * arg)
{
/*
 * If the timeout was cancelled, the state will have also changed
 * from W_SCONNTMO, and wrsmdmovestate() will have no effect.
 */
D1("wrsmdsconntmo: done");
}
/*
* Timer to wait for ACCEPT message from remote has expired.
* This indicates the remote side is not reachable, so tear
* down the WRSMD device (controller).
*/
/*
 * NOTE(review): wrsmdaccepttmo fragment -- W_ACCEPT wait timeout
 * callback; the movestate call is missing from this revision.
 */
static void
wrsmdaccepttmo(void * arg)
{
/*
 * If the timeout was cancelled, the state will have also changed
 * from W_ACCEPT, and wrsmdmovestate() will have no effect.
 */
D1("wrsmdaccepttmo: done");
}
/*
* Timer to wait for ACK message from remote expired.
* This indicates the remote side is not reachable, so tear
* down the WRSMD device (controller).
*/
/*
 * NOTE(review): wrsmdacktmo fragment -- W_ACK wait timeout callback;
 * the movestate call is missing from this revision.
 */
static void
wrsmdacktmo(void * arg)
{
/*
 * If the timeout was cancelled, the state will have also changed
 * from W_ACK, and wrsmdmovestate() will have no effect.
 */
D1("wrsmdacktmo: done");
}
/*
* Timer to teardown the WRSMD device (RSM controller) has expired.
* Tear down the connection.
*/
/*
 * NOTE(review): wrsmdteardown_tmo fragment -- controller teardown
 * timeout callback.  Calls wrsmduninit(); if it cannot complete, a
 * retry timeout is rescheduled (rescheduling statement missing from
 * this revision), otherwise the timeout id is cleared.
 */
static void
wrsmdteardown_tmo(void * arg)
{
if (wrsmdp->wrsmd_teardown_tmo_id != 0) {
if (wrsmduninit(wrsmdp) != 0) {
/*
 * If wrsmduninit() does not complete,
 * reschedule timeout to retry later.
 */
} else {
wrsmdp->wrsmd_teardown_tmo_id = 0;
}
}
D1("wrsmdteardown_tmo: done");
}
/*
* ****************************************************************
* *
* E N D TIMEOUT-FUNCTIONS *
* *
* ****************************************************************
*/
/*
* ****************************************************************
* *
* B E G I N EVENT-FUNCTIONS *
* *
* ****************************************************************
*/
/*
 * The wrsmd event thread. We can't make RSM_ calls that can block
 * from either callbacks (timeouts) or interrupts, so this thread
 * handles freedest and sync events on their behalf.
 */
/*
 * NOTE(review): wrsmd_event_thread fragment.  CPR-aware worker loop
 * that exits via thread_exit() when stop_events is set; the
 * CALLB_CPR_INIT/wait logic and the event-dispatch call are missing
 * lines in this revision.
 */
static void
wrsmd_event_thread(void *arg)
{
callb_generic_cpr, "wrsmd_event_thread");
/* LINTED: E_CONSTANT_CONDITION */
while (1) {
if (wrsmdp->stop_events) {
/*
 * CALLB_CPR_EXIT() calls mutex_exit() on the
 * lock passed into CALLB_CPR_INIT() above, therefore
 * we don't want to call mutex_exit() here. See
 */
thread_exit();
return;
}
}
}
/*
* Helper thread to process events off of the queue. Handles both sync and
* freedest events.
*/
/*
 * NOTE(review): unnamed fragment (declarator and switch header
 * missing).  Per the comment above, dispatches queued events:
 * WRSMD_EVT_SYNC (retry sync), WRSMD_EVT_SYNC_DQE, WRSMD_EVT_FREEDEST.
 * The per-case handler calls are missing from this revision.
 */
static void
{
case WRSMD_EVT_SYNC:
/* Try sync'ing again */
break;
case WRSMD_EVT_SYNC_DQE:
break;
case WRSMD_EVT_FREEDEST:
break;
default:
break;
}
}
}
/* Allocates and adds an event to the event queue */
/*
 * NOTE(review): unnamed fragment -- allocates and enqueues an event
 * (per the comment above), dropping it if stop_events is set.  The
 * declarator, allocation and enqueue statements are missing from this
 * revision.
 */
static void
{
if (wrsmdp->stop_events) {
return;
}
if (evt) {
} else {
}
}
/*
* ****************************************************************
* *
* E N D EVENT-FUNCTIONS *
* *
* ****************************************************************
*/
/*
 * NOTE(review): lock-lint-only stub functions; both declarator lines
 * are missing from this revision.  The second stub exists to give
 * warlock/lock_lint a call path into wrsmdfreebuf().
 */
#ifdef __lock_lint
void
{
}
void
{
wrsmdbuf_t z;
wrsmdfreebuf(&z);
}
#endif /* __lock_lint */