wrsmd.h revision 3db86aab554edbb4244c8d1a1c90f152eee768af
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2003 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* DLPI driver for RSM over Wildcat
*/
#ifndef _SYS_WRSMD_H_
#define _SYS_WRSMD_H_
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/ethernet.h>
/*
* Version number of the wrsmd remote shared memory layout.
*
* This driver is only supported on sparc systems, so there is no
* need to worry about byte ordering issues.
* However, this version number must be updated whenever there is a
* change in the layout of wrsmd remote shared memory that could
* lead to incompatibilities between systems.
*/
#define WRSMD_VERSION (2)
/*
* static limits
*/
/*
* NOTE(review): presumably ioctl command codes used to dump driver and
* destination state -- confirm against the driver's ioctl handler.
*/
#define WRSMD_DUMP_IOCTL (5618)
#define WRSMD_DUMP_DEST (5619)
/*
* NOTE(review): presumably the depth of, and creation flags for, the RSM
* interrupt send queue -- confirm at the point of use.
* RSM_INTR_SEND_Q_NO_FENCE is not defined in this header, so whoever
* expands RSM_DLPI_QFLAGS must have its definition in scope.
*/
#define RSM_DLPI_QDEPTH 100
#define RSM_DLPI_QFLAGS (RSM_INTR_SEND_Q_NO_FENCE)
#ifdef _KERNEL
/*
* 64-byte cache line constants.
*
* WRSMD_CACHELINE_SIZE    line size in bytes (1 << WRSMD_CACHELINE_SHIFT)
* WRSMD_CACHELINE_SHIFT   log2 of the line size
* WRSMD_CACHELINE_OFFSET  mask selecting the byte offset within a line
* WRSMD_CACHELINE_MASK    complement of the offset mask; clears the offset
* WRSMD_CACHELINE_ROUNDUP rounds b up to the next multiple of the line size
*
* WRSMD_CACHELINE_OFFSET is guarded with #ifndef so that an existing
* definition elsewhere is not disturbed.
*
* WRSMD_CACHELINE_ROUNDUP works on integer operands only (cast pointers
* before use) and evaluates its argument exactly once.
*/
#define WRSMD_CACHELINE_SIZE (0x40)
#define WRSMD_CACHELINE_SHIFT 6
#ifndef WRSMD_CACHELINE_OFFSET
#define WRSMD_CACHELINE_OFFSET (WRSMD_CACHELINE_SIZE - 1)
#endif
#define WRSMD_CACHELINE_MASK (~WRSMD_CACHELINE_OFFSET)
#define WRSMD_CACHELINE_ROUNDUP(b) \
	(((b) + WRSMD_CACHELINE_OFFSET) & WRSMD_CACHELINE_MASK)
/*
* Use the same format address as ethernet when interacting with higher
* level modules.
*/
typedef struct dl_rsm_addr {
union {
struct {
unsigned char zeroes[7];
unsigned char addr;
} wrsm;
struct { /* address in ethernet format */
struct ether_addr addr;
} ether;
} m;
/*
* Declarations specific to the medium
*/
/*
* Definitions for module_info.
*
* NOTE(review): only the minimum packet size is visible here; confirm
* where the remaining module_info values are defined.
*/
#define WRSMDMINPSZ (0) /* min packet size */
/*
* Driver parameters, from .conf file
*
* Timeout members hold the values described by the "Defaults and limits
* for parameters" commentary in this header, which states that timeout
* parameters are given in milliseconds.
*
* NOTE(review): several comments inside this structure are not followed
* by a member declaration in this view of the file; confirm the full
* member list against the complete header before relying on it.
*/
struct wrsmd_param {
/* Size of packet buffers (must be multiple of 64 bytes) */
/* Mask of base ID bits in IP address */
/* Number of packet buffers exported to each communicating peer */
/* Size of communications queues (must be at least wrsmd_buffers) */
/* Number of buffers which won't be loaned upstream */
/* Time to reclaim idle connection after (in seconds) UNIMPLEMENTED */
/* Number of retries after a read or write error */
/* Maximum # of queue packets per destination */
/* Initial FQE timeout interval */
int wrsmd_nobuf_init_tmo;
/* Maximum FQE timeout interval */
int wrsmd_nobuf_max_tmo;
/* Time after which we drop packets instead of doing FQE timeout */
/* Initial message timeout interval */
int wrsmd_msg_init_tmo;
/* Maximum message timeout interval */
int wrsmd_msg_max_tmo;
/* Time after which we drop connection instead of doing msg timeout */
/* Acknowledgment timeout interval */
int wrsmd_ack_tmo;
/* Queue element sync timeout interval */
/*
* timeout interval to wait before tearing down connection
* after last attach to device is removed.
*/
/* Number of packets to try and batch up in one transmission. */
/* Number of free buffers to try and batch up in one transmission. */
};
/*
* Defaults and limits for parameters
* Timeout parameter values now given in milliseconds,
* rather than ticks. Any values modified in wrsmd.conf
* must now be in milliseconds. Values get rounded up to
* the next tick value, with granularity 10 ms for the default
* 100 hz.
*/
/*
* Buffer and queue sizing.
* The default buffer size (16384) is a multiple of 64 and the default
* queue size (64) is not smaller than the default buffer count (32),
* matching the constraints stated on struct wrsmd_param.
*/
#define WRSMD_BUFFERS_DFLT 32
#define WRSMD_BUFFER_SIZE_DFLT 16384
#define WRSMD_QUEUE_SIZE_DFLT 64
#define WRSMD_BUFFERS_RETAINED_DFLT 32
/*
* NOTE(review): the idle reclaim parameter is documented as being in
* seconds (and unimplemented), unlike the millisecond timeouts below --
* confirm the unit of this default.
*/
#define WRSMD_IDLE_RECLAIM_TIME_DFLT 36000
#define WRSMD_ERR_RETRIES_DFLT 1
#define WRSMD_MAX_QUEUED_PKTS_DFLT 100
/* No-buffer (FQE) timeouts: initial, maximum, and drop thresholds */
#define WRSMD_NOBUF_INIT_TMO_DFLT 10
#define WRSMD_NOBUF_MAX_TMO_DFLT 2560
#define WRSMD_NOBUF_DROP_TMO_DFLT 5000
/* Message timeouts: initial, maximum, and drop thresholds */
#define WRSMD_MSG_INIT_TMO_DFLT 10
#define WRSMD_MSG_MAX_TMO_DFLT 1280
#define WRSMD_MSG_DROP_TMO_DFLT 30000
/* Acknowledgment and queue element sync timeouts */
#define WRSMD_ACK_TMO_DFLT 1000
#define WRSMD_SYNC_TMO_DFLT 10
/*
* We set this to two clock ticks (20 ms at the default 100 hz) to allow
* free destination timeouts (<= 1 tick) to complete first, before next
* teardown timeout, allowing fewer iterations of the latter.
*/
#define WRSMD_TEARDOWN_TMO_DFLT 20
/* Batching sizes for packets and free buffers per transmission */
#define WRSMD_TRAIN_SIZE_DFLT 8
#define WRSMD_FQE_SYNC_SIZE_DFLT 16
/*
* Macro to convert millisecond timeout parameters to clock ticks.
*/
/*
* Definition of each possible event type.
* NOTE(review): presumably the values stored in the `type` member of
* struct wrsmd_event -- confirm against the event thread code.
*/
#define WRSMD_EVT_FREEDEST 0
#define WRSMD_EVT_SYNC 1
#define WRSMD_EVT_SYNC_DQE 2
/*
* Per-Stream instance state information.
*
* Each instance is dynamically allocated at open() and freed at
* close(). Each per-stream instance points to at most one per-device
* structure using the ss_wrsmdp field. All instances are threaded
* together into one list of active instances ordered on minor device
* number.
*/
typedef struct wrsmdstr {
} wrsmdstr_t;
/*
* For performance reasons, we read the following things in wrsmdsendup()
* without getting ss_lock. As long as accesses to these variables are atomic,
* we believe nothing bad will happen.
*/
/* Per-stream flags */
typedef struct wrsmd_event {
struct wrsmd_event *next;
int type;
void *arg;
/*
* Per-Device instance state information.
*
* Each instance is dynamically allocated on first attach.
*/
typedef struct wrsmd {
/* matter which one, to run wsrv */
/* Counters to keep stats for netstat support */
/* Other counters, for internal use */
int wrsmd_numdest; /* Number of valid entries in desttbl */
struct wrsmd_dest /* table for destination structures */
/* Event thread for making RSM calls from non callback context */
} wrsmd_t;
/* RSMPI progress flags */
#define WRSMDREGHANDLER 0x01
#define WRSMDGOTCTLR 0x02
/*
* Attach progress bitmask.
*
* Each WRSMD_ATT_xxx value is a distinct bit; presumably each one records
* a completed attach step so that a partial attach can be undone.
* WRSMD_ATT_ALL is the union of every attach bit defined here and must be
* extended whenever a new WRSMD_ATT_xxx bit is added.
*/
#define WRSMD_ATT_MUTEX 0x01
#define WRSMD_ATT_LINKED 0x02
#define WRSMD_ATT_MINOR 0x04
#define WRSMD_ATT_KSTAT 0x08
#define WRSMD_ATT_EVT_THREAD 0x10
#define WRSMD_ATT_ALL \
	(WRSMD_ATT_MUTEX | WRSMD_ATT_LINKED | WRSMD_ATT_MINOR | \
	WRSMD_ATT_KSTAT | WRSMD_ATT_EVT_THREAD)
/*
* Number of bytes to add to buffer size to leave room for
* headers from other streams modules:
*
* TCP Header is 14 bytes
* IP Header is 20 bytes
*
* The value below is the sum of the two sizes listed (14 + 20).
* NOTE(review): 14 bytes is the size of an Ethernet header rather than
* of a TCP header -- confirm which header the first entry is meant to
* cover.
*/
#define WRSMDHEADROOM 34
/*
* Full dlsap address format
*/
typedef struct wrsmddladdr {
/*
* Full DLSAP address length
*/
/* Broadcast address length: the size of an ethernet-format address */
#define WRSMD_BCAST_ADDRL (sizeof (struct ether_addr))
/*
* NOTE(review): the purpose of this fixed length of 80 is not evident
* from this header -- confirm at the point of use.
*/
#define DLADDRL (80)
/*
* Export some of the error counters via the kstats mechanism.
*
* Every member is a struct kstat_named. Four statistics appear twice,
* once under a plain name and once with a "64" suffix: rsm_ipackets,
* rsm_opackets, rsm_in_bytes and rsm_out_bytes.
* NOTE(review): presumably the "64" members carry 64-bit versions of the
* same counter -- confirm against the kstat initialization code.
*/
typedef struct wrsmd_stat {
/* Packet and error counts */
struct kstat_named rsm_ipackets;
struct kstat_named rsm_ipackets64;
struct kstat_named rsm_ierrors;
struct kstat_named rsm_opackets;
struct kstat_named rsm_opackets64;
struct kstat_named rsm_oerrors;
struct kstat_named rsm_collisions;
/* Driver-internal counters */
struct kstat_named rsm_xfers;
struct kstat_named rsm_xfer_pkts;
struct kstat_named rsm_syncdqes;
struct kstat_named rsm_lbufs;
struct kstat_named rsm_nlbufs;
struct kstat_named rsm_pullup;
struct kstat_named rsm_pullup_fail;
struct kstat_named rsm_starts;
struct kstat_named rsm_start_xfers;
struct kstat_named rsm_fqetmo_hint;
struct kstat_named rsm_fqetmo_drops;
struct kstat_named rsm_maxq_drops;
struct kstat_named rsm_errs;
/* Byte counts */
struct kstat_named rsm_in_bytes;
struct kstat_named rsm_in_bytes64;
struct kstat_named rsm_out_bytes;
struct kstat_named rsm_out_bytes64;
} wrsmd_stat_t;
/* Some streams defines */
/*
* On Wildcat, if there is a data delivery problem with one of the 32 byte
* halves of a 64 byte write to the remote node, the remote side writes all
* 0's to that 32 byte region of memory. We guarantee that the 4 byte fqe
* entries and 8 byte byte dqe entries (described below) are aligned in a
* way that guarantees that each fits within a single 32 byte region, so
* checking for any non-zero value within the entry is sufficient to
* guarantee that the write was successful.
*
* We use the seqnum as the write validity check, which means it must never
* be 0. A non-0 value ensures that the remote write was successful.
*
* Each fqe and dqe is 64 bytes in size. This guarantees that
* we can write one entry at a time atomically, without disturbing any other
* entries. This also guarantees alignment to wildcat hardware. It does,
* however, waste some space.
*
*/
struct align_64byte { /* Align to 64 bytes */
};
/*
* Delivery Queue Entry, used to denote buffers containing new packets.
*/
typedef union wrsmd_dqe {
struct wrsmd_dqe_s { /* actual structure */
} s;
} wrsmd_dqe_t;
/*
* Free Queue Entry, used to denote buffers which are available to be filled.
*/
typedef union wrsmd_fqe {
struct wrsmd_fqe_s {
} s;
} wrsmd_fqe_t;
/*
* Segment data formats
*/
/*
* The major version should be bumped whenever the contents of the
* xfer segments are changed in a non-upward-compatible way, to prevent
* confused attempts at communication with machines running older protocol
* versions.
*
* NOTE(review): WRSMD_VERSION elsewhere in this header is 2 while the
* major version here is 1 -- confirm the two numbers are intentionally
* independent.
*/
#define WRSMD_VERS_MAJOR 1
#define WRSMD_VERS_MINOR 0
/*
* Header for the data transfer segment.
*/
typedef struct wrsmd_xfer_hdr {
/*
* Structure describing a loaned-up buffer
*/
typedef struct wrsmdbuf {
int rb_bufnum; /* Number of loaned buffer */
} wrsmdbuf_t;
/*
* Structure describing a packet which is currently being sent
*/
typedef struct wrsmd_pkt {
} wrsmd_pkt_t;
/*
* WRSMD message types
*
* Values 1-3 are, in order, the three steps of the connection handshake
* laid out in the connection protocol commentary in this header:
* connect request, connect accept, and connect acknowledgment.
*/
#define WRSMD_MSG_REQ_CONNECT 1
#define WRSMD_MSG_CON_ACCEPT 2
#define WRSMD_MSG_CON_ACK 3
#define WRSMD_MSG_SYNC_DQE 4
/*
* Special type: retransmit the last message sent, without incrementing
* the sequence number on the message.
*/
#define WRSMD_REXMIT 127
/*
*
* R S M D   C O N N E C T I O N   P R O T O C O L
*
*
* The connection protocol for the RSM DLPI driver is as follows:
*
*	INITIATOR				RESPONDER
*
* 1	Send WRSMD_MSG_REQ_CONNECT
*		Includes xfer segment ID
*
* 2						Send WRSMD_MSG_CON_ACCEPT
*						Includes xfer segment ID
*
* 3	Send WRSMD_MSG_CON_ACK
*
* If a WRSMD_MSG_REQ_CONNECT message is received while a
* WRSMD_MSG_REQ_CONNECT is outstanding to the same node ID: if the
* node receiving the duplicate WRSMD_MSG_REQ_CONNECT has a higher
* numbered ID, it will accept the connection. The lower numbered
* node will reject the duplicate.
*
* The special message type WRSMD_REXMIT causes us to retransmit the
* last message we sent (unsuccessfully or successfully), without
* incrementing the sequence number on the message. This is used when
* we get a timeout waiting for a response to a WRSMD_MSG_REQ_CONNECT
* request and want to resend it.
*
*/
typedef struct wrsmd_msg_header {
typedef struct wrsmd_con_request {
typedef struct wrsmd_con_accept {
typedef struct wrsmd_con_ack {
typedef struct wrsmd_syncdqe {
typedef union wrsmd_msg {
struct {
union {
} m;
} p;
} wrsmd_msg_t;
/*
* Structure describing someone else communicating with us (a destination)
*
* Member naming, as established by the per-member comments below:
*   rd_fqr_* / rd_dqr_*   free / delivery queue we read from (on our node)
*   rd_shdwfqw_* / rd_shdwdqw_*
*                         local shadow copies of the free / delivery
*                         queues we write to (which live on the partner)
*   suffix _f, _l         first and last usable element
*   suffix _n             element to be read next
*   suffix _i, _o         where the next element is added / transmitted from
*   suffix _f_addr        start of the allocated memory backing the queue
*
* NOTE(review): several section comments below are not followed by any
* member in this view of the file; confirm the full member list against
* the complete header.
*/
typedef struct wrsmd_dest {
/* Basics */
/* Interrupt queue */
/* Packet queue */
/* Local transfer segment */
/* Remote transfer segment */
/*
* NOTE(review): presumably nonzero once the remote transfer segment
* header has been validated -- confirm against the connection code.
*/
int rd_rxferhdr_valid;
/*
* Free queue we're writing to (describing buffers on our node
* available to partner; lives on partner)
*/
/*
* Delivery queue that we're writing to (describing buffers on
* partner that we've filled with data; lives on partner)
*/
/* Buffers (on partner) that we're writing to */
/*
* Free queue we're reading from (describing buffers on partner
* available to us; lives on our node)
*/
/* The queue entries pointed to are volatile-qualified. */
volatile wrsmd_fqe_t /* Pointers to ... */
*rd_fqr_f, /* First usable element in queue */
*rd_fqr_l, /* Last usable element in queue */
*rd_fqr_n; /* Element we'll read next */
/*
* Delivery queue we're reading from (describing buffers on our
* node that the partner has filled with data; lives on our node)
*/
/* The queue entries pointed to are volatile-qualified. */
volatile wrsmd_dqe_t /* Pointers to ... */
*rd_dqr_f, /* First usable element in queue */
*rd_dqr_l, /* Last usable element in queue */
*rd_dqr_n; /* Element we'll read next */
/* (Local) buffers we're reading from */
volatile void *rd_lbuf; /* Start of first local buffer */
/* for use in loaning buffers upward */
/* Information on cached FQE's */
/*
* Shadow free queue - local copy of free queue that lives on
* partner
*/
wrsmd_fqe_t /* Pointers to ... */
*rd_shdwfqw_f_addr, /* Start of alloc'd memory for queue */
*rd_shdwfqw_f, /* First usable element */
*rd_shdwfqw_l, /* Last usable element */
*rd_shdwfqw_i, /* Next element added to queue goes here */
*rd_shdwfqw_o; /* Next element transmitted comes from here */
/*
* Shadow delivery queue - local copy of delivery queue that lives
* on partner
*/
wrsmd_dqe_t /* Pointers to ... */
*rd_shdwdqw_f_addr, /* Start of alloc'd memory for queue */
*rd_shdwdqw_f, /* First usable element */
*rd_shdwdqw_l, /* Last usable element */
*rd_shdwdqw_i, /* Next element added to queue goes here */
*rd_shdwdqw_o; /* Next element transmitted comes from here */
/* time we tried to write an FQE */
/* time we tried to write a DQE */
/* sync queues, the network is down */
/* State information */
short rd_refcnt; /* Destination reference count */
/* Last message transmitted, for rexmits if needed */
/* Timeout information */
int rd_tmo_int; /* backoff interval for timeout */
int rd_tmo_tot; /* ticks we've waited so far this timeout */
/* waiting for rd_nlb to go to 0 */
} wrsmd_dest_t;
/*
* Run queue:
*
* Certain operations on destinations are performed by the driver's write
* service routine (wrsmd_wsrv). In order to arrange for this, there is a
* queue of destinations waiting to be processed by the service routine.
* Each device's wrsmd_runq points to the head of this queue of destinations,
* which are linked together via rd_next. Whenever the service routine
* runs, after it has served its usual purpose of processing messages from
* the stream's service queue, it traverses its list of destinations and
* performs appropriate operations on them, depending on their state.
*
* The rd_next pointer is protected by the runq_lock everywhere but in the
* middle of the service routine. Essentially, the service routine takes a
* whole chain of destination entries off of the run queue at once (inside
* the runq_lock), and then traverses the list (outside the runq_lock). Since
* a scheduled destination should never be given a new state except by the
* service routine, there should be no conflicting updates to rd_next.
*
* Destination states:
*
* A scheduled state means the destination is on the run queue; an unscheduled
* state means the destination is not. State transitions are always from
* scheduled to unscheduled or vice versa.
*
* A state with a name of the form WRSMD_STATE_S_xxx is a scheduled state where
* the service routine is going to do xxx next. These states have odd numbers.
*
* A state with a name of the form WRSMD_STATE_W_xxx is an unscheduled state
* where we are waiting for xxx to happen. These states have even numbers.
*/
#define WRSMD_STATE_NEW 0 /* Newly created */
/*
* Evaluates to the low bit of state s: nonzero for odd state numbers,
* zero for even ones. Per the run queue commentary, scheduled
* (WRSMD_STATE_S_xxx) states have odd numbers and unscheduled
* (WRSMD_STATE_W_xxx) states have even numbers, so a nonzero result
* means the state is a scheduled one.
*/
#define WRSMD_SCHED_STATE(s) ((s) & 1)
/*
* Map a destination state value to the name of its WRSMD_STATE_xxx
* constant. Evaluates to a string literal, or to "unknown" when x
* matches none of the listed states.
*
* The argument is parenthesized at every use so that expression
* arguments (for example a value built with | or &) are compared as a
* whole rather than being split by the higher precedence of ==.
* x is evaluated once per comparison, so it must not have side effects.
*/
#define WRSMD_STATE_STR(x) ( \
	((x) == WRSMD_STATE_NEW) ? "WRSMD_STATE_NEW" : \
	((x) == WRSMD_STATE_INPROGRESS) ? "WRSMD_STATE_INPROGRESS" : \
	((x) == WRSMD_STATE_DELETING) ? "WRSMD_STATE_DELETING" : \
	((x) == WRSMD_STATE_W_SCONNTMO) ? "WRSMD_STATE_W_SCONNTMO" : \
	((x) == WRSMD_STATE_W_ACCEPT) ? "WRSMD_STATE_W_ACCEPT" : \
	((x) == WRSMD_STATE_W_ACK) ? "WRSMD_STATE_W_ACK" : \
	((x) == WRSMD_STATE_W_READY) ? "WRSMD_STATE_W_READY" : \
	((x) == WRSMD_STATE_W_FQE) ? "WRSMD_STATE_W_FQE" : \
	((x) == WRSMD_STATE_S_REQ_CONNECT) ? "WRSMD_STATE_S_REQ_CONNECT" : \
	((x) == WRSMD_STATE_S_NEWCONN) ? "WRSMD_STATE_S_NEWCONN" : \
	((x) == WRSMD_STATE_S_CONNXFER_ACCEPT) ? \
	    "WRSMD_STATE_S_CONNXFER_ACCEPT" : \
	((x) == WRSMD_STATE_S_CONNXFER_ACK) ? "WRSMD_STATE_S_CONNXFER_ACK" : \
	((x) == WRSMD_STATE_S_XFER) ? "WRSMD_STATE_S_XFER" : \
	((x) == WRSMD_STATE_S_DELETE) ? "WRSMD_STATE_S_DELETE" : \
	((x) == WRSMD_STATE_S_SCONN) ? "WRSMD_STATE_S_SCONN" : \
	"unknown")
/*
* keep a bitmask of the ones we've done, so that when we delete a
* destination we don't try and undo something we never did. Also, we
* sometimes check to make sure rd_sstate is WRSMD_RSMS_ALL before trying to
* perform an operation on a destination, to ensure we don't get ahead of
* our initialization.
*/
#define WRSMD_RSMS_ALL \
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_WRSMD_H_ */