ibd.h revision 1cfa752f4e24c34133009b0f6c139127a5c461de
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_IB_CLIENTS_IBD_H
#define _SYS_IB_CLIENTS_IBD_H
#ifdef __cplusplus
extern "C" {
#endif
/* The following macros are used in both ibd.c and ibd_cm.c */
/*
* Completion queue polling control
*/
#define IBD_CQ_POLLING 0x1
#define IBD_REDO_CQ_POLLING 0x2
/*
* Maximum length for returning chained mps back to crossbow.
* Also used as the maximum number of rx wc's polled at a time.
*/
#define IBD_MAX_RX_MP_LEN 16
/*
* When doing multiple-send-wr, this value determines how many to do at
* a time (in a single ibt_post_send).
*/
#define IBD_MAX_TX_POST_MULTIPLE 4
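/*
 * Illustrative sketch only (the helper below is hypothetical; the real
 * batching logic lives in ibd.c's send path): queued WRs can be handed
 * to the HCA in groups of at most IBD_MAX_TX_POST_MULTIPLE per
 * ibt_post_send() call.
 *
 *	static void
 *	ibd_post_batch(ibt_channel_hdl_t chan, ibt_send_wr_t *wrs, uint_t n)
 *	{
 *		uint_t done = 0, posted;
 *
 *		while (done < n) {
 *			uint_t cnt = n - done;
 *
 *			if (cnt > IBD_MAX_TX_POST_MULTIPLE)
 *				cnt = IBD_MAX_TX_POST_MULTIPLE;
 *			(void) ibt_post_send(chan, &wrs[done], cnt, &posted);
 *			done += cnt;
 *		}
 *	}
 */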
/*
* Flag bits for resources to reap
*/
#define IBD_RSRC_SWQE 0x1
#define IBD_RSRC_LSOBUF 0x2
#define IBD_RSRC_RC_SWQE 0x4
#define IBD_RSRC_RC_TX_LARGEBUF 0x8
/*
* Async operation types
*/
#define IBD_ASYNC_GETAH 1
#define IBD_ASYNC_JOIN 2
#define IBD_ASYNC_LEAVE 3
#define IBD_ASYNC_PROMON 4
#define IBD_ASYNC_PROMOFF 5
#define IBD_ASYNC_REAP 6
#define IBD_ASYNC_TRAP 7
#define IBD_ASYNC_SCHED 8
#define IBD_ASYNC_LINK 9
#define IBD_ASYNC_EXIT 10
#define IBD_ASYNC_RC_TOO_BIG 11
#define IBD_ASYNC_RC_CLOSE_ACT_CHAN 12
#define IBD_ASYNC_RC_RECYCLE_ACE 13
/*
* Miscellaneous constants
*/
#define IBD_SEND 0
#define IBD_RECV 1
/* Tunables defaults and limits */
#define IBD_LINK_MODE_UD 0
#define IBD_LINK_MODE_RC 1
#define IBD_DEF_LINK_MODE IBD_LINK_MODE_RC
#define IBD_DEF_LSO_POLICY B_TRUE
#define IBD_DEF_NUM_LSO_BUFS 1024
#define IBD_DEF_CREATE_BCAST_GROUP B_TRUE
#define IBD_DEF_COALESCE_COMPLETIONS B_TRUE
#define IBD_DEF_UD_RX_COMP_COUNT 4
#define IBD_DEF_UD_RX_COMP_USEC 10
#define IBD_DEF_UD_TX_COMP_COUNT 16
#define IBD_DEF_UD_TX_COMP_USEC 300
#define IBD_DEF_RC_RX_COMP_COUNT 4
#define IBD_DEF_RC_RX_COMP_USEC 10
#define IBD_DEF_RC_TX_COMP_COUNT 10
#define IBD_DEF_RC_TX_COMP_USEC 300
#define IBD_DEF_UD_TX_COPY_THRESH 4096
#define IBD_DEF_RC_RX_COPY_THRESH 4096
#define IBD_DEF_RC_TX_COPY_THRESH 4096
#define IBD_DEF_UD_NUM_RWQE 4000
#define IBD_DEF_UD_NUM_SWQE 4000
#define IBD_DEF_RC_ENABLE_SRQ B_TRUE
#define IBD_DEF_RC_NUM_RWQE 2047
#define IBD_DEF_RC_NUM_SWQE 511
#define IBD_DEF_NUM_AH 256
#define IBD_DEF_HASH_SIZE 32
#define IBD_DEF_RC_NUM_SRQ (IBD_DEF_RC_NUM_RWQE - 1)
#define IBD_DEF_RC_RX_RWQE_THRESH (IBD_DEF_RC_NUM_RWQE >> 2)
/* Tunable limits */
#define IBD_MIN_NUM_LSO_BUFS 512
#define IBD_MAX_NUM_LSO_BUFS 4096
#define IBD_MIN_UD_TX_COPY_THRESH 2048
#define IBD_MAX_UD_TX_COPY_THRESH 65536
#define IBD_MIN_UD_NUM_SWQE 512
#define IBD_MAX_UD_NUM_SWQE 8000
#define IBD_MIN_UD_NUM_RWQE 512
#define IBD_MAX_UD_NUM_RWQE 8000
#define IBD_MIN_NUM_AH 32
#define IBD_MAX_NUM_AH 8192
#define IBD_MIN_HASH_SIZE 32
#define IBD_MAX_HASH_SIZE 1024
#define IBD_MIN_RC_NUM_SWQE 511
#define IBD_MAX_RC_NUM_SWQE 8000
#define IBD_MIN_RC_NUM_RWQE 511
#define IBD_MAX_RC_NUM_RWQE 8000
#define IBD_MIN_RC_RX_COPY_THRESH 1500
#define IBD_MAX_RC_RX_COPY_THRESH 65520
#define IBD_MIN_RC_TX_COPY_THRESH 1500
#define IBD_MAX_RC_TX_COPY_THRESH 65520
#define IBD_MIN_RC_NUM_SRQ (IBD_MIN_RC_NUM_RWQE - 1)
#define IBD_MIN_RC_RX_RWQE_THRESH (IBD_MIN_RC_NUM_RWQE >> 2)
/*
* Thresholds
*
 * When waiting for resources (swqes or lso buffers) to become available,
 * the first two thresholds below determine how many must be free before
 * informing the network layer that it can start sending packets again.
 * IBD_TX_POLL_THRESH determines how low the number of available swqes
 * may go before we start polling the completion queue.
*/
#define IBD_FREE_LSOS_THRESH 8
#define IBD_FREE_SWQES_THRESH 20
#define IBD_TX_POLL_THRESH 80
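/*
 * Sketch of how these thresholds are meant to be used (illustrative
 * only; the real checks live in ibd.c):
 *
 *	if (state->id_tx_list.dl_cnt < IBD_TX_POLL_THRESH)
 *		poll the send CQ to reclaim completed swqes;
 *	if (state->id_tx_list.dl_cnt > IBD_FREE_SWQES_THRESH)
 *		resume the network layer, e.g. via mac_tx_update();
 */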
#ifdef DEBUG
void debug_print(int l, char *fmt, ...);
#define DPRINT debug_print
#else
#define DPRINT 0 &&
#endif
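/*
 * In non-DEBUG builds, an invocation such as DPRINT(10, "fmt", arg)
 * expands to "0 && (10, "fmt", arg)"; the && short-circuits, so the
 * argument list compiles but is never evaluated and generates no code.
 */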
/*
* AH and MCE active list manipulation:
*
* Multicast disable requests and MCG delete traps are two cases
* where the active AH entry for the mcg (if any unreferenced one exists)
* will be moved to the free list (to force the next Tx to the mcg to
* join the MCG in SendOnly mode). Port up handling will also move AHs
* from active to free list.
*
* In the case when some transmits are still pending on an entry
* for an mcg, but a multicast disable has already been issued on the
* mcg, there are some options to consider to preserve the join state
* to ensure the emitted packet is properly routed on the IBA fabric.
* For the AH, we can
* 1. take out of active list at multicast disable time.
* 2. take out of active list only when last pending Tx completes.
* For the MCE, we can
* 3. take out of active list at multicast disable time.
* 4. take out of active list only when last pending Tx completes.
* 5. move from active list to stale list at multicast disable time.
* We choose to use 2,4. We use option 4 so that if a multicast enable
* is tried before the pending Tx completes, the enable code finds the
* mce in the active list and just has to make sure it will not be reaped
 * (i.e., the mcg leave performed) when the pending Tx does complete. Alternatively,
* a stale list (#5) that would be checked in the enable code would need
* to be implemented. Option 2 is used, because otherwise, a Tx attempt
* after the multicast disable would try to put an AH in the active list,
* and associate the mce it finds in the active list to this new AH,
* whereas the mce is already associated with the previous AH (taken off
* the active list), and will be removed once the pending Tx's complete
* (unless a reference count on mce's is implemented). One implication of
* using 2,4 is that new Tx's posted before the pending Tx's complete will
* grab new references on the AH, further delaying the leave.
*
* In the case of mcg delete (or create) trap when the port is sendonly
* joined, the AH and MCE handling is different: the AH and MCE has to be
* immediately taken off the active lists (forcing a join and path lookup
* at the next Tx is the only guaranteed means of ensuring a proper Tx
 * to an mcg as it is repeatedly created and deleted and goes through
 * reincarnations).
*
* When a port is already sendonly joined, and a multicast enable is
* attempted, the same mce structure is promoted; this ensures only a
* single mce on the active list tracks the most powerful join state.
*
* In the case of port up event handling, the MCE for sendonly membership
* is freed up, and the ACE is put into the free list as soon as possible
* (depending on whether posted Tx's have completed). For fullmembership
* MCE's though, the ACE is similarly handled; but the MCE is kept around
* (a re-JOIN is attempted) only if the DLPI leave has not already been
* done; else the mce is deconstructed (mc_fullreap case).
*
* MCG creation and deletion trap handling:
*
* These traps are unreliable (meaning sometimes the trap might never
* be delivered to the subscribed nodes) and may arrive out-of-order
* since they use UD transport. An alternative to relying on these
* unreliable traps is to poll for mcg presence every so often, but
* instead of doing that, we try to be as conservative as possible
* while handling the traps, and hope that the traps do arrive at
 * the subscribed nodes soon. Note that if a node is fullmember
 * joined to an mcg, it cannot possibly receive an mcg create/delete
 * trap for that mcg (by definition of full membership); if it does,
 * it is an old trap from a previous incarnation of the mcg.
*
* Whenever a trap is received, the driver cleans up its sendonly
* membership to the group; we choose to do a sendonly leave even
* on a creation trap to handle the case of a prior deletion of the mcg
* having gone unnoticed. Consider an example scenario:
* T1: MCG M is deleted, and fires off deletion trap D1.
* T2: MCG M is recreated, fires off creation trap C1, which is lost.
* T3: Node N tries to transmit to M, joining in sendonly mode.
* T4: MCG M is deleted, and fires off deletion trap D2.
 * T5: N receives a deletion trap, but cannot distinguish D1 from D2.
* If the trap is D2, then a LEAVE is not required, since the mcg
* is already deleted; but if it is D1, a LEAVE is required. A safe
* approach is to always LEAVE, but the SM may be confused if it
* receives a LEAVE without a prior JOIN.
*
* Management of the non-membership to an mcg is similar to the above,
* except that if the interface is in promiscuous mode, it is required
* to attempt to re-join the mcg after receiving a trap. Unfortunately,
* if the re-join attempt fails (in which case a warning message needs
* to be printed), it is not clear whether it failed due to the mcg not
* existing, or some fabric/hca issues, due to the delayed nature of
* trap delivery. Querying the SA to establish presence/absence of the
* mcg is also racy at best. Thus, the driver just prints a warning
 * message when it cannot rejoin after receiving a create trap, although
 * this might occasionally be a spurious warning if the create trap is
* received after the mcg was deleted.
*/
/*
* Implementation of atomic "recycle" bits and reference count
* on address handles. This utilizes the fact that max reference
* count on any handle is limited by number of send wqes, thus
* high bits in the ac_ref field can be used as the recycle bits,
* and only the low bits hold the number of pending Tx requests.
* This atomic AH reference counting allows the Tx completion
* handler not to acquire the id_ac_mutex to process every completion,
* thus reducing lock contention problems between completion and
* the Tx path.
*/
#define CYCLEVAL 0x80000
#define CLEAR_REFCYCLE(ace) (ace)->ac_ref = 0
#define CYCLE_SET(ace) (((ace)->ac_ref & CYCLEVAL) == CYCLEVAL)
#define GET_REF(ace) ((ace)->ac_ref)
#define GET_REF_CYCLE(ace) ( \
/* \
* Make sure "cycle" bit is set. \
*/ \
ASSERT(CYCLE_SET(ace)), \
((ace)->ac_ref & ~(CYCLEVAL)) \
)
#define INC_REF(ace, num) { \
atomic_add_32(&(ace)->ac_ref, num); \
}
#define SET_CYCLE_IF_REF(ace) ( \
CYCLE_SET(ace) ? B_TRUE : \
	    atomic_add_32_nv(&(ace)->ac_ref, CYCLEVAL) ==		\
CYCLEVAL ? \
/* \
* Clear the "cycle" bit we just set; \
* ref count known to be 0 from above. \
*/ \
CLEAR_REFCYCLE(ace), B_FALSE : \
/* \
* We set "cycle" bit; let caller know. \
*/ \
B_TRUE \
)
#define DEC_REF_DO_CYCLE(ace) ( \
	atomic_dec_32_nv(&(ace)->ac_ref) == CYCLEVAL ?			\
/* \
* Ref count known to be 0 from above. \
*/ \
B_TRUE : \
B_FALSE \
)
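/*
 * Usage sketch (illustrative only; the surrounding control flow is
 * hypothetical): the Tx path takes references before posting, and the
 * completion handler drops them without holding id_ac_mutex. A B_TRUE
 * return from DEC_REF_DO_CYCLE() means the recycle bit was set and the
 * last reference was just dropped, so the ace can now be recycled.
 *
 *	INC_REF(ace, 1);			// Tx path, before post
 *	...
 *	if (DEC_REF_DO_CYCLE(ace))		// Tx completion handler
 *		schedule ace recycling via an async request, since no
 *		Tx references remain;
 */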
/*
* Address handle entries maintained by the driver are kept in the
* free and active lists. Each entry starts out in the free list;
* it migrates to the active list when primed using ibt_get_paths()
* and ibt_modify_ud_dest() for transmission to a specific destination.
* In the active list, the entry has a reference count indicating the
* number of ongoing/uncompleted transmits that reference it. The
* entry is left in the active list even after the reference count
* goes to 0, since successive transmits can find it there and do
 * not need to set up another entry (i.e., the path information is
 * cached using the active list). Entries on the active list are
* also hashed using the destination link address as a key for faster
* lookups during transmits.
*
* For any destination address (unicast or multicast, whatever the
* join states), there will be at most one entry in the active list.
* Entries with a 0 reference count on the active list can be reused
* for a transmit to a new destination, if the free list is empty.
*
* The AH free list insertion/deletion is protected with the id_ac_mutex,
* since the async thread and Tx callback handlers insert/delete. The
* active list does not need a lock (all operations are done by the
* async thread) but updates to the reference count are atomically
* done (increments done by Tx path, decrements by the Tx callback handler).
*/
#define IBD_ACACHE_INSERT_FREE(state, ce) \
list_insert_head(&state->id_ah_free, ce)
#define IBD_ACACHE_GET_FREE(state) \
list_get_head(&state->id_ah_free)
#define IBD_ACACHE_INSERT_ACTIVE(state, ce) { \
int _ret_; \
list_insert_head(&state->id_ah_active, ce); \
_ret_ = mod_hash_insert(state->id_ah_active_hash, \
(mod_hash_key_t)&ce->ac_mac, (mod_hash_val_t)ce); \
ASSERT(_ret_ == 0); \
state->id_ac_hot_ace = ce; \
}
#define IBD_ACACHE_PULLOUT_ACTIVE(state, ce) { \
list_remove(&state->id_ah_active, ce); \
if (state->id_ac_hot_ace == ce) \
state->id_ac_hot_ace = NULL; \
(void) mod_hash_remove(state->id_ah_active_hash, \
(mod_hash_key_t)&ce->ac_mac, (mod_hash_val_t)ce); \
}
#define IBD_ACACHE_GET_ACTIVE(state) \
list_get_head(&state->id_ah_active)
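/*
 * Lookup sketch (illustrative only; the hash-miss and priming details
 * are simplified and the flow below is hypothetical): a transmit first
 * tries the cached hot ace, then the active hash, and finally falls
 * back to priming a free-list entry.
 *
 *	ibd_ace_t *ce = state->id_ac_hot_ace;
 *	mod_hash_val_t hval;
 *
 *	if (ce == NULL || bcmp(&ce->ac_mac, mac, IPOIB_ADDRL) != 0) {
 *		if (mod_hash_find(state->id_ah_active_hash,
 *		    (mod_hash_key_t)mac, &hval) == 0)
 *			ce = (ibd_ace_t *)hval;
 *		else
 *			ce = IBD_ACACHE_GET_FREE(state);  // then prime it
 *	}
 *	INC_REF(ce, 1);
 */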
/*
 * Padding for nd6 Neighbor Solicitation and Advertisement needs to be at
 * the front of the optional src/tgt link layer address. Right now Solaris
 * inserts the padding at the end: the routine that does this, nce_xmit()
 * in ip_ndp.c, copies the nd_lla_addr immediately after the nd_opt_hdr_t.
 * So when the packet comes down from the IP layer to the IBD driver, it
 * is in the following format:
 * [IPoIB_PTXHDR_T][INET6 packet][ICMP6][OPT_ND_HDR_T]
 * where the 2-byte option header is followed by the 20-byte ipoib_machdr
 * (IPOIB_ADDRL); as a result machdr is not 4-byte aligned and has 2 bytes
 * of padding at the end.
 *
 * The send routine in the IBD driver rearranges this packet as follows:
 * [IPoIB_HDR_T][INET6 packet][ICMP6][OPT_ND_HDR_T + 2 bytes of padding]
 * followed by the 20-byte ipoib_machdr, which leaves machdr 4-byte
 * aligned.
 *
 * At the receiving side, ibd_process_rx takes the above packet, removes
 * the two bytes of front padding and re-inserts them at the end, since
 * the IP layer does not understand padding at the front.
 */
#define IBD_PAD_NSNA(ip6h, len, type) { \
uchar_t *nd_lla_ptr; \
icmp6_t *icmp6; \
nd_opt_hdr_t *opt; \
int i; \
\
icmp6 = (icmp6_t *)&ip6h[1]; \
len -= sizeof (nd_neighbor_advert_t); \
if (((icmp6->icmp6_type == ND_NEIGHBOR_SOLICIT) || \
(icmp6->icmp6_type == ND_NEIGHBOR_ADVERT)) && \
(len != 0)) { \
opt = (nd_opt_hdr_t *)((uint8_t *)ip6h \
+ IPV6_HDR_LEN + sizeof (nd_neighbor_advert_t)); \
ASSERT(opt != NULL); \
nd_lla_ptr = (uchar_t *)&opt[1]; \
if (type == IBD_SEND) { \
for (i = IPOIB_ADDRL; i > 0; i--) \
*(nd_lla_ptr + i + 1) = \
*(nd_lla_ptr + i - 1); \
} else { \
for (i = 0; i < IPOIB_ADDRL; i++) \
*(nd_lla_ptr + i) = \
*(nd_lla_ptr + i + 2); \
} \
*(nd_lla_ptr + i) = 0; \
*(nd_lla_ptr + i + 1) = 0; \
} \
}
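/*
 * Usage sketch (illustrative only; variable names are hypothetical):
 * both directions invoke the macro with a pointer to the IPv6 header
 * and the remaining payload length, differing only in the type flag.
 *
 *	len = ntohs(ip6h->ip6_plen);
 *	IBD_PAD_NSNA(ip6h, len, IBD_SEND);	// in the Tx path
 *	...
 *	IBD_PAD_NSNA(ip6h, len, IBD_RECV);	// in ibd_process_rx()
 */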
/*
 * IETF defined IPoIB encapsulation header: 2 bytes of ethertype
 * followed by 2 reserved (must be zero) bytes. This is at the start
 * of the datagram sent to and received over the wire by the driver.
 */
typedef struct ipoib_header {
ushort_t ipoib_type;
ushort_t ipoib_mbz;
} ipoib_hdr_t;
#define IPOIB_HDRSIZE sizeof (struct ipoib_header)
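/*
 * For example (illustrative only), the header for an IPv4 datagram
 * would be filled in as:
 *
 *	ipoib_hdr_t hdr;
 *
 *	hdr.ipoib_type = htons(ETHERTYPE_IP);	// 0x0800
 *	hdr.ipoib_mbz = 0;			// reserved, must be zero
 */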
/*
* IETF defined IPoIB link address; IBA QPN, followed by GID,
* which has a prefix and suffix, as reported via ARP.
*/
typedef struct ipoib_mac {
uint32_t ipoib_qpn;
uint32_t ipoib_gidpref[2];
uint32_t ipoib_gidsuff[2];
} ipoib_mac_t;
#define IPOIB_ADDRL sizeof (struct ipoib_mac)
/*
* Pseudo header prepended to datagram in DLIOCRAW transmit path
* and when GLD hands the datagram to the gldm_send entry point.
*/
typedef struct ipoib_ptxhdr {
ipoib_mac_t ipoib_dest;
ipoib_hdr_t ipoib_rhdr;
} ipoib_ptxhdr_t;
#define IPOIBDLSAP(p, offset) ((ipoib_ptxhdr_t *)((caddr_t)(p)+offset))
/*
* The pseudo-GRH structure that sits before the data in the
* receive buffer, and is overlaid on top of the real GRH.
* The driver sets the ipoib_vertcflow to 0 if the pseudo-GRH
* does not hold valid information. If it is indicated valid,
* the driver must additionally provide the sender's qpn in
* network byte order in ipoib_sqpn, and not touch the
* remaining parts which were DMA'ed in by the IBA hardware.
*/
typedef struct ipoib_pgrh {
uint32_t ipoib_vertcflow;
uint32_t ipoib_sqpn;
uint32_t ipoib_sgid_pref[2];
uint32_t ipoib_sgid_suff[2];
uint32_t ipoib_dgid_pref[2];
uint32_t ipoib_dgid_suff[2];
} ipoib_pgrh_t;
/*
 * The GRH is also DMA'ed into recv buffers, thus space needs
 * to be allocated for it.
 */
#define IPOIB_GRH_SIZE sizeof (ipoib_pgrh_t)
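/*
 * Rx sketch (illustrative only; the buffer naming is hypothetical):
 * the receive path tests the first word to tell the two cases apart.
 *
 *	ipoib_pgrh_t *pgrh = (ipoib_pgrh_t *)rwqe_buf;
 *
 *	if (pgrh->ipoib_vertcflow != 0) {
 *		// pseudo-GRH is valid; the sender's QPN is in
 *		// pgrh->ipoib_sqpn, already in network byte order
 *	}
 */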
/* support the RC (reliable connected) mode */
#define IBD_MAC_ADDR_RC 0x80000000
/* support the UC (unreliable connected) mode */
#define IBD_MAC_ADDR_UC 0x40000000
#define IBD_RC_SERVICE_ID 0x100000000000000ULL
/*
 * Legacy OFED used a wrong service ID (with one additional zero digit)
 * for many years. To interoperate with legacy OFED, we also support this
 * wrong service ID here.
*/
#define IBD_RC_SERVICE_ID_OFED_INTEROP 0x1000000000000000ULL
#define IBD_RC_MIN_CQ_SIZE 0x7f
/* Number of ibt_wc_t provided for each RC channel */
#define IBD_RC_MAX_CQ_WC 0x3f
#if defined(_KERNEL) && !defined(_BOOT)
#include <sys/ib/ibtl/ibti.h>
#include <sys/ib/ib_pkt_hdrs.h>
#include <sys/list.h>
#include <sys/mac_provider.h>
#include <sys/mac_ib.h>
#include <sys/modhash.h>
/* State of a reliable connected channel (ibd_rc_chan_t->chan_state) */
typedef enum {
IBD_RC_STATE_INIT = 0,
/* Active side */
IBD_RC_STATE_ACT_REP_RECV, /* reply received */
IBD_RC_STATE_ACT_ESTAB, /* established, ready to send */
IBD_RC_STATE_ACT_REJECT, /* rejected */
/* Someone else is closing this channel, please don't re-close it */
IBD_RC_STATE_ACT_CLOSING,
IBD_RC_STATE_ACT_CLOSED,
IBD_RC_STATE_ACT_ERROR,
/* Passive side */
IBD_RC_STATE_PAS_REQ_RECV, /* request received */
IBD_RC_STATE_PAS_ESTAB, /* established, ready to receive */
IBD_RC_STATE_PAS_REJECT, /* rejected */
IBD_RC_STATE_PAS_CLOSED
} ibd_rc_chan_state_t;
/*
* Structure to encapsulate various types of async requests.
*/
typedef struct ibd_acache_rq {
struct list_node rq_list; /* list of pending work */
int rq_op; /* what operation */
ipoib_mac_t rq_mac;
ib_gid_t rq_gid;
void *rq_ptr;
void *rq_ptr2;
} ibd_req_t;
typedef struct ibd_mcache {
struct list_node mc_list; /* full/non list */
uint8_t mc_jstate;
boolean_t mc_fullreap;
ibt_mcg_info_t mc_info;
ibd_req_t mc_req; /* to queue LEAVE req */
} ibd_mce_t;
typedef struct ibd_acache_s {
struct list_node ac_list; /* free/active list */
ibt_ud_dest_hdl_t ac_dest;
ipoib_mac_t ac_mac;
uint32_t ac_ref;
ibd_mce_t *ac_mce; /* for MCG AHs */
/* For Reliable Connected mode */
struct ibd_rc_chan_s *ac_chan;
/* protect tx_too_big_ongoing */
kmutex_t tx_too_big_mutex;
/* Deal with too big packet */
boolean_t tx_too_big_ongoing;
} ibd_ace_t;
#define IBD_MAX_SQSEG 59
#define IBD_MAX_RQSEG 1
typedef enum {
IBD_WQE_SEND,
IBD_WQE_RECV
} ibd_wqe_type_t;
typedef enum {
IBD_WQE_TXBUF = 1,
IBD_WQE_LSOBUF = 2,
IBD_WQE_MAPPED = 3,
IBD_WQE_RC_COPYBUF = 4
} ibd_wqe_buftype_t;
#ifdef DEBUG
typedef struct ibd_rc_stat_s {
kstat_named_t rc_rcv_trans_byte;
kstat_named_t rc_rcv_trans_pkt;
kstat_named_t rc_rcv_copy_byte;
kstat_named_t rc_rcv_copy_pkt;
kstat_named_t rc_rcv_alloc_fail;
kstat_named_t rc_rcq_invoke;
kstat_named_t rc_rcq_err; /* fail in rcq handler */
kstat_named_t rc_scq_invoke;
kstat_named_t rc_rwqe_short; /* short rwqe */
kstat_named_t rc_xmt_bytes;
/* pkt size <= state->id_rc_tx_copy_thresh */
kstat_named_t rc_xmt_small_pkt;
kstat_named_t rc_xmt_fragmented_pkt;
/* fail in ibt_map_mem_iov() */
kstat_named_t rc_xmt_map_fail_pkt;
/* succ in ibt_map_mem_iov() */
kstat_named_t rc_xmt_map_succ_pkt;
kstat_named_t rc_ace_not_found; /* ace not found */
/* no swqe even after recycle */
kstat_named_t rc_scq_no_swqe;
/* no tx large buf even after recycle */
kstat_named_t rc_scq_no_largebuf;
/* short swqe in ibd_send() */
kstat_named_t rc_swqe_short;
	/* call mac_tx_update() when there are enough swqes */
kstat_named_t rc_swqe_mac_update;
/* short large buf in ibd_send() */
kstat_named_t rc_xmt_buf_short;
	/* call mac_tx_update() when there are enough Tx large buffers */
kstat_named_t rc_xmt_buf_mac_update;
	kstat_named_t rc_conn_succ;	/* # of successful connects */
	kstat_named_t rc_conn_fail;	/* # of failed connects */
/* ace->ac_chan == NULL for unicast packet */
kstat_named_t rc_null_conn;
/* not in active established state */
kstat_named_t rc_no_estab_conn;
kstat_named_t rc_act_close; /* call ibd_rc_act_close() */
kstat_named_t rc_pas_close; /* call ibd_rc_pas_close() */
kstat_named_t rc_delay_ace_recycle;
kstat_named_t rc_act_close_simultaneous;
	kstat_named_t rc_reset_cnt;	/* # of RC channel resets */
} ibd_rc_stat_t;
#endif
typedef struct ibd_rc_chan_list_s {
/* This mutex protects chan_list and ibd_rc_chan_t.next */
kmutex_t chan_list_mutex;
struct ibd_rc_chan_s *chan_list;
} ibd_rc_chan_list_t;
typedef struct ibd_rc_tx_largebuf_s {
struct ibd_rc_tx_largebuf_s *lb_next;
uint8_t *lb_buf;
} ibd_rc_tx_largebuf_t;
/*
* Pre-registered copybuf used for send and receive
*/
typedef struct ibd_copybuf_s {
ibt_wr_ds_t ic_sgl;
uint8_t *ic_bufaddr;
} ibd_copybuf_t;
typedef struct ibd_wqe_s {
struct ibd_wqe_s *w_next;
ibd_copybuf_t w_copybuf;
mblk_t *im_mblk;
} ibd_wqe_t;
/*
* Send WQE
*/
typedef struct ibd_swqe_s {
ibd_wqe_t w_ibd_swqe;
ibd_wqe_buftype_t w_buftype;
ibt_send_wr_t w_swr;
ibd_ace_t *w_ahandle;
ibt_mi_hdl_t w_mi_hdl;
ibt_wr_ds_t w_sgl[IBD_MAX_SQSEG];
ibd_rc_tx_largebuf_t *w_rc_tx_largebuf;
} ibd_swqe_t;
#define swqe_next w_ibd_swqe.w_next
#define swqe_copybuf w_ibd_swqe.w_copybuf
#define swqe_im_mblk w_ibd_swqe.im_mblk
#define SWQE_TO_WQE(swqe) (ibd_wqe_t *)&((swqe)->w_ibd_swqe)
#define WQE_TO_SWQE(wqe) (ibd_swqe_t *)wqe
/*
* Receive WQE
*/
typedef struct ibd_rwqe_s {
ibd_wqe_t w_ibd_rwqe;
struct ibd_state_s *w_state;
ibt_recv_wr_t w_rwr;
frtn_t w_freemsg_cb;
boolean_t w_freeing_wqe;
struct ibd_rc_chan_s *w_chan;
} ibd_rwqe_t;
#define rwqe_next w_ibd_rwqe.w_next
#define rwqe_copybuf w_ibd_rwqe.w_copybuf
#define rwqe_im_mblk w_ibd_rwqe.im_mblk
#define RWQE_TO_WQE(rwqe) (ibd_wqe_t *)&((rwqe)->w_ibd_rwqe)
#define WQE_TO_RWQE(wqe) (ibd_rwqe_t *)wqe
typedef struct ibd_list_s {
kmutex_t dl_mutex;
ibd_wqe_t *dl_head;
union {
boolean_t pending_sends;
uint32_t bufs_outstanding;
} ustat;
uint32_t dl_cnt;
} ibd_list_t;
#define dl_pending_sends ustat.pending_sends
#define dl_bufs_outstanding ustat.bufs_outstanding
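/*
 * List sketch (illustrative only): wqes move on and off an ibd_list_t
 * under dl_mutex, with dl_cnt tracking the depth so that threshold
 * checks (see IBD_FREE_SWQES_THRESH above) reduce to a comparison.
 *
 *	mutex_enter(&list->dl_mutex);
 *	if ((wqe = list->dl_head) != NULL) {
 *		list->dl_head = wqe->w_next;
 *		list->dl_cnt--;
 *	}
 *	mutex_exit(&list->dl_mutex);
 */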
/*
* LSO buffers
*
 * Under normal circumstances we should never need to use any buffer
 * that's larger than the MTU. Unfortunately, IB HCAs have limits on
 * SGL length that are much smaller than those of regular ethernet
 * NICs. Since the network layer doesn't limit the number of mblk
 * fragments in any send mp chain, we occasionally end up having to
 * use these larger-than-MTU sized (larger than id_tx_buf_sz, actually)
 * buffers.
*/
typedef struct ibd_lsobuf_s {
struct ibd_lsobuf_s *lb_next;
uint8_t *lb_buf;
int lb_isfree;
} ibd_lsobuf_t;
typedef struct ibd_lsobkt_s {
uint8_t *bkt_mem;
ibd_lsobuf_t *bkt_bufl;
ibd_lsobuf_t *bkt_free_head;
ibt_mr_hdl_t bkt_mr_hdl;
ibt_mr_desc_t bkt_mr_desc;
uint_t bkt_nelem;
uint_t bkt_nfree;
} ibd_lsobkt_t;
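/*
 * Allocation sketch (illustrative only; the real routine may hand out
 * several buffers at once and fill an SGL): LSO buffers come off the
 * bucket's free list under id_lso_lock.
 *
 *	mutex_enter(&state->id_lso_lock);
 *	if ((lbuf = state->id_lso->bkt_free_head) != NULL) {
 *		state->id_lso->bkt_free_head = lbuf->lb_next;
 *		state->id_lso->bkt_nfree--;
 *		lbuf->lb_isfree = 0;
 *	}
 *	mutex_exit(&state->id_lso_lock);
 */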
#define IBD_PORT_DRIVER 0x1
#define IBD_PARTITION_OBJ 0x2
/*
 * Posting to a single software rx post queue is contentious,
 * so break it out into an array of (multiple) queues.
 *
 * A filler is used to try to ensure that rx_queue structs fall in
 * different cache lines. Note: RX_QUEUE_CACHE_LINE needs to change if
 * the struct changes.
*/
#define RX_QUEUE_CACHE_LINE \
(64 - (sizeof (kmutex_t) + sizeof (ibd_wqe_t *) + sizeof (uint_t)))
typedef struct ibd_rx_queue_s {
kmutex_t rx_post_lock;
ibd_wqe_t *rx_head;
uint_t rx_cnt;
uint8_t rx_pad[RX_QUEUE_CACHE_LINE];
} ibd_rx_queue_t;
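/*
 * Posting sketch (illustrative only; the queue-selection policy shown,
 * a simple round-robin, is hypothetical): a poster picks a queue so
 * that concurrent posters rarely contend on the same rx_post_lock.
 *
 *	ibd_rx_queue_t *rxp = state->id_rx_queues +
 *	    (state->id_rx_post_queue_index++ % state->id_rx_nqueues);
 *
 *	mutex_enter(&rxp->rx_post_lock);
 *	rwqe->rwqe_next = rxp->rx_head;
 *	rxp->rx_head = RWQE_TO_WQE(rwqe);
 *	rxp->rx_cnt++;
 *	mutex_exit(&rxp->rx_post_lock);
 */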
/*
* This structure maintains information per port per HCA
* (per network interface).
*/
typedef struct ibd_state_s {
uint_t id_type;
dev_info_t *id_dip;
ibt_clnt_hdl_t id_ibt_hdl;
ibt_hca_hdl_t id_hca_hdl;
ibt_pd_hdl_t id_pd_hdl;
kmem_cache_t *id_req_kmc;
ibd_list_t id_tx_rel_list;
uint32_t id_running;
uint32_t id_max_sqseg;
uint32_t id_max_sqseg_hiwm;
ibd_list_t id_tx_list;
ddi_softintr_t id_tx;
uint32_t id_tx_sends;
kmutex_t id_txpost_lock;
ibd_swqe_t *id_tx_head;
ibd_swqe_t *id_tx_tail;
int id_tx_busy;
uint_t id_tx_buf_sz;
uint8_t *id_tx_bufs;
ibd_swqe_t *id_tx_wqes;
ibt_mr_hdl_t id_tx_mr_hdl;
ibt_mr_desc_t id_tx_mr_desc;
kmutex_t id_lso_lock;
ibd_lsobkt_t *id_lso;
kmutex_t id_scq_poll_lock;
int id_scq_poll_busy;
ibt_cq_hdl_t id_scq_hdl;
ibt_wc_t *id_txwcs;
uint32_t id_txwcs_size;
int id_rx_nqueues;
ibd_rx_queue_t *id_rx_queues;
int id_rx_post_queue_index;
uint32_t id_rx_post_active;
ibd_rwqe_t *id_rx_wqes;
uint8_t *id_rx_bufs;
ibt_mr_hdl_t id_rx_mr_hdl;
ibt_mr_desc_t id_rx_mr_desc;
uint_t id_rx_buf_sz;
/*
* id_ud_num_rwqe
* Number of "receive WQE" elements that will be allocated and used
* by ibd. This parameter is limited by the maximum channel size of
* the HCA. Each buffer in the receive wqe will be of MTU size.
*/
uint32_t id_ud_num_rwqe;
ibd_list_t id_rx_list;
ddi_softintr_t id_rx;
uint32_t id_rx_bufs_outstanding_limit;
uint32_t id_rx_allocb;
uint32_t id_rx_allocb_failed;
ibd_list_t id_rx_free_list;
kmutex_t id_rcq_poll_lock;
int id_rcq_poll_busy;
uint32_t id_rxwcs_size;
ibt_wc_t *id_rxwcs;
ibt_cq_hdl_t id_rcq_hdl;
ibt_channel_hdl_t id_chnl_hdl;
ib_pkey_t id_pkey;
uint16_t id_pkix;
uint8_t id_port;
ibt_mcg_info_t *id_mcinfo;
mac_handle_t id_mh;
mac_resource_handle_t id_rh;
ib_gid_t id_sgid;
ib_qpn_t id_qpnum;
ipoib_mac_t id_macaddr;
ib_gid_t id_mgid;
ipoib_mac_t id_bcaddr;
int id_mtu;
uchar_t id_scope;
kmutex_t id_acache_req_lock;
kcondvar_t id_acache_req_cv;
struct list id_req_list;
kt_did_t id_async_thrid;
kmutex_t id_ac_mutex;
ibd_ace_t *id_ac_hot_ace;
struct list id_ah_active;
struct list id_ah_free;
ipoib_mac_t id_ah_addr;
ibd_req_t id_ah_req;
char id_ah_op;
uint64_t id_ah_error;
ibd_ace_t *id_ac_list;
mod_hash_t *id_ah_active_hash;
kmutex_t id_mc_mutex;
struct list id_mc_full;
struct list id_mc_non;
kmutex_t id_trap_lock;
kcondvar_t id_trap_cv;
boolean_t id_trap_stop;
uint32_t id_trap_inprog;
char id_prom_op;
kmutex_t id_sched_lock;
int id_sched_needed;
int id_sched_cnt;
int id_sched_lso_cnt;
kmutex_t id_link_mutex;
link_state_t id_link_state;
uint64_t id_link_speed;
uint64_t id_num_intrs;
uint64_t id_tx_short;
/*
* id_ud_num_swqe
* Number of "send WQE" elements that will be allocated and used by
* ibd. When tuning this parameter, the size of pre-allocated, pre-
* mapped copy buffer in each of these send wqes must be taken into
* account. This copy buffer size is determined by the value of
	 * IBD_TX_BUF_SZ (this is currently set to the same value as
	 * ibd_tx_copy_thresh, but may be changed independently if needed).
*/
uint32_t id_ud_num_swqe;
uint64_t id_xmt_bytes;
uint64_t id_rcv_bytes;
uint64_t id_multi_xmt;
uint64_t id_brd_xmt;
uint64_t id_multi_rcv;
uint64_t id_brd_rcv;
uint64_t id_xmt_pkt;
uint64_t id_rcv_pkt;
uint32_t id_hwcksum_capab;
boolean_t id_lso_policy;
boolean_t id_lso_capable;
uint_t id_lso_maxlen;
int id_hca_res_lkey_capab;
ibt_lkey_t id_res_lkey;
boolean_t id_bgroup_created;
kmutex_t id_macst_lock;
kcondvar_t id_macst_cv;
uint32_t id_mac_state;
/* For Reliable Connected Mode */
boolean_t id_enable_rc;
boolean_t rc_enable_srq;
int rc_mtu;
uint32_t rc_tx_max_sqseg;
/*
	 * In IPoIB over Reliable Connected mode, the mac address has an
	 * "IBD_MAC_ADDR_RC" prefix added to it. However, for the loopback
	 * filter in ibd_process_rx(), the input mac address should not
	 * include the "IBD_MAC_ADDR_RC" prefix.
	 *
	 * So, we introduce rc_macaddr_loopback for the loopback filter in
	 * IPoIB over Reliable Connected mode.
	 *
	 * rc_macaddr_loopback is id_macaddr without the "IBD_MAC_ADDR_RC"
	 * prefix.
*/
ipoib_mac_t rc_macaddr_loopback;
ibt_srv_hdl_t rc_listen_hdl;
ibt_sbind_hdl_t rc_listen_bind;
ibt_srv_hdl_t rc_listen_hdl_OFED_interop;
ibt_sbind_hdl_t rc_listen_bind_OFED_interop;
ibd_rc_chan_list_t rc_pass_chan_list;
/* obsolete active channel list */
ibd_rc_chan_list_t rc_obs_act_chan_list;
kmutex_t rc_ace_recycle_lock;
ibd_ace_t *rc_ace_recycle;
/* Send */
/*
* This mutex protects rc_tx_largebuf_free_head, rc_tx_largebuf_nfree
* and ibd_rc_tx_largebuf_t->lb_next
*/
kmutex_t rc_tx_large_bufs_lock;
ibd_rc_tx_largebuf_t *rc_tx_largebuf_free_head;
uint_t rc_tx_largebuf_nfree;
	/* the chunk containing all Tx large buffers */
uint8_t *rc_tx_mr_bufs;
ibt_mr_hdl_t rc_tx_mr_hdl;
ibt_mr_desc_t rc_tx_mr_desc;
ibd_rc_tx_largebuf_t *rc_tx_largebuf_desc_base; /* base addr */
boolean_t rc_enable_iov_map;
uint_t rc_max_sqseg_hiwm;
/* For SRQ */
uint32_t rc_srq_size;
ibt_srq_hdl_t rc_srq_hdl;
ibd_list_t rc_srq_rwqe_list;
ibd_list_t rc_srq_free_list;
ibd_rwqe_t *rc_srq_rwqes;
uint8_t *rc_srq_rx_bufs;
ibt_mr_hdl_t rc_srq_rx_mr_hdl;
ibt_mr_desc_t rc_srq_rx_mr_desc;
/* For chained receive */
kmutex_t rc_rx_lock;
mblk_t *rc_rx_mp;
mblk_t *rc_rx_mp_tail;
uint32_t rc_rx_mp_len;
/* Counters for RC mode */
/* RX */
/*
	 * # of received packets that were transferred directly to GLD
	 * without copying them
*/
uint64_t rc_rcv_trans_byte;
uint64_t rc_rcv_trans_pkt;
/*
	 * # of received packets for which we allocate new buffers, copy
	 * the content into those buffers, and then transfer them to GLD
*/
uint64_t rc_rcv_copy_byte;
uint64_t rc_rcv_copy_pkt;
uint64_t rc_rcv_alloc_fail;
#ifdef DEBUG
uint64_t rc_rwqe_short; /* short rwqe */
#endif
	/* # of Receive CQ handler invocations */
uint64_t rc_rcq_invoke;
/* wc->wc_status != IBT_WC_SUCCESS */
uint64_t rc_rcq_err;
/* Tx */
uint64_t rc_xmt_bytes;
/* pkt size <= ibd_rc_tx_copy_thresh */
uint64_t rc_xmt_small_pkt;
uint64_t rc_xmt_fragmented_pkt;
/* fail in ibt_map_mem_iov() */
uint64_t rc_xmt_map_fail_pkt;
/* succ in ibt_map_mem_iov() */
uint64_t rc_xmt_map_succ_pkt;
uint64_t rc_ace_not_found;
uint64_t rc_xmt_drop_too_long_pkt;
uint64_t rc_xmt_icmp_too_long_pkt;
uint64_t rc_xmt_reenter_too_long_pkt;
/* short swqe in ibd_send() */
uint64_t rc_swqe_short;
	/* call mac_tx_update() when there are enough swqes */
uint64_t rc_swqe_mac_update;
/* short tx large copy buf in ibd_send() */
uint64_t rc_xmt_buf_short;
	/* call mac_tx_update() when there are enough Tx copy bufs */
uint64_t rc_xmt_buf_mac_update;
	/* no swqe even after calling the swqe recycle function */
uint64_t rc_scq_no_swqe;
	/* no large Tx buf even after calling the swqe recycle function */
uint64_t rc_scq_no_largebuf;
	/* # of Send CQ handler invocations */
uint64_t rc_scq_invoke;
/* Connection setup and close */
	uint64_t rc_conn_succ;	/* # of successful connects */
	uint64_t rc_conn_fail;	/* # of failed connects */
/* ace->ac_chan == NULL for unicast packet */
uint64_t rc_null_conn;
/* not in active established state */
uint64_t rc_no_estab_conn;
uint64_t rc_act_close; /* call ibd_rc_act_close() */
uint64_t rc_pas_close; /* call ibd_rc_pas_close() */
uint64_t rc_delay_ace_recycle;
uint64_t rc_act_close_simultaneous;
	/* # of RC channel resets */
uint64_t rc_reset_cnt;
#ifdef DEBUG
kstat_t *rc_ksp;
#endif
ib_guid_t id_hca_guid;
ib_guid_t id_port_guid;
datalink_id_t id_dlinkid;
datalink_id_t id_plinkid;
int id_port_inst;
struct ibd_state_s *id_next;
boolean_t id_force_create;
boolean_t id_bgroup_present;
uint_t id_hca_max_chan_sz;
/*
* UD Mode Tunables
*
	 * id_ud_tx_copy_thresh
	 * This sets the threshold at which ibd will attempt to do a bcopy
	 * of the outgoing data into a pre-mapped buffer. The IPoIB driver's
	 * send behavior is restricted by various parameters, so this value
	 * should be changed only after careful consideration. For instance,
	 * IB HCAs currently impose a relatively small limit (compared to
	 * ethernet NICs) on the length of the SGL for transmit. On the
	 * other hand, the ip stack could send down mp chains that are
	 * quite long when LSO is enabled.
*
* id_num_lso_bufs
* Number of "larger-than-MTU" copy buffers to use for cases when the
* outgoing mblk chain is too fragmented to be used with
* ibt_map_mem_iov() and too large to be used with regular MTU-sized
* copy buffers. It is not recommended to tune this variable without
* understanding the application environment and/or memory resources.
* The size of each of these lso buffers is determined by the value of
* IBD_LSO_BUFSZ.
*
* id_num_ah
* Number of AH cache entries to allocate
*
* id_hash_size
* Hash table size for the active AH list
*
*/
uint_t id_ud_tx_copy_thresh;
uint_t id_num_lso_bufs;
uint_t id_num_ah;
uint_t id_hash_size;
boolean_t id_create_broadcast_group;
boolean_t id_allow_coalesce_comp_tuning;
uint_t id_ud_rx_comp_count;
uint_t id_ud_rx_comp_usec;
uint_t id_ud_tx_comp_count;
uint_t id_ud_tx_comp_usec;
/* RC Mode Tunables */
uint_t id_rc_rx_comp_count;
uint_t id_rc_rx_comp_usec;
uint_t id_rc_tx_comp_count;
uint_t id_rc_tx_comp_usec;
/*
* id_rc_tx_copy_thresh
* This sets the threshold at which ibd will attempt to do a bcopy
* of the outgoing data into a pre-mapped buffer.
*
* id_rc_rx_copy_thresh
* If (the size of incoming buffer <= id_rc_rx_copy_thresh), ibd
* will attempt to allocate a buffer and do a bcopy of the incoming
* data into the allocated buffer.
*
* id_rc_rx_rwqe_thresh
* If (the number of available rwqe < ibd_rc_rx_rwqe_thresh), ibd
* will attempt to allocate a buffer and do a bcopy of the incoming
* data into the allocated buffer.
*
* id_rc_num_swqe
* 1) Send CQ size = ibd_rc_num_swqe
	 * 2) The send queue size = ibd_rc_num_swqe - 1
* 3) Number of pre-allocated Tx buffers for ibt_post_send() =
* ibd_rc_num_swqe - 1.
*
* id_rc_num_rwqe
* 1) For non-SRQ, we pre-post ibd_rc_num_rwqe number of WRs
* via ibt_post_receive() for receive queue of each RC channel.
* 2) For SRQ and non-SRQ, receive CQ size = ibd_rc_num_rwqe
*
* For SRQ
	 * If using SRQ, we allocate ibd_rc_num_srq buffers (each of RC MTU
	 * size) and post them via ibt_post_srq().
*
* id_rc_num_srq
	 * ibd_rc_num_srq should not be larger than ibd_rc_num_rwqe;
	 * otherwise it triggers a bug that produces warnings such as:
* NOTICE: hermon0: Device Error: EQE cq overrun or protection error
* NOTICE: hermon0: Device Error: EQE local work queue catastrophic
* error
* NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff
* catastrophic channel error
* NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff
* completion queue error
*/
uint_t id_rc_tx_copy_thresh;
uint_t id_rc_rx_copy_thresh;
uint_t id_rc_rx_rwqe_thresh;
uint_t id_rc_num_swqe;
uint_t id_rc_num_rwqe;
uint_t id_rc_num_srq;
} ibd_state_t;
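/*
 * A sanity-check sketch for the SRQ sizing rule described above
 * (illustrative only; the actual validation happens where the tunables
 * are read in):
 *
 *	if (state->id_rc_num_srq > state->id_rc_num_rwqe - 1)
 *		state->id_rc_num_srq = state->id_rc_num_rwqe - 1;
 */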
/*
* Structures to track global IBTF data, data that is shared
* among the IBD device instances. This includes the one ibt_hdl
* and the list of service registrations.
*/
typedef struct ibd_service_s {
struct ibd_service_s *is_link;
ibt_srv_hdl_t is_srv_hdl;
ib_svc_id_t is_sid;
uint_t is_ref_cnt;
} ibd_service_t;
typedef struct ibd_global_state_s {
kmutex_t ig_mutex;
ibt_clnt_hdl_t ig_ibt_hdl;
uint_t ig_ibt_hdl_ref_cnt;
ibd_service_t *ig_service_list;
} ibd_global_state_t;
typedef struct ibd_rc_msg_hello_s {
uint32_t reserved_qpn;
uint32_t rx_mtu;
} ibd_rc_msg_hello_t;
typedef struct ibd_rc_chan_s {
struct ibd_rc_chan_s *next;
/* channel hdl that we'll be using for Reliable Connected Mode */
ibt_channel_hdl_t chan_hdl;
struct ibd_state_s *state;
ibd_ace_t *ace;
ibd_rc_chan_state_t chan_state;
	/* used to detect duplicate connections */
ib_gid_t requester_gid;
ib_pkey_t requester_pkey;
ibd_list_t tx_wqe_list; /* free wqe list */
ibd_list_t tx_rel_list; /* for swqe recycle */
ibd_swqe_t *tx_wqes;
/* start address of Tx Buffers */
uint8_t *tx_mr_bufs;
ibt_mr_hdl_t tx_mr_hdl;
ibt_mr_desc_t tx_mr_desc;
ibt_cq_hdl_t scq_hdl; /* Tx completion queue */
ibt_wc_t tx_wc[IBD_RC_MAX_CQ_WC];
ddi_softintr_t scq_softintr;
uint32_t tx_trans_error_cnt;
/* For chained send */
kmutex_t tx_post_lock;
ibd_swqe_t *tx_head;
ibd_swqe_t *tx_tail;
int tx_busy;
/* For tx buffer recycle */
kmutex_t tx_poll_lock;
int tx_poll_busy;
/* Rx */
ibd_list_t rx_wqe_list; /* used by ibt_post_recv */
ibd_list_t rx_free_list; /* free rwqe list */
ibt_cq_hdl_t rcq_hdl; /* Rx completion queue */
ibt_wc_t rx_wc[IBD_RC_MAX_CQ_WC];
	ibd_rwqe_t *rx_rwqes;	/* the chunk of all rwqes */
	uint8_t *rx_bufs;	/* the chunk of all Rx bufs */
ibt_mr_hdl_t rx_mr_hdl; /* ibt_mr_hdl_t for rx_bufs */
ibt_mr_desc_t rx_mr_desc; /* ibt_mr_desc_t for rx_bufs */
/* For chained receive */
kmutex_t rx_lock;
mblk_t *rx_mp;
mblk_t *rx_mp_tail;
uint32_t rx_mp_len;
uint32_t rcq_size;
uint32_t scq_size;
/*
* We need two channels for each connection.
* One channel for Tx; another channel for Rx.
* If "is_tx_chan == B_TRUE", this is a Tx channel.
*/
boolean_t is_tx_chan;
} ibd_rc_chan_t;
/*
* The following functions are defined in "ibd.c".
* They are also used by "ibd_cm.c"
*/
void ibd_print_warn(ibd_state_t *, char *, ...);
void ibd_unmap_mem(ibd_state_t *, ibd_swqe_t *);
void ibd_queue_work_slot(ibd_state_t *, ibd_req_t *, int);
boolean_t ibd_acache_recycle(ibd_state_t *, ipoib_mac_t *, boolean_t);
void ibd_dec_ref_ace(ibd_state_t *, ibd_ace_t *);
ibd_ace_t *ibd_acache_find(ibd_state_t *, ipoib_mac_t *, boolean_t, int);
/*
* The following functions are defined in "ibd_cm.c".
* They are also used in "ibd.c".
*/
void ibd_async_rc_process_too_big(ibd_state_t *, ibd_req_t *);
void ibd_async_rc_close_act_chan(ibd_state_t *, ibd_req_t *);
void ibd_async_rc_recycle_ace(ibd_state_t *, ibd_req_t *);
/* Connection Setup/Close Functions */
ibt_status_t ibd_rc_listen(ibd_state_t *);
void ibd_rc_stop_listen(ibd_state_t *);
ibt_status_t ibd_rc_connect(ibd_state_t *, ibd_ace_t *, ibt_path_info_t *,
uint64_t);
void ibd_rc_try_connect(ibd_state_t *, ibd_ace_t *, ibt_path_info_t *);
void ibd_rc_signal_act_close(ibd_state_t *, ibd_ace_t *);
void ibd_rc_signal_ace_recycle(ibd_state_t *, ibd_ace_t *);
void ibd_rc_close_all_chan(ibd_state_t *);
/* Receive Functions */
int ibd_rc_init_srq_list(ibd_state_t *);
void ibd_rc_fini_srq_list(ibd_state_t *);
int ibd_rc_repost_srq_free_list(ibd_state_t *);
/* Send Functions */
int ibd_rc_init_tx_largebuf_list(ibd_state_t *);
void ibd_rc_fini_tx_largebuf_list(ibd_state_t *);
ibd_swqe_t *ibd_rc_acquire_swqes(ibd_rc_chan_t *);
void ibd_rc_post_send(ibd_rc_chan_t *, ibd_swqe_t *);
void ibd_rc_drain_scq(ibd_rc_chan_t *, ibt_cq_hdl_t);
void ibd_rc_tx_cleanup(ibd_swqe_t *);
/* Others */
void ibd_rc_get_conf(ibd_state_t *);
int ibd_rc_init_stats(ibd_state_t *);
#endif /* _KERNEL && !_BOOT */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_IB_CLIENTS_IBD_H */