/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/* Copyright (c) 1990 Mentat Inc. */
/*
* An implementation of the IPoIB-CM standard based on PSARC 2009/593.
*/
#include <sys/mac_provider.h>
extern ibd_global_state_t ibd_gstate;
extern int ibd_rc_conn_timeout;
/*
 * If the number of WRs in the receive queue of an RC connection drops
 * below IBD_RC_RX_WR_THRESHOLD, we post more receive WRs into it.
 */
/*
 * If the number of free SWQEs (or large Tx buffers) is greater than or
 * equal to IBD_RC_TX_FREE_THRESH, we call mac_tx_update() to notify
 * GLDv3 that it may resume transmitting packets.
 */
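/*
 * A minimal sketch of how the two thresholds above might be applied
 * (their #define values are elided from this excerpt).  The helper
 * ibd_rc_repost_rwqes() is hypothetical, and the dl_cnt fields are
 * assumed to follow the list types used elsewhere in this file; in the
 * real driver these checks are folded into the Rx and Tx completion
 * paths.
 */
static void
ibd_rc_threshold_sketch(ibd_state_t *state, ibd_rc_chan_t *chan)
{
	/* Replenish the Rx queue when it runs low on posted WRs */
	if (chan->rx_wqe_list.dl_cnt < IBD_RC_RX_WR_THRESHOLD)
		ibd_rc_repost_rwqes(state, chan);	/* hypothetical */

	/* Notify GLDv3 to resume Tx once enough resources are free */
	if ((chan->tx_rel_list.dl_cnt >= IBD_RC_TX_FREE_THRESH) &&
	    (state->rc_tx_largebuf_nfree >= IBD_RC_TX_FREE_THRESH))
		mac_tx_update(state->id_mh);
}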
/* For interop with legacy OFED */
/* Internet Header + 64 bits of Original Data Datagram. Refer to RFC 792 */
/* Functions for Reliable Connected Mode */
static ibt_cm_status_t ibd_rc_dispatch_pass_mad(void *,
    ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
static ibt_cm_status_t ibd_rc_dispatch_actv_mad(void *,
    ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
static inline void ibd_rc_add_to_chan_list(ibd_rc_chan_list_t *,
ibd_rc_chan_t *);
static inline ibd_rc_chan_t *ibd_rc_rm_header_chan_list(
    ibd_rc_chan_list_t *);
/* CQ handlers */
static void ibd_rc_rcq_handler(ibt_cq_hdl_t, void *);
static void ibd_rc_scq_handler(ibt_cq_hdl_t, void *);
/* Receive Functions */
static void ibd_rc_srq_freemsg_cb(char *);
static void ibd_rc_freemsg_cb(char *);
static void ibd_rc_fini_rxlist(ibd_rc_chan_t *);
/* Send Functions */
static int ibd_rc_init_txlist(ibd_rc_chan_t *);
static void ibd_rc_fini_txlist(ibd_rc_chan_t *);
void
{
/* Close old RC channel */
} else {
}
}
}
void
{
/* Close old RC channel */
} else {
}
}
/* Simple ICMP IP Header Template */
IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP
};
/* Packet too big: send an ICMP packet to GLDv3 to request a smaller MTU */
void
{
/*
* ipha: IP header for pmtu_pkt
* old_ipha: IP header for old packet
*/
goto too_big_fail;
}
/* Move to the IP header. */
len_needed));
+ len_needed);
}
if (msg_len > len_needed) {
}
goto too_big_fail;
}
/* Fill IB header */
/*
* If the GRH is not valid, indicate to GLDv3 by setting
* the VerTcFlow field to 0.
*/
/* Fill IP header */
if (msg_len > IP_MAXPACKET) {
}
ipha->ipha_hdr_checksum = 0;
/* Fill ICMP body */
icmph->icmph_checksum = 0;
HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0);
"ip_dst=0x%x, ttl=%d, len_needed=%d, msg_len=%d",
return;
/* Drop packet */
}
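/*
 * A hedged sketch of the ICMP "fragmentation needed" message that the
 * routine above constructs, assuming the icmph_t/ipha_t layouts from
 * <inet/ip.h>.  Both checksum fields are zeroed because the driver
 * attaches HCK_FULLCKSUM | HCK_FULLCKSUM_OK to the mblk (see above);
 * rc_mtu stands for the MTU value being suggested to the sender.
 */
static void
ibd_rc_fill_icmp_sketch(icmph_t *icmph, ipha_t *ipha, uint16_t rc_mtu)
{
	icmph->icmph_type = ICMP_DEST_UNREACHABLE;
	icmph->icmph_code = ICMP_FRAGMENTATION_NEEDED;
	icmph->icmph_du_zero = 0;
	icmph->icmph_du_mtu = htons(rc_mtu);	/* suggested smaller MTU */
	icmph->icmph_checksum = 0;		/* offloaded, see above */

	ipha->ipha_protocol = IPPROTO_ICMP;
	ipha->ipha_hdr_checksum = 0;		/* likewise offloaded */
}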
/*
 * If an RC channel has not been used for a long time, close it.
 */
void
{
/* Check all active channels. If chan->is_used == B_FALSE, close it */
state->rc_timeout_act++;
pre_ace);
chan->chan_state =
} else {
}
}
}
}
/* Check all passive channels. If chan->is_used == B_FALSE, close it */
/* remove it */
state->rc_timeout_pas++;
} else {
"alloc ibd_req_t fail");
chan;
} else {
}
}
} else {
} else {
}
}
}
} else {
}
}
}
#ifdef DEBUG
/*
 * ibd_rc_update_stats - update driver private kstat counters
 *
 * This routine dumps the internal statistics counters for ibd's
 * Reliable Connected mode.  The current counter values are copied
 * into the kernel kstat area on demand.
 */
static int
{
if (rw == KSTAT_WRITE)
return (EACCES);
return (0);
}
/*
* ibd_rc_init_stats - initialize kstat data structures
*
* This routine will create and initialize the driver private
* statistics counters.
*/
int
{
int inst;
/*
* Create and init kstat
*/
sizeof (ibd_rc_stat_t) / sizeof (kstat_named_t), 0);
"kernel statistics");
return (DDI_FAILURE);
}
/*
* Initialize all the statistics
*/
"transfer mode", KSTAT_DATA_ULONG);
"transfer mode", KSTAT_DATA_ULONG);
"copy mode", KSTAT_DATA_ULONG);
"copy mode", KSTAT_DATA_ULONG);
"RC: Tx pkt small size", KSTAT_DATA_ULONG);
"RC: Tx pkt fragmentary", KSTAT_DATA_ULONG);
"RC: Tx pkt fail ibt_map_mem_iov()", KSTAT_DATA_ULONG);
"RC: Tx pkt succ ibt_map_mem_iov()", KSTAT_DATA_ULONG);
"recycle", KSTAT_DATA_ULONG);
"after recycle", KSTAT_DATA_ULONG);
"#, swqe available", KSTAT_DATA_ULONG);
"ibd_send", KSTAT_DATA_ULONG);
"mac_tx_update #, buf available", KSTAT_DATA_ULONG);
"pkt", KSTAT_DATA_ULONG);
"state", KSTAT_DATA_ULONG);
"recycle", KSTAT_DATA_ULONG);
"simultaneous ibd_rc_act_close", KSTAT_DATA_ULONG);
/*
* Function to provide kernel stat update on demand
*/
/*
* Pointer into provider's raw statistics
*/
/*
 * Add kstat to the system's kstat chain
 */
return (DDI_SUCCESS);
}
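/*
 * A minimal sketch of the kstat pattern used above (create, name the
 * counters, attach the on-demand update routine, install), assuming a
 * hypothetical two-counter layout; the real ibd_rc_stat_t carries many
 * more fields.
 */
typedef struct ibd_rc_kstat_sketch_s {
	kstat_named_t	tx_pkts;
	kstat_named_t	rx_pkts;
} ibd_rc_kstat_sketch_t;

static int
ibd_rc_init_stats_sketch(ibd_state_t *state, int inst)
{
	kstat_t *ksp;
	ibd_rc_kstat_sketch_t *kp;

	ksp = kstat_create("ibd", inst, "statistics", "net",
	    KSTAT_TYPE_NAMED,
	    sizeof (ibd_rc_kstat_sketch_t) / sizeof (kstat_named_t), 0);
	if (ksp == NULL)
		return (DDI_FAILURE);

	kp = (ibd_rc_kstat_sketch_t *)ksp->ks_data;
	kstat_named_init(&kp->tx_pkts, "rc_tx_pkts", KSTAT_DATA_ULONG);
	kstat_named_init(&kp->rx_pkts, "rc_rx_pkts", KSTAT_DATA_ULONG);

	ksp->ks_update = ibd_rc_update_stats;	/* on-demand refresh */
	ksp->ks_private = (void *)state;	/* provider's raw stats */
	kstat_install(ksp);
	return (DDI_SUCCESS);
}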
#endif
static ibt_status_t
{
int rv;
/* Allocate IB structures for a new RC channel. */
if (is_tx_chan) {
} else {
}
if (result != IBT_SUCCESS) {
"create scq completion queue (size <%d>)",
goto alloc_scq_err;
} /* if failure to alloc cq */
"interrupt moderation failed");
}
if (result != IBT_SUCCESS) {
goto alloc_rcq_err;
} /* if failure to alloc cq */
"interrupt moderation failed");
}
if (is_tx_chan) {
"ibd_rc_init_txlist failed");
goto init_txlist_err;
}
if (ibd_rc_tx_softintr == 1) {
DDI_SUCCESS) {
"ddi_add_softintr(scq_softintr), ret=%d",
rv);
goto alloc_softintr_err;
}
}
} else {
}
/*
* enable completions
*/
if (result != IBT_SUCCESS) {
"(scq) failed: status %d\n", result);
goto alloc_scq_enable_err;
}
/* We will enable chan->rcq_hdl later. */
/* alloc an RC channel */
/*
 * The number of SGL elements per receive WR should be 1, because the
 * ibd driver allocates a whole block of memory for each
 * ibt_post_recv().
 */
/* The send queue size and the receive queue size */
if (state->id_hca_res_lkey_capab) {
} else {
}
if (state->rc_enable_srq) {
} else {
}
if (result != IBT_SUCCESS) {
" fail:<%d>", result);
goto alloc_scq_enable_err;
}
if (is_tx_chan)
else
/* For the connection reaper routine ibd_rc_conn_timeout_call() */
return (IBT_SUCCESS);
if (is_tx_chan) {
if (ibd_rc_tx_softintr == 1) {
}
}
if (is_tx_chan) {
}
return (result);
}
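/*
 * A hedged sketch of the CQ setup sequence used by the allocation
 * routine above: size the CQ, allocate it, then enable completion
 * notifications.  The cq_attr fields follow the IBTF ibt_cq_attr_t
 * definition; interrupt moderation is omitted here.
 */
static ibt_status_t
ibd_rc_setup_cq_sketch(ibd_state_t *state, uint_t size,
    ibt_cq_hdl_t *cq_hdl)
{
	ibt_cq_attr_t cq_attr;
	ibt_status_t result;
	uint_t real_size;

	bzero(&cq_attr, sizeof (cq_attr));
	cq_attr.cq_size = size;
	cq_attr.cq_sched = NULL;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;

	result = ibt_alloc_cq(state->id_hca_hdl, &cq_attr, cq_hdl,
	    &real_size);
	if (result != IBT_SUCCESS)
		return (result);

	/* Arm the CQ; a failure here is not expected */
	return (ibt_enable_cq_notify(*cq_hdl, IBT_NEXT_COMPLETION));
}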
static void
{
/* DPRINT(30, "ibd_rc_free_chan: chan=%p", chan); */
if (ret != IBT_SUCCESS) {
return;
}
}
if (ret != IBT_SUCCESS) {
return;
}
}
if (ret != IBT_SUCCESS) {
return;
}
}
/* Free buffers */
if (chan->is_tx_chan) {
if (ibd_rc_tx_softintr == 1) {
}
} else {
}
}
/*
 * If it is a passive channel, we must make sure it has been removed
 * from chan->state->rc_pass_chan_list.
 */
}
/* Add an RC channel */
static inline void
{
} else {
}
}
static boolean_t
{
return (B_FALSE);
} else {
} else {
}
return (B_TRUE);
}
}
/* Remove an RC channel */
static inline ibd_rc_chan_t *
{
" in chan_list", chan);
} else {
"(middle): found chan(%p)", chan);
break;
}
}
}
return (chan);
}
static inline ibd_rc_chan_t *
{
}
return (rc_chan);
}
static int
{
/*
* Allocate one big chunk for all regular rx copy bufs
*/
sizeof (ibd_rwqe_t), KM_SLEEP);
/*
* Do one memory registration on the entire rxbuf area
*/
!= IBT_SUCCESS) {
"failed");
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
static void
{
/*
 * Don't change the value of state->rc_mtu during the period between
 * the calls to ibd_rc_alloc_srq_copybufs() and
 * ibd_rc_free_srq_copybufs().
 */
/*
* Unregister rxbuf mr
*/
" failed");
}
/*
* Free rxbuf memory
*/
}
/*
* Allocate and post a certain number of SRQ receive buffers and WRs.
*/
int
{
int i;
if (ret != IBT_SUCCESS) {
/*
 * The following code works around CR 6932460 (unable to configure
 * an ibd interface on 32-bit x86 systems).  A 32-bit x86 system has
 * fewer memory resources than a 64-bit x86 system; if the current
 * resource request can't be satisfied, we request fewer resources
 * here.
 */
while ((ret == IBT_HCA_WR_EXCEEDED) &&
}
if (ret != IBT_SUCCESS) {
"req_sgl_sz=%d, req_wr_sz=0x%x, final_req_wr_sz="
return (DDI_FAILURE);
}
}
if (ret != IBT_SUCCESS) {
"ibt_free_srq fail, ret=%d", ret);
}
return (DDI_FAILURE);
}
/*
* Allocate and setup the rwqe list
*/
"id_running was not 1\n");
}
return (DDI_FAILURE);
}
/* Leave IPOIB_GRH_SIZE space */
}
return (DDI_SUCCESS);
}
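/*
 * A hedged sketch of the CR 6932460 workaround described above: if the
 * HCA rejects the requested SRQ size with IBT_HCA_WR_EXCEEDED, halve
 * the request and retry until it succeeds or falls below an assumed
 * floor.  The srq_attr fields follow ibt_srq_sizes_t.
 */
static ibt_status_t
ibd_rc_alloc_srq_sketch(ibd_state_t *state, ibt_srq_sizes_t *srq_attr)
{
	ibt_srq_sizes_t real_attr;
	ibt_status_t ret;

	ret = ibt_alloc_srq(state->id_hca_hdl, IBT_SRQ_NO_FLAGS,
	    state->id_pd_hdl, srq_attr, &state->rc_srq_hdl, &real_attr);
	while ((ret == IBT_HCA_WR_EXCEEDED) &&
	    (srq_attr->srq_wr_sz > 0x40)) {	/* 0x40: assumed floor */
		srq_attr->srq_wr_sz /= 2;	/* request fewer WRs */
		ret = ibt_alloc_srq(state->id_hca_hdl, IBT_SRQ_NO_FLAGS,
		    state->id_pd_hdl, srq_attr, &state->rc_srq_hdl,
		    &real_attr);
	}
	return (ret);
}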
/*
* Free the statically allocated Rx buffer list for SRQ.
*/
void
{
int i;
if (ret != IBT_SUCCESS) {
"ibt_free_srq fail, ret=%d", ret);
}
}
}
}
/* Repost the elements in state->ib_rc_free_list */
int
{
/* repost them */
"failed in desballoc()");
do {
return (DDI_FAILURE);
}
}
}
return (DDI_SUCCESS);
}
return (DDI_SUCCESS);
}
/*
* Free an allocated recv wqe.
*/
static void
{
/*
 * desballoc() failed (no memory) or the posting of the rwqe failed.
 *
 * This rwqe is placed on a free list so that it
 * can be reinstated in the future.
 *
 * NOTE: no code currently exists to reinstate
 * these "lost" rwqes.
 */
}
static void
{
/*
* If the driver is stopped, just free the rwqe.
*/
if (!rwqe->w_freeing_wqe) {
}
return;
}
/*
 * The upper layer has released the held mblk, so we no
 * longer need to keep the old pointer in our rwqe.
 */
return;
}
" failed");
return;
}
}
/*
* Post a rwqe to the hardware and add it to the Rx list.
*/
static int
{
/*
 * We must increment dl_cnt before posting the receive WR, because
 * dl_cnt has to be up to date by the time the corresponding
 * ibd_rc_process_rx() is called.
 */
IBT_SUCCESS) {
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* Post a rwqe to the hardware and add it to the Rx list.
*/
static int
{
/*
 * We must increment dl_cnt before posting the receive WR, because
 * dl_cnt has to be up to date by the time the corresponding
 * ibd_rc_process_rx() is called.
 */
IBT_SUCCESS) {
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
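/*
 * A minimal sketch of the ordering requirement described in the two
 * routines above: bump dl_cnt before calling ibt_post_recv(), and roll
 * it back if the post fails, so that ibd_rc_process_rx() never sees a
 * stale count.  The rx_wqe_list and w_rwr names are assumed from the
 * driver's list and rwqe types.
 */
static int
ibd_rc_post_rwqe_sketch(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe)
{
	mutex_enter(&chan->rx_wqe_list.dl_mutex);
	chan->rx_wqe_list.dl_cnt++;		/* update before the post */
	mutex_exit(&chan->rx_wqe_list.dl_mutex);

	if (ibt_post_recv(chan->chan_hdl, &rwqe->w_rwr, 1, NULL) !=
	    IBT_SUCCESS) {
		mutex_enter(&chan->rx_wqe_list.dl_mutex);
		chan->rx_wqe_list.dl_cnt--;	/* roll back on failure */
		mutex_exit(&chan->rx_wqe_list.dl_mutex);
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}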
static int
{
/*
* Allocate one big chunk for all regular rx copy bufs
*/
sizeof (ibd_rwqe_t), KM_SLEEP);
/*
* Do one memory registration on the entire rxbuf area
*/
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
static void
{
/*
 * Don't change the value of state->rc_mtu during the period between
 * the calls to ibd_rc_alloc_rx_copybufs() and
 * ibd_rc_free_rx_copybufs().
 */
/*
* Unregister rxbuf mr
*/
}
/*
* Free rxbuf memory
*/
}
/*
 * Post a certain number of receive buffers and WRs on an RC channel.
 */
static int
{
int i;
return (DDI_FAILURE);
/*
* Allocate and setup the rwqe list
*/
return (DDI_FAILURE);
}
}
return (DDI_SUCCESS);
}
/*
 * Free the statically allocated Rx buffer list of an RC channel.
 */
static void
{
int i;
return;
}
/* bufs_outstanding must be 0 */
}
}
}
/*
* Free an allocated recv wqe.
*/
static void
{
/*
 * desballoc() failed (no memory) or the posting of the rwqe failed.
 *
 * This rwqe is placed on a free list so that it
 * can be reinstated in the future.
 *
 * NOTE: no code currently exists to reinstate
 * these "lost" rwqes.
 */
}
/*
* Processing to be done after receipt of a packet; hand off to GLD
* in the format expected by GLD.
*/
static void
{
int rxcnt;
int len;
/*
* Track number handed to upper layer, and number still
* available to receive packets.
*/
if (state->rc_enable_srq) {
} else {
}
/*
 * It cannot be an IBA multicast packet.
 */
/* For the connection reaper routine ibd_rc_conn_timeout_call() */
#ifdef DEBUG
state->rc_rwqe_short++;
}
#endif
/*
 * Replenish the Rx pool if needed.
 */
/*
 * Record how many rwqes are currently held by the upper
 * network layer.
 */
if (state->rc_enable_srq) {
} else {
}
} else {
if (state->rc_enable_srq) {
DDI_FAILURE) {
}
} else {
DDI_FAILURE) {
}
}
return;
}
if (state->rc_enable_srq) {
}
} else {
}
}
}
/* LINTED: E_CONSTANT_CONDITION */
}
}
sizeof (ipoib_mac_t));
/*
* Can RC mode in IB guarantee its checksum correctness?
*
* (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0,
* HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0);
*/
/*
* Make sure this is NULL or we're in trouble.
*/
"ibd_rc_process_rx: got duplicate mp from rcq?");
}
/*
* Add this mp to the list of processed mp's to send to
* the nw layer
*/
if (state->rc_enable_srq) {
} else {
}
state->rc_rx_mp_len++;
state->rc_rx_mp_len = 0;
} else {
}
} else {
} else {
}
} else {
}
}
}
/*
 * Callback code invoked from STREAMS when the recv data buffer is
 * free for recycling.
 */
static void
{
/*
* If the wqe is being destructed, do not attempt recycling.
*/
return;
}
return;
}
/*
* Post back to h/w. We could actually have more than
* id_num_rwqe WQEs on the list if there were multiple
* ibd_freemsg_cb() calls outstanding (since the lock is
* not held the entire time). This will start getting
* corrected over subsequent ibd_freemsg_cb() calls.
*/
return;
}
}
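/*
 * A hedged sketch of the desballoc()-based recycling that the callback
 * above relies on: each Rx buffer is wrapped in an mblk whose free
 * routine points back at the rwqe, so that freeb() from the network
 * layer re-enters the driver instead of releasing the buffer.  The
 * w_freemsg_cb and rwqe_im_mblk names are assumed from the rwqe type.
 */
static int
ibd_rc_bind_rwqe_mblk_sketch(ibd_rwqe_t *rwqe, uint8_t *buf, size_t len)
{
	/* The free routine and argument fire when the mblk is freed */
	rwqe->w_freemsg_cb.free_func = ibd_rc_freemsg_cb;
	rwqe->w_freemsg_cb.free_arg = (char *)rwqe;

	rwqe->rwqe_im_mblk = desballoc(buf, len, 0, &rwqe->w_freemsg_cb);
	if (rwqe->rwqe_im_mblk == NULL)
		return (DDI_FAILURE);	/* no memory; caller handles it */
	return (DDI_SUCCESS);
}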
/*
* Common code for interrupt handling as well as for polling
* for all completed wqe's while detaching.
*/
static void
{
int i;
/*
* Channel being torn down.
*/
/*
* Do not invoke Rx handler because
* it might add buffers to the Rx pool
* when we are trying to deinitialize.
*/
continue;
}
}
}
}
}
/* Receive CQ handler */
/* ARGSUSED */
static void
{
/*
* Poll for completed entries; the CQ will not interrupt any
* more for incoming (or transmitted) packets.
*/
/*
* Now enable CQ notifications; all packets that arrive now
* (or complete transmission) will cause new interrupts.
*/
IBT_SUCCESS) {
/*
* We do not expect a failure here.
*/
}
/*
* Repoll to catch all packets that might have arrived after
* we finished the first poll loop and before interrupts got
* armed.
*/
if (state->rc_enable_srq) {
state->rc_rx_mp_len = 0;
} else {
}
} else {
} else {
}
}
}
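/*
 * A minimal sketch of the poll / arm / re-poll discipline the handler
 * above follows.  The second drain closes the race window in which a
 * completion arrives after the first drain but before the CQ
 * notification is re-armed.  IBD_RC_WC_SKETCH_SIZE and the process
 * callback are assumptions for illustration.
 */
#define	IBD_RC_WC_SKETCH_SIZE	16	/* assumed polling batch size */

static void
ibd_rc_drain_cq_sketch(ibt_cq_hdl_t cq_hdl, void (*process)(ibt_wc_t *))
{
	ibt_wc_t wc[IBD_RC_WC_SKETCH_SIZE];
	uint_t i, num_polled;

	/* First drain: consume everything that has already completed */
	while (ibt_poll_cq(cq_hdl, wc, IBD_RC_WC_SKETCH_SIZE,
	    &num_polled) == IBT_SUCCESS) {
		for (i = 0; i < num_polled; i++)
			process(&wc[i]);
	}

	/* Re-arm, then drain again to catch late arrivals */
	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
	while (ibt_poll_cq(cq_hdl, wc, IBD_RC_WC_SKETCH_SIZE,
	    &num_polled) == IBT_SUCCESS) {
		for (i = 0; i < num_polled; i++)
			process(&wc[i]);
	}
}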
/*
* Allocate the statically allocated Tx buffer list.
*/
int
{
int i;
/*
* Allocate one big chunk for all Tx large copy bufs
*/
/* Don't transfer IPOIB_GRH_SIZE bytes (40 bytes) */
"failed");
return (DDI_FAILURE);
}
sizeof (ibd_rc_tx_largebuf_t), KM_SLEEP);
/*
* Set up the buf chain
*/
for (i = 0; i < num_swqe; i++) {
lbufp++;
}
/*
* Set up the buffer information in ibd state
*/
return (DDI_SUCCESS);
}
void
{
"failed");
}
num_swqe * sizeof (ibd_rc_tx_largebuf_t));
}
static int
{
/*
* Allocate one big chunk for all regular tx copy bufs
*/
/*
* Do one memory registration on the entire txbuf area
*/
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* Allocate the statically allocated Tx buffer list.
*/
static int
{
int i;
return (DDI_FAILURE);
/*
* Allocate and setup the swqe list
*/
sizeof (ibd_swqe_t), KM_SLEEP);
/* Add to list */
}
return (DDI_SUCCESS);
}
/*
* Free the statically allocated Tx buffer list.
*/
static void
{
"failed");
}
}
}
sizeof (ibd_swqe_t));
}
}
/*
 * Acquire a send WQE from the free list.
 * Returns the send WQE pointer, or NULL if none is available.
 */
{
/* transfer id_tx_rel_list to id_tx_list */
/* clear id_tx_rel_list */
} else { /* no free swqe */
}
return (wqe);
}
/*
* Release send wqe back into free list.
*/
static void
{
/*
* Add back on Tx list for reuse.
*/
}
void
{
uint_t i;
/* post the one request, then check for more */
if (ibt_status != IBT_SUCCESS) {
"posting one wr failed: ret=%d", ibt_status);
}
for (;;) {
return;
}
}
/*
* Collect pending requests, IBD_MAX_TX_POST_MULTIPLE wrs
* at a time if possible, and keep posting them.
*/
}
/*
* If posting fails for some reason, we'll never receive
* completion intimation, so we'll need to cleanup. But
* we need to make sure we don't clean up nodes whose
* wrs have been successfully posted. We assume that the
* hca driver returns on the first failure to post and
* therefore the first 'num_posted' entries don't need
* cleanup here.
*/
num_posted = 0;
if (ibt_status != IBT_SUCCESS) {
"posting multiple wrs failed: "
"requested=%d, done=%d, ret=%d",
for (i = num_posted; i < n_wrs; i++)
ibd_rc_tx_cleanup(nodes[i]);
}
}
}
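/*
 * A hedged sketch of the batched posting and partial-failure cleanup
 * described above.  ibt_post_send() reports how many WRs the HCA
 * accepted through its last argument, so on failure only the nodes
 * from that index onward still need cleanup.
 */
static void
ibd_rc_post_batch_sketch(ibd_rc_chan_t *chan, ibd_swqe_t **nodes,
    ibt_send_wr_t *wrs, uint_t n_wrs)
{
	uint_t i, num_posted = 0;
	ibt_status_t ibt_status;

	ibt_status = ibt_post_send(chan->chan_hdl, wrs, n_wrs,
	    &num_posted);
	if (ibt_status != IBT_SUCCESS) {
		/*
		 * The HCA driver stops at the first failure, so the
		 * first num_posted entries were accepted and must not
		 * be cleaned up here.
		 */
		for (i = num_posted; i < n_wrs; i++)
			ibd_rc_tx_cleanup(nodes[i]);
	}
}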
/*
 * Common code that deals with cleanup after a successful or
 * erroneous transmission attempt.
 */
void
{
/*
* If this was a dynamic registration in ibd_send(),
* deregister now.
*/
}
} else {
}
}
/*
* Release the send wqe for reuse.
*/
/*
* Drop the reference count on the AH; it can be reused
* now for a different destination if there are no more
* posted sends that will use it. This can be eliminated
* if we can always associate each Tx buffer with an AH.
* The ace can be null if we are cleaning up from the
* ibd_send() error path.
*/
}
void
{
int i;
if (encount_error == B_FALSE) {
/*
 * This RC channel is in an error state;
 * remove it.
 */
if ((chan->chan_state ==
LINK_STATE_UP) &&
chan->chan_state =
state->rc_reset_cnt++;
"wc_status(%d) != SUCC, "
"chan=%p, ace=%p, "
"link_state=%d"
"reset RC channel",
} else {
state->
"wc_status(%d) != SUCC, "
"chan=%p, chan_state=%d,"
"ace=%p, link_state=%d."
"other thread is closing "
}
}
}
}
if (state->id_sched_needed == 0) {
} else {
state->rc_scq_no_swqe++;
}
if (state->rc_tx_largebuf_nfree >
state->id_sched_needed &=
} else {
}
state->id_sched_cnt++;
} else {
}
} else {
}
}
}
/* Send CQ handler: call ibd_rc_tx_recycle() to recycle Tx buffers */
/* ARGSUSED */
static void
{
if (ibd_rc_tx_softintr == 1) {
return;
} else {
}
} else
(void) ibd_rc_tx_recycle(arg);
}
static uint_t
{
"threads");
return (DDI_INTR_CLAIMED);
}
/*
* Poll for completed entries; the CQ will not interrupt any
* more for completed packets.
*/
/*
* Now enable CQ notifications; all completions originating now
* will cause new interrupts.
*/
do {
IBT_SUCCESS) {
/*
* We do not expect a failure here.
*/
" failed");
}
else {
redo = 0;
}
} while (redo);
return (DDI_INTR_CLAIMED);
}
static ibt_status_t
{
ibd_service_t *p;
p->is_ref_cnt++;
*srv_hdl = p->is_srv_hdl;
return (IBT_SUCCESS);
}
}
if (status == IBT_SUCCESS) {
p = kmem_alloc(sizeof (*p), KM_SLEEP);
p->is_srv_hdl = *srv_hdl;
p->is_ref_cnt = 1;
}
return (status);
}
static ibt_status_t
{
p = *pp;
if (--p->is_ref_cnt == 0) {
kmem_free(p, sizeof (*p));
} else {
}
return (status);
}
}
/* Should never get here */
return (IBT_FAILURE);
}
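/*
 * A minimal sketch of the reference-counted service table the two
 * routines above maintain: registering either bumps the refcount of an
 * existing entry or allocates a new one, and deregistration frees the
 * entry only when the count drops to zero.  The is_link/is_sid field
 * names and the list-head parameter are assumptions; the real driver
 * keeps this list in ibd_gstate.
 */
static ibt_status_t
ibd_register_service_sketch(ibd_service_t **head, ib_svc_id_t sid,
    ibt_srv_hdl_t srv_hdl)
{
	ibd_service_t *p;

	for (p = *head; p != NULL; p = p->is_link) {
		if (p->is_sid == sid) {
			p->is_ref_cnt++;	/* already registered */
			return (IBT_SUCCESS);
		}
	}
	p = kmem_alloc(sizeof (*p), KM_SLEEP);
	p->is_sid = sid;
	p->is_srv_hdl = srv_hdl;
	p->is_ref_cnt = 1;
	p->is_link = *head;			/* insert at the head */
	*head = p;
	return (IBT_SUCCESS);
}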
/* Listen with corresponding service ID */
{
return (IBT_FAILURE);
}
/*
* Register the service with service id
* Incoming connection requests should arrive on this service id.
*/
if (status != IBT_SUCCESS) {
"ret=%d", status);
return (status);
}
/* pass state as cm_private */
if (status != IBT_SUCCESS) {
" fail to bind port: <%d>", status);
return (status);
}
/*
 * Legacy OFED used a wrong service ID (with one additional zero digit)
 * for many years.  To interoperate with legacy OFED, we also support
 * this wrong service ID here.
 */
/*
* Register the service with service id
* Incoming connection requests should arrive on this service id.
*/
if (status != IBT_SUCCESS) {
DPRINT(40,
"ibd_rc_listen: Service Registration for Legacy OFED "
"Failed %d", status);
return (status);
}
/* pass state as cm_private */
if (status != IBT_SUCCESS) {
"Legacy OFED listener", status);
(void) ibd_deregister_service(
return (status);
}
return (IBT_SUCCESS);
}
void
{
int ret;
/* Disable incoming connection requests */
if (ret != 0) {
"ibt_unbind_all_services() failed, ret=%d", ret);
}
if (ret != 0) {
"ibd_deregister_service() failed, ret=%d", ret);
} else {
}
}
/* Disable incoming connection requests */
if (ret != 0) {
"ibt_unbind_all_services() failed: %d", ret);
}
if (ret != 0) {
"ibd_deregister_service() failed: %d", ret);
} else {
}
}
}
void
{
/* Disable all Rx routines */
}
if (state->rc_enable_srq) {
attempts = 10;
if (--attempts == 0) {
/*
 * There are pending bufs with the network
 * layer and we have no choice but to wait
 * for them to be done with.  Reap all the
 * Tx/Rx completions that were posted since
 * we turned off the notification and
 * return failure.
 */
break;
}
}
}
/* Close all passive RC channels */
}
/* Close all active RC channels */
}
}
} else {
}
}
}
attempts = 400;
while (((state->rc_num_tx_chan != 0) ||
/* Another thread is closing the CM channel; wait for it */
attempts--;
}
}
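/*
 * A hedged sketch of the bounded-wait pattern used above while other
 * threads finish closing RC channels: poll the channel counters,
 * sleep briefly between checks, and give up after a fixed number of
 * attempts.  The rc_num_rx_chan field and the 10ms interval are
 * assumptions.
 */
static boolean_t
ibd_rc_wait_chan_drain_sketch(ibd_state_t *state)
{
	int attempts = 400;

	while ((state->rc_num_tx_chan != 0) ||
	    (state->rc_num_rx_chan != 0)) {
		if (--attempts == 0)
			return (B_FALSE);	/* timed out */
		delay(drv_usectohz(10000));	/* wait 10ms, recheck */
	}
	return (B_TRUE);
}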
void
{
return;
if (status != IBT_SUCCESS) {
/* wait for the peer side to remove the stale channel */
return;
}
if (status != IBT_SUCCESS) {
/* wait for the peer side to remove the stale channel */
return;
}
}
/*
 * Allocates a channel and sets ace->ac_chan to it.
 * Opens the channel.
 */
{
return (status);
}
/*
* open the channels
*/
/*
* update path record with the SID
*/
!= IBT_SUCCESS) {
"ret=%d", status);
return (status);
}
/* pre-allocate memory for hello ack message */
/* We don't do RDMA */
open_args.oc_rdma_ra_out = 0;
open_args.oc_rdma_ra_in = 0;
if (status == IBT_SUCCESS) {
/* Success! */
state->rc_conn_succ++;
return (IBT_SUCCESS);
}
/* failure */
/* check open_returns, report the error, and exit */
"ret status = %d, reason=%d, ace=%p, mtu=0x%x, qpn=0x%x,"
return (status);
}
void
{
"ibd_req_t fail");
} else {
}
}
void
{
return;
}
return;
}
}
/*
* Close an active channel
*
* is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel()
*/
static void
{
switch (chan->chan_state) {
case IBD_RC_STATE_ACT_CLOSING: /* stale, close it */
case IBD_RC_STATE_ACT_ESTAB:
/*
 * Wait for the send queue to drain.  The old loop count was 50
 * (5 seconds), but in experiments 5 seconds was not enough time for
 * IBTL to return all buffers and release ace->ac_ref; 25 seconds
 * works well.  As further evidence, IBTL takes about 17 seconds each
 * time it cleans up a stale RC channel.
 */
times = 250;
times--;
if (times == 0) {
"list=%d, tx_rel_list=%d) != chan->"
"scq_size=%d, OR ac_ref(=%d) not clean",
break;
}
"polling threads");
} else {
chan->tx_poll_busy = 0;
}
}
if (times != 0) {
}
if (is_close_rc_chan) {
0);
if (ret != IBT_SUCCESS) {
"channel fail, chan=%p, ret=%d",
} else {
"channel succ, chan=%p", chan);
}
}
break;
break;
case IBD_RC_STATE_ACT_ERROR:
break;
default:
}
}
/*
* Close a passive channel
*
* is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel()
*
* is_timeout_close: if B_TRUE, this function is called by the connection
* reaper (refer to function ibd_rc_conn_timeout_call). When the connection
* reaper calls ibd_rc_pas_close(), and if it finds that dl_bufs_outstanding
* or chan->rcq_invoking is non-zero, then it can simply put that channel back
* on the passive channels list and move on, since it might be an indication
* that the channel became active again by the time we started its cleanup.
* It is costlier to do the cleanup and then reinitiate the channel
* establishment and hence it will help to be conservative when we do the
* cleanup.
*/
int
{
switch (chan->chan_state) {
case IBD_RC_STATE_PAS_ESTAB:
if (is_timeout_close) {
if ((chan->rcq_invoking != 0) ||
if (ibd_rc_re_add_to_pas_chan_list(chan)) {
return (DDI_FAILURE);
}
}
}
/*
* First, stop receive interrupts; this stops the
* connection from handing up buffers to higher layers.
* Wait for receive buffers to be returned; give up
* after 5 seconds.
*/
/* Wait 0.01 second to let ibt_set_cq_handler() take effect */
times = 50;
if (--times == 0) {
"reclaiming failed");
return (DDI_FAILURE);
}
}
}
times = 50;
while (chan->rcq_invoking != 0) {
if (--times == 0) {
"rcq handler is being invoked");
break;
}
}
if (is_close_rc_chan) {
0);
if (ret != IBT_SUCCESS) {
"channel() fail, chan=%p, ret=%d", chan,
ret);
} else {
"channel() succ, chan=%p", chan);
}
}
break;
break;
default:
}
return (DDI_SUCCESS);
}
/*
* Passive Side:
* Handle an incoming CM REQ from active side.
*
* On success, this function allocates an ibd_rc_chan_t, then
* assigns it to "*ret_conn".
*/
static ibt_cm_status_t
void *ret_priv_data)
{
return (IBT_CM_REJECT);
}
if (!state->rc_enable_srq) {
"failed");
return (IBT_CM_REJECT);
}
}
/* We don't do RDMA */
return (IBT_CM_ACCEPT);
}
/*
* ibd_rc_handle_act_estab -- handler for connection established completion
* for active side.
*/
static ibt_cm_status_t
{
if (result != IBT_SUCCESS) {
"ibt_enable_cq_notify(rcq) "
"failed: status %d", result);
return (IBT_CM_REJECT);
}
break;
default:
return (IBT_CM_REJECT);
}
return (IBT_CM_ACCEPT);
}
/*
* ibd_rc_handle_pas_estab -- handler for connection established completion
* for passive side.
*/
static ibt_cm_status_t
{
switch (chan->chan_state) {
if (result != IBT_SUCCESS) {
"ibt_enable_cq_notify(rcq) "
"failed: status %d", result);
return (IBT_CM_REJECT);
}
break;
default:
return (IBT_CM_REJECT);
}
return (IBT_CM_ACCEPT);
}
/* ARGSUSED */
static ibt_cm_status_t
{
switch (ibt_cm_event->cm_type) {
case IBT_CM_EVENT_REP_RCV:
break;
case IBT_CM_EVENT_CONN_EST:
"ace=%p, act_state=%d, chan=%p",
break;
case IBT_CM_EVENT_CONN_CLOSED:
"rc_chan==NULL, IBT_CM_EVENT_CONN_CLOSED");
return (IBT_CM_ACCEPT);
}
"IBT_CM_EVENT_CONN_CLOSED, ace=%p, chan=%p, "
} else {
"is closing it, IBT_CM_EVENT_CONN_CLOSED, "
return (IBT_CM_ACCEPT);
}
} else {
}
break;
case IBT_CM_EVENT_FAILURE:
"ace=%p, chan=%p, code: %d, msg: %d, reason=%d",
/*
 * No need to free resources here; they are freed in
 * ibd_rc_connect().
 */
break;
case IBT_CM_EVENT_MRA_RCV:
break;
case IBT_CM_EVENT_LAP_RCV:
break;
case IBT_CM_EVENT_APR_RCV:
break;
default:
break;
}
return (result);
}
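/*
 * A minimal sketch of the dispatch shape shared by the two MAD
 * handlers above and below: switch on cm_type, handle the events the
 * driver cares about, and accept the rest.  The per-case bodies are
 * elided; the return values here are illustrative only.
 */
/* ARGSUSED */
static ibt_cm_status_t
ibd_rc_dispatch_sketch(void *arg, ibt_cm_event_t *ibt_cm_event,
    ibt_cm_return_args_t *ret_args, void *ret_priv_data,
    ibt_priv_data_len_t ret_len_max)
{
	switch (ibt_cm_event->cm_type) {
	case IBT_CM_EVENT_CONN_EST:
		/* Enable the Rx CQ; mark the channel established */
		return (IBT_CM_ACCEPT);
	case IBT_CM_EVENT_CONN_CLOSED:
		/* Tear down, or defer if another thread is closing */
		return (IBT_CM_ACCEPT);
	case IBT_CM_EVENT_FAILURE:
		/* Resources are freed back in ibd_rc_connect() */
		return (IBT_CM_ACCEPT);
	case IBT_CM_EVENT_MRA_RCV:
	case IBT_CM_EVENT_LAP_RCV:
	case IBT_CM_EVENT_APR_RCV:
	default:
		return (IBT_CM_ACCEPT);
	}
}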
/* ARGSUSED */
static ibt_cm_status_t
{
/* Receive an incoming CM REQ from active side */
return (result);
}
if (ibt_cm_event->cm_channel == 0) {
"ERROR ibt_cm_event->cm_channel == 0");
return (IBT_CM_REJECT);
}
chan =
return (IBT_CM_REJECT);
}
switch (ibt_cm_event->cm_type) {
case IBT_CM_EVENT_CONN_EST:
"chan=%p", chan);
break;
case IBT_CM_EVENT_CONN_CLOSED:
chan);
break;
case IBT_CM_EVENT_FAILURE:
" chan=%p, code: %d, msg: %d, reason=%d", chan,
chan);
return (IBT_CM_ACCEPT);
case IBT_CM_EVENT_MRA_RCV:
break;
case IBT_CM_EVENT_LAP_RCV:
break;
case IBT_CM_EVENT_APR_RCV:
break;
default:
break;
}
return (result);
}