ibcm_sm.c revision d22e11eb92a44ef85ea64989dbff7134a35829cc
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/ib/mgt/ibcm/ibcm_impl.h>
#include <sys/callb.h>
/*
* ibcm_sm.c
* These routines implement the CM state machine (both ACTIVE and PASSIVE)
*
* Points to Note :
*
* o CM uses one ibcm_hca_info_t entry per HCA to store all the
* connection state data belonging to that HCA in the AVL trees, etc.,
*
* o There is one state structure per RC, referenced from three AVL trees
* ie. the HCA active AVL tree, and the HCA passive AVL tree and HCA
* passive comid tree
*
* o SIDR state structures are stored in a linked list
*
* o The term statep generally refers to RC, until explicitly mentioned
* in the notes below
*
* o Any thread that may access statep increments the ref_cnt. This ensures
* that statep is not deleted when it is still being accessed and modified
* by other threads
*
* o Any thread that may want to search the AVL tree(s) holds the hca state
* table reader lock. If it shall insert/delete a new state structure, then
* the lock held is writer lock.
*
* o Incrementing and Decrementing the ref_cnt can happen only after acquiring
* statep mutex
*
* o Deleting a statep can happen only by acquiring the hca state writer lock
* and statep mutex and if ref_cnt is zero.
*
* o Statep mutexes are used to decrease the hca state table lock holding
* times, thus increasing the number of threads that can access hca
* global data structures
*
* o Statep mutexes cannot be held for a long time. They are primarily used to
* check the state of statep, change it and exit the lock. Other threads
* checking this statep find statep's new state, and may exit without
* further processing (as the statep->state has changed).
*
* o Statep mutex must be held while setting and unsetting the timer id
* values and during untimeout
*
* Re-stating, the overall purpose of these various locks are:
* - Minimize the time state table locks are held
* - Writer locks are held only while inserting/deleting into trees,
* so multiple readers can traverse data structures in parallel
* - Minimize the time statep mutex held, so other threads entering the same
* statep mutex are not held for long
*
* The CM state machine logic ensures that the statep is valid and exists
* when timeout callback (ibcm_timeout_cb) is called. This is ensured by
* cancelling timeouts on state changes, where appropriate
*
*
* The timeout processing is handled in the context in which the
* timeout callback is invoked.
*
* The CM STATE MACHINE logic flow:
*
* On an incoming MAD:-
*
* IBMF -> ibcm_process_incoming_mad
* Verify and branch to one of the below connection state routines.
* The callback arg from ibmf has the pointer to ibcm_hca_info_t
*
* 1. INCOMING REQ MAD
*
* Acquire hca state table WRITER lock
* Do lookup in passive AVL tree by remote qpn and remote hca guid
*
* If (new lookup)
*
* create new statep, initialize key fields
* obtain new local com id, insert into hca state AVL tree
* release hca state table WRITER lock
*
* Initialize remaining fields
* If invalid service id,
* send a REJ reply,
* decr ref_cnt holding state mutex
* If existing peer conn, check guids, and break the tie
* Call the cep state transition function
* Send an RTU/REJ reply
* Check and handle for any incoming REJ's during REQ RCVD state
*
* else if (existing lookup)
*
* increment refcnt holding state mutex
* release hca state table WRITER lock
*
* re-acquire the statep mutex
* if (statep->state is REP SENT/REJ SENT/ MRA SENT)
* resend the mad
* else if established
* handle the stale detection
* else
* drop the mad (no processing required)
* decr statep->ref_cnt, release state mutex
*
*
* 2. INCOMING REP MAD
*
* Acquire hca state READER lock
* Do lookup in hca state tree by local com id
* Release hca state table READER lock
*
* if lookup does not exist
* return
*
* if look up exists
* incr statep->ref_cnt holding state mutex
*
* acquire the statep lock
* if (state == ESTABLISHED or REJ SENT or MRA REP SENT)
* resend the MAD
* release state mutex, cancel req sent timer
* decrement ref_cnt holding the statep lock
* return
*
* if (state == REQ_SENT or REP_WAIT)
* first, change state to REP_RCVD
* release statep lock
* cancel timers
* lookup in the passive tree by remote qpn and remote hca guid
* if entry already exists
* handle the stale detection
* else
* add to the passive tree
*
* Initialize fields of statep
* Call the qp state transition function
* Post RTU/REJ reply
* Acquire the state mutex
* decrement the ref cnt
* release the statep lock
*
* 3. INCOMING MRA
*
* Acquire hca state table READER lock
* Do lookup in active hca state tree by local com id
* Release hca state table READER lock
*
* If lookup does not exist
* return
*
* if look up exists
* incr statep->ref_cnt holding state mutex
*
* acquire state mutex
* if (state is REQ_SENT or REP_SENT)
* change state to REP WAIT or MRA REP RCVD
* release state mutex
* cancel the current timer
*
* reacquire state mutex
* if (state is REP_WAIT or MRA_REP_RCVD)
* set new timer, using service timeout for the first timeout
* decr ref cnt, release state mutex
*
* 4. INCOMING RTU
*
* Acquire hca state table READER lock
* Do lookup in active hca state tree by local com id
* Release hca state table READER lock
*
* If lookup does not exist
* return
*
* if look up exists
* incr statep->ref_cnt holding state mutex
*
* acquire statep mutex
* if (state == REP_SENT or MRA REP RCVD))
* change state to ESTABLISHED
* release statep mutex
* cancel timer
*
* Change QP state
*
* acquire the statep mutex
* decrement the ref count
* release statep mutex
*
* 5. INCOMING REJ
*
* Acquire hca state table READER lock
* Do lookup in active hca state tree by local com id
* Release hca state table READER lock
*
* If lookup does not exist
* return
*
* if look up exists
* incr statep->ref_cnt holding state mutex
*
* if (state == REQ RCVD or REP RCVD or MRA_SENT or MRA_REP_SENT)
* set statep->delete = true
* decrement the ref_cnt
* release statep mutex;
*
* else if (state == REQ_SENT or REP SENT or MRA REP Rcvd)
* state = IBCM_STATE_DELETE
* Cancel running timers
* decrement the ref_cnt
* release state mutex
* Call the client QP handler
* delete the state data
*
* 6. INCOMING DREQ
*
* Acquire hca state table READER lock
* Do lookup in active hca state tree by local com id
* Release hca state table READER lock
*
* If lookup does not exist
* return
*
* if look up exists
* incr statep->ref_cnt holding state mutex
*
* acquire state mutex
* if (state is ESTABLISHED/DREQ SENT/TIMEWAIT)
* if state is ESTABLISHED/DREQ SENT,
* change state to DREQ RECVD
* start timers
*
* send DREP reply
* decr ref_cnt
* release state mutex
*
* 7. Incoming DREP
*
* Acquire hca state table READER lock
* Do lookup in active hca state tree by local com id
* Release hca state table READER lock
*
* If lookup does not exist
* return
*
* if look up exists
* incr statep->ref_cnt holding state mutex
*
* acquire state mutex
* if state is DREQ_SENT
* change state to DREP_RCVD
* cancel timer
* change state to TIMEWAIT
* set timewait timer
* decr ref_cnt
* release state mutex
*
* 8. Timeout handler
*
* (for states REQ SENT/REP SENT/REJ SENT/DREQ SENT/DREP SENT/TIMEWAIT)
*
* acquire the statep mutex
*
* if (set state != stored_state)
* The thread that changed the state is responsible for any cleanup
* decrement ref cnt
* release statep mutex
* return
* else if (statep's state == REJ SENT)
* change state to DELETE
* decrement ref cnt
* release statep mutex
* delete statep
* return
* else if (state == TIME WAIT)
* do the time wait state processing
* decrement ref cnt
* change state to DELETE
* release statep mutex
* delete statep, and also QP
* else if (remaining retry cnt > 0)
* resend the mad
* decrement ref cnt
* release statep mutex
* else if (state == rep sent or req sent or mra rep rcvd or rep wait)
* (retry counter expired)
* change state to REJ SENT (No one shall delete in REJ SENT)
* decrement the ref_cnt
* release the statep mutex
* Post REJ MAD
* cv_signal anyone blocking
* Invoke client handler
* else if state == DREQ_SENT
* change state to TIME WAIT
* decrement the ref cnt
* set a timer for time wait time
* release the statep mutex
*
*
* SIDR processing
*
* 9. INCOMING SIDR_REQ MAD
*
* Figure out LID/GID
* Do lookup in SIDR LIST based on LID, GID, grh_exists and req_id
* increment ud_statep->ud_ref_cnt
*
* If (new lookup)
*
* validate service id, and the create new statep,
* initialize key fields
* do a lookup based on service id
* if service_id_lookup returns exists
* set sidr_status to QPN_VALID
* else
* set sidr_status to SID_INVALID
* post SIDR_REP mad
* decr ud_statep->ud_ref_cnt, release ud_state_mutex
*
* else if (existing lookup)
*
* if (ud_statep->ud_state is SIDR_REP_SENT)
* resend the mad
*
* decr ud_statep->ud_ref_cnt, release ud_state_mutex
*
*
* 10. INCOMING SIDR_REP MAD
*
* Figure out LID/GID
* Do lookup in SIDR LIST based on LID, GID, grh_exists and req_id
* increment ud_statep->ud_ref_cnt
*
* if look up doesn't exist
* return
*
* if (state == SIDR_REQ_SENT)
* first, change state to SIDR_REP_RCVD
* release statep lock
* cancel timers
* cv_signal anyone blocking
* release the statep lock
* extract return args
* destroy the statep
*
* 11. Timeout handler
*
* (for states SIDR_REQ_SENT/SIDR_REP_SENT)
*
* acquire the statep mutex
*
* if (statep's state == SIDR_REP_SENT)
* change state to DELETE
* decrement ref cnt
* release statep mutex
* delete statep
* return
* else if (remaining retry cnt > 0 and state is SIDR_REQ_SENT)
* resend the mad
* decrement ref cnt
* release statep mutex
* else if (state == SIDR_REQ_SENT)
* (retry counter expired)
* change state to DELETE
* decrement the ref_cnt
* release the statep mutex
* cv_signal anyone blocking
* Invoke client handler
* delete statep
*/
/* Function prototypes */
static void ibcm_set_primary_adds_vect(ibcm_state_data_t *,
ibt_adds_vect_t *, ibcm_req_msg_t *);
static void ibcm_set_alt_adds_vect(ibcm_state_data_t *,
ibt_adds_vect_t *, ibcm_req_msg_t *);
static ibt_status_t ibcm_set_primary_cep_path(ibcm_state_data_t *,
ibt_cep_path_t *, ibcm_req_msg_t *);
static ibt_status_t ibcm_set_alt_cep_path(ibcm_state_data_t *,
ibt_cep_path_t *, ibcm_req_msg_t *);
static ibt_status_t ibcm_invoke_qp_modify(ibcm_state_data_t *,
ibcm_req_msg_t *, ibcm_rep_msg_t *);
static ibt_status_t ibcm_invoke_rtu_qp_modify(ibcm_state_data_t *,
ib_time_t, ibcm_rep_msg_t *);
static ibcm_status_t ibcm_sidr_req_ud_handler(ibcm_ud_state_data_t *,
ibcm_sidr_req_msg_t *, ibcm_mad_addr_t *,
ibt_sidr_status_t *);
static void ibcm_sidr_rep_ud_handler(ibcm_ud_state_data_t *,
ibcm_sidr_rep_msg_t *);
static void ibcm_handler_conn_fail(ibcm_state_data_t *,
uint8_t cf_code, uint8_t cf_msg,
ibt_cm_reason_t rej_reason, uint8_t *,
ibt_priv_data_len_t);
static void ibcm_build_n_post_rej_mad(uint8_t *input_madp,
ib_com_id_t, ibcm_mad_addr_t *, int, uint16_t);
static void ibcm_post_drep_mad(ibcm_state_data_t *);
static ibcm_status_t ibcm_verify_req_gids_and_svcid(
ibcm_state_data_t *statep,
ibcm_req_msg_t *cm_req_msgp);
static void ibcm_timeout_client_cb(ibcm_state_data_t *statep);
static void ibcm_ud_timeout_client_cb(
ibcm_ud_state_data_t *ud_statep);
static void ibcm_process_dreq_timeout(ibcm_state_data_t *statep);
static void ibcm_fill_adds_from_lap(ibt_adds_vect_t *adds,
ibcm_lap_msg_t *lap_msg, ibcm_mode_t mode);
static void ibcm_post_stored_apr_mad(ibcm_state_data_t *statep,
uint8_t *input_madp);
static ibcm_status_t ibcm_set_qp_from_apr(ibcm_state_data_t *statep,
ibcm_lap_msg_t *lap_msg);
static boolean_t ibcm_compare_prim_alt_paths(ibt_adds_vect_t *prim,
ibt_adds_vect_t *alt);
static void ibcm_process_get_classport_info(ibcm_hca_info_t *hcap,
uint8_t *input_madp, ibcm_mad_addr_t *cm_mad_addr);
static void ibcm_decode_classport_info(ibcm_hca_info_t *hcap,
uint8_t *input_madp, ibcm_mad_addr_t *cm_mad_addr);
static void ibcm_post_rej_ver_mismatch(uint8_t *input_madp,
ibcm_mad_addr_t *cm_mad_addr);
static void ibcm_init_clp_to_mad(ibcm_classportinfo_msg_t *clp,
ibt_redirect_info_t *rinfo);
static void ibcm_init_clp_from_mad(ibcm_classportinfo_msg_t *clp,
ibt_redirect_info_t *rinfo);
static void ibcm_copy_addl_rej(ibcm_state_data_t *statep,
ibcm_rej_msg_t *rej_msgp,
ibt_cm_conn_failed_t *failed);
static void ibcm_return_open_data(ibcm_state_data_t *statep,
ibcm_rep_msg_t *rep_msgp,
ibt_cm_reason_t reject_reason);
/* limit the number of taskq threads to handle received MADs. */
/*
* ibcm_recv_tasks - receive tasks currently claimed; bumped before each
* taskq_dispatch() attempt and dropped again when the
* dispatch fails or when ibcm_recv_task() finishes.
* ibcm_max_recv_tasks - once ibcm_recv_tasks reaches this value, new MADs are
* queued instead of being dispatched.
* ibcm_recv_timeouts - retry timeouts armed after a failed dispatch and not
* yet fired (decremented on entry to the callback).
*/
int ibcm_recv_tasks = 0;
int ibcm_max_recv_tasks = 24;
int ibcm_recv_timeouts = 0;
/*
* Tunable MAX MRA Service Timeout value in MicroSECONDS.
* 0 - Tunable parameter not used.
*
* Ex: 60000000 - Max MRA Service Delay is 60 Seconds.
*/
clock_t ibcm_mra_service_timeout_max = 0;
#ifdef DEBUG
static void print_modify_qp(char *prefix,
ibt_qp_hdl_t ibt_qp,
ibt_cep_modify_flags_t flags,
ibt_qp_info_t *qp_attr);
#endif
/* Warlock annotations */
_NOTE(READ_ONLY_DATA(ibt_arej_info_u))
/*
* ibcm_process_incoming_mad:
* Validates a CM MAD received from IBMF and hands it to the matching
* handler: ClassPortInfo Get/GetResponse processing, or the per-message
* state machine routine selected from ibcm_sm_funcs_tbl[].
*
* It is assumed that the incoming MAD (except for incoming REQ) belongs
* to a connection on the HCA, on which the MAD is received.
*
* The IBMF message is freed here with ibmf_free_msg() on every path,
* error or success, so the caller must not use msgp after this returns.
* An HCA access reference is held (ibcm_inc_hca_acc_cnt) for the duration
* of the processing and released before returning.
*
* NOTE: IBMF always invokes ibcm_recv_cb() in a taskq. CM does some memory
* allocations and invokes ibcm_sm_funcs_tbl[i]() in the same taskq.
*
* INPUTS:
* ibmf_handle - IBMF Handle on which the MAD was received
* msgp - IBMF allocated message holding the MAD (freed here)
* args - callback argument; interpreted as an ibcm_qp_list_t *,
* from which the port and HCA entries are derived
*
* RETURN VALUES: NONE
*/
void
ibcm_process_incoming_mad(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
void *args)
{
uint8_t method; /* Method type in MAD hdr */
ib_mad_hdr_t *in_mad_hdr; /* Incoming MAD's header */
ibcm_hca_info_t *hcap; /* pointer to HCA entry */
ibcm_port_info_t *portp; /* port the CM QP entry belongs to */
ibcm_mad_addr_t *cm_mad_addr; /* MAD address information */
ibcm_event_type_t attr_id; /* Attribute ID in MAD hdr */
ibcm_mad_addr_t loc_mad_addr; /* MAD address information */
ibcm_qp_list_t *cm_qp_entry; /* CM QP entry passed in as args */
int ibmf_status; /* result of ibmf_free_msg() */
/* Noticed that IBMF always calls with IBMF_SUCCESS, but still check */
if (msgp->im_msg_status != IBMF_SUCCESS) {
IBTF_DPRINTF_L2(cmlog, "ibcm_process_incoming_mad: "
"bad status %x", msgp->im_msg_status);
/* IBMF allocates Input MAD, so free it here */
if ((ibmf_status = ibmf_free_msg(ibmf_handle, &msgp)) !=
IBMF_SUCCESS)
IBTF_DPRINTF_L2(cmlog, "ibcm_process_incoming_mad: "
"ibmf_free_msg failed %d", ibmf_status);
return;
}
/* args is the CM QP list entry; port and HCA are reached through it */
cm_qp_entry = (ibcm_qp_list_t *)args;
IBTF_DPRINTF_L5(cmlog, "ibcm_process_incoming_mad: ibmf_hdl %p "
"msg %p args %p", ibmf_handle, msgp, args);
#ifdef DEBUG
if (ibcm_test_mode > 1)
ibcm_query_qp(ibmf_handle, cm_qp_entry->qp_cm);
#endif
portp = cm_qp_entry->qp_port;
hcap = portp->port_hcap;
IBTF_DPRINTF_L4(cmlog, "ibcm_process_incoming_mad: CM MAD on "
"port %d", portp->port_num);
/* Increment hca ref cnt, if HCA is in attached state, else fail */
if (ibcm_inc_hca_acc_cnt(hcap) != IBCM_SUCCESS) {
IBTF_DPRINTF_L2(cmlog, "ibcm_process_incoming_mad: "
"hca not in attach state");
/* IBMF allocates Input MAD, and ibcm free's it */
if ((ibmf_status = ibmf_free_msg(ibmf_handle, &msgp)) !=
IBMF_SUCCESS)
IBTF_DPRINTF_L2(cmlog, "ibcm_process_incoming_mad: "
"ibmf_free_msg failed %d", ibmf_status);
return;
}
_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cm_mad_addr))
/* the MAD address buffer lives on this thread's stack; zero it first */
cm_mad_addr = &loc_mad_addr;
bzero(cm_mad_addr, sizeof (ibcm_mad_addr_t));
cm_mad_addr->port_num = portp->port_num;
/* locate the generic MAD header and drop anything that is not a CM MAD */
in_mad_hdr = msgp->im_msgbufs_recv.im_bufs_mad_hdr;
if (in_mad_hdr->MgmtClass != MAD_MGMT_CLASS_COMM_MGT) {
IBTF_DPRINTF_L2(cmlog, "ibcm_process_incoming_mad: "
"bad mgmt class %x", in_mad_hdr->MgmtClass);
if ((ibmf_status = ibmf_free_msg(ibmf_handle, &msgp)) !=
IBMF_SUCCESS)
IBTF_DPRINTF_L2(cmlog, "ibcm_process_incoming_mad: "
"ibmf_free_msg failed %d", ibmf_status);
ibcm_dec_hca_acc_cnt(hcap);
return;
}
/* copy the local (and, when flagged, global) address of the sender */
cm_mad_addr->rcvd_addr = msgp->im_local_addr;
if (msgp->im_msg_flags & IBMF_MSG_FLAGS_GLOBAL_ADDRESS) {
cm_mad_addr->grh_hdr = msgp->im_global_addr;
cm_mad_addr->grh_exists = B_TRUE;
IBTF_DPRINTF_L3(cmlog, "ibcm_process_incoming_mad: "
"CM recv GID GUID %llX sender GID GUID %llX",
msgp->im_global_addr.ig_recver_gid.gid_guid,
msgp->im_global_addr.ig_sender_gid.gid_guid);
}
/* Save IBMF handle and ibmf qp related information */
cm_mad_addr->ibmf_hdl = ibmf_handle;
cm_mad_addr->cm_qp_entry = cm_qp_entry;
/* IBMF does not initialize ia_p_key for non-QP1's */
if (cm_qp_entry->qp_cm != IBMF_QP_HANDLE_DEFAULT)
cm_mad_addr->rcvd_addr.ia_p_key = cm_qp_entry->qp_pkey;
/* a PKEY without bit 0x8000 set is only logged as limited, not rejected */
if (cm_mad_addr->rcvd_addr.ia_p_key & 0x8000)
IBTF_DPRINTF_L5(cmlog, "ibcm_process_incoming_mad: PKEY %x",
cm_mad_addr->rcvd_addr.ia_p_key);
else
IBTF_DPRINTF_L2(cmlog, "ibcm_process_incoming_mad: CM MAD "
"arrived from limited PKEY %x",
cm_mad_addr->rcvd_addr.ia_p_key);
/* Retrieve the method and Attr-Id from generic mad header */
method = in_mad_hdr->R_Method;
attr_id = b2h16(in_mad_hdr->AttributeID);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_incoming_mad: "
"Method %x Attribute %x", method, attr_id);
/*
* Class version mismatch: only an incoming REQ is answered (with a REJ
* built by ibcm_post_rej_ver_mismatch); every other MAD is dropped.
*/
if (in_mad_hdr->ClassVersion != IBCM_MAD_CLASS_VERSION) {
IBTF_DPRINTF_L2(cmlog, "ibcm_process_incoming_mad: "
"unsupported ibcm class version %x",
in_mad_hdr->ClassVersion);
if (attr_id == (IBCM_INCOMING_REQ + IBCM_ATTR_BASE_ID))
ibcm_post_rej_ver_mismatch(
(uint8_t *)IBCM_IN_HDRP(msgp), cm_mad_addr);
if ((ibmf_status = ibmf_free_msg(ibmf_handle, &msgp)) !=
IBMF_SUCCESS)
IBTF_DPRINTF_L2(cmlog, "ibcm_process_incoming_mad: "
"ibmf_free_msg failed %d", ibmf_status);
ibcm_dec_hca_acc_cnt(hcap);
return;
}
IBTF_DPRINTF_L4(cmlog, "ibcm_process_incoming_mad: "
"Transaction Id 0x%llX", b2h64(in_mad_hdr->TransactionID));
#ifdef DEBUG
ibcm_decode_tranid(b2h64(in_mad_hdr->TransactionID), NULL);
#endif
_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cm_mad_addr))
/*
* The following are valid combination of Method type
* and attribute id in the received MAD :-
* o ClassPortInfo with Get or GetResponse method
* o CM messages with Send method
*/
if ((attr_id == MAD_ATTR_ID_CLASSPORTINFO) &&
((method == MAD_METHOD_GET) ||
(method == MAD_METHOD_GET_RESPONSE))) {
if (method == MAD_METHOD_GET)
ibcm_process_get_classport_info(hcap,
(uint8_t *)IBCM_IN_HDRP(msgp), cm_mad_addr);
else if (method == MAD_METHOD_GET_RESPONSE)
ibcm_decode_classport_info(hcap,
(uint8_t *)IBCM_IN_HDRP(msgp), cm_mad_addr);
} else if ((attr_id >= IBCM_ATTR_BASE_ID) &&
(attr_id < (IBCM_ATTR_BASE_ID + IBCM_MAX_EVENTS)) &&
(method == MAD_METHOD_SEND)) {
/* the range check above keeps the table index within bounds */
attr_id -= IBCM_ATTR_BASE_ID; /* figure out CM message id */
ASSERT(msgp->im_msgbufs_recv.im_bufs_mad_hdr != NULL);
/* Call the CM process connection state function */
ibcm_sm_funcs_tbl[attr_id](hcap,
(uint8_t *)IBCM_IN_HDRP(msgp), cm_mad_addr);
} else {
/*
* Any other combination of method and attribute are invalid,
* hence drop the MAD
*/
IBTF_DPRINTF_L2(cmlog, "ibcm_process_incoming_mad: "
"unknown Method %x or Attribute %x", method, attr_id);
}
/* decrement the hcap access reference count */
ibcm_dec_hca_acc_cnt(hcap);
/* ASSERT(NO_LOCKS_HELD); */
/* free up ibmf msgp */
if ((ibmf_status = ibmf_free_msg(ibmf_handle, &msgp)) != IBMF_SUCCESS)
IBTF_DPRINTF_L2(cmlog, "ibcm_process_incoming_mad: "
"ibmf_free_msg failed %d", ibmf_status);
}
/*
* Structure to carry the arguments of one received MAD. It is filled in by
* ibcm_recv_add_one() and handed to ibcm_recv_task() via taskq_dispatch (or
* to ibcm_recv_timeout_cb() via timeout when the dispatch has to be retried).
* The same layout is used for the slots of ibcm_recv_array.
*/
typedef struct ibcm_taskq_args_s {
ibmf_handle_t tq_ibmf_handle; /* IBMF handle the MAD arrived on */
ibmf_msg_t *tq_ibmf_msgp; /* IBMF message holding the MAD */
void *tq_args; /* callback argument from IBMF */
} ibcm_taskq_args_t;
/*
* Ring of received MADs waiting to be picked up by a running receive task.
*
* ibcm_put is the index of the slot filled most recently and ibcm_get the
* index of the slot consumed most recently; both are advanced before the
* slot is touched and wrap to 0 when they reach IBCM_RECV_MAX, so only
* indices 0 .. IBCM_RECV_MAX - 1 are ever used. The ring is empty when
* ibcm_put == ibcm_get and full when advancing ibcm_put would make it
* equal to ibcm_get.
*/
#define IBCM_RECV_MAX 128
ibcm_taskq_args_t ibcm_recv_array[IBCM_RECV_MAX + 1];
int ibcm_get, ibcm_put;
/* number of MADs handed to ibcm_recv_add_one() */
int ibcm_recv_total;
/* number of successful ibcm_recv_enqueue() calls (never decremented here) */
int ibcm_recv_queued;
_NOTE(READ_ONLY_DATA(ibcm_taskq_args_t))
/*
 * ibcm_recv_dequeue:
 *	Remove the oldest entry from the ibcm_recv_array ring and return its
 *	three fields through the output pointers.
 *
 *	The visible caller (ibcm_recv_task) holds ibcm_recv_mutex around
 *	this call.
 *
 * RETURN VALUES:
 *	1 - an entry was dequeued and the outputs are valid
 *	0 - the ring is empty; the outputs are left untouched
 */
static int
ibcm_recv_dequeue(ibmf_handle_t *ibmf_handlep, ibmf_msg_t **msgpp, void **argsp)
{
	ibcm_taskq_args_t *slot;
	int idx;

	/* consumer has caught up with the producer: nothing queued */
	if (ibcm_get == ibcm_put)
		return (0);

	/* step to the next slot, wrapping at the end of the ring */
	idx = ibcm_get + 1;
	if (idx >= IBCM_RECV_MAX)
		idx = 0;
	ibcm_get = idx;

	slot = &ibcm_recv_array[idx];
	*ibmf_handlep = slot->tq_ibmf_handle;
	*msgpp = slot->tq_ibmf_msgp;
	*argsp = slot->tq_args;

	return (1);
}
/*
 * ibcm_recv_enqueue:
 *	Append one received MAD to the ibcm_recv_array ring so that a running
 *	receive task can pick it up later. Must be called with
 *	ibcm_recv_mutex held.
 *
 * RETURN VALUES:
 *	1 - the MAD was queued
 *	0 - the ring is full; nothing was stored
 */
static int
ibcm_recv_enqueue(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args)
{
	ibcm_taskq_args_t *slot;
	int idx;

	ASSERT(MUTEX_HELD(&ibcm_recv_mutex));

	/* candidate producer index, wrapping at the end of the ring */
	idx = ibcm_put + 1;
	if (idx >= IBCM_RECV_MAX)
		idx = 0;

	/* stepping onto the consumer index would overwrite unread data */
	if (idx == ibcm_get)
		return (0);

	ibcm_recv_queued++;
	ibcm_put = idx;
	slot = &ibcm_recv_array[idx];

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*slot))
	slot->tq_ibmf_handle = ibmf_handle;
	slot->tq_ibmf_msgp = msgp;
	slot->tq_args = args;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*slot))

	return (1);
}
/*
 * ibcm_drop_msg:
 *	Discard a received MAD that could be neither dispatched nor queued:
 *	log the drop and give the message back to IBMF.
 *
 * INPUTS:
 *	ibmf_handle	- IBMF handle the message belongs to
 *	msgp		- IBMF message to free
 *
 * RETURN VALUES: NONE
 */
void
ibcm_drop_msg(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp)
{
	int ibmf_status;

	IBTF_DPRINTF_L2(cmlog, "ibcm_drop_msg: discarding MAD");

	ibmf_status = ibmf_free_msg(ibmf_handle, &msgp);
	if (ibmf_status == IBMF_SUCCESS)
		return;

	/* the free failed; all that can be done here is to report it */
	IBTF_DPRINTF_L2(cmlog, "ibcm_drop_msg: "
	    "ibmf_free_msg failed %d", ibmf_status);
}
/*
 * ibcm_recv_task:
 *	Body of a receive task, run in a taskq thread.
 *
 *	Processes the MAD described by args (an ibcm_taskq_args_t, which is
 *	freed here), then keeps draining the ibcm_recv_array ring until it is
 *	empty. ibcm_recv_mutex is dropped while each MAD is processed. The
 *	task gives up its ibcm_recv_tasks slot just before returning.
 */
static void
ibcm_recv_task(void *args)
{
	ibcm_taskq_args_t	*first = (ibcm_taskq_args_t *)args;
	ibmf_handle_t		queued_hdl;
	ibmf_msg_t		*queued_msgp;
	void			*queued_args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_recv_task: Processing incoming MAD"
	    " via taskq");

	/* the MAD this task was dispatched for */
	ibcm_process_incoming_mad(first->tq_ibmf_handle,
	    first->tq_ibmf_msgp, first->tq_args);
	kmem_free(first, sizeof (ibcm_taskq_args_t));

	/* process queued entries before giving up this thread */
	mutex_enter(&ibcm_recv_mutex);
	for (;;) {
		if (ibcm_recv_dequeue(&queued_hdl, &queued_msgp,
		    &queued_args) == 0)
			break;
		mutex_exit(&ibcm_recv_mutex);
		ibcm_process_incoming_mad(queued_hdl, queued_msgp,
		    queued_args);
		mutex_enter(&ibcm_recv_mutex);
	}
	ibcm_recv_tasks--;
	mutex_exit(&ibcm_recv_mutex);
}
static void
ibcm_recv_timeout_cb(void *args)
{
ibcm_taskq_args_t *tq = (ibcm_taskq_args_t *)args;
int rv = 1;
mutex_enter(&ibcm_recv_mutex);
ibcm_recv_timeouts--;
if (ibcm_recv_tasks == 0) {
ibcm_recv_tasks++;
mutex_exit(&ibcm_recv_mutex);
if (taskq_dispatch(ibcm_taskq, ibcm_recv_task, tq,
TQ_NOQUEUE | TQ_NOSLEEP) == 0) {
mutex_enter(&ibcm_recv_mutex);
if (--ibcm_recv_tasks == 0) {
(void) timeout(ibcm_recv_timeout_cb, tq, 1);
ibcm_recv_timeouts++;
} else {
rv = ibcm_recv_enqueue(tq->tq_ibmf_handle,
tq->tq_ibmf_msgp, tq->tq_args);
kmem_free(tq, sizeof (*tq));
}
mutex_exit(&ibcm_recv_mutex);
}
} else {
/*
* one or more taskq threads are running now
* so just try to enqueue this one.
*/
rv = ibcm_recv_enqueue(tq->tq_ibmf_handle,
tq->tq_ibmf_msgp, tq->tq_args);
kmem_free(tq, sizeof (*tq));
mutex_exit(&ibcm_recv_mutex);
}
if (rv == 0)
ibcm_drop_msg(tq->tq_ibmf_handle, tq->tq_ibmf_msgp);
}
/*
 * ibcm_recv_add_one:
 *	Accept one received MAD for processing.
 *
 *	While fewer than ibcm_max_recv_tasks receive tasks are claimed, a new
 *	ibcm_recv_task() is dispatched for the MAD; otherwise the MAD is
 *	queued on the ibcm_recv_array ring for a running task to pick up.
 *	If the dispatch cannot be done (no memory, or taskq_dispatch fails),
 *	the MAD is queued when some task is running, or retried from a
 *	timeout when none is.
 *
 * RETURN VALUES:
 *	1 - the MAD was dispatched, queued, or scheduled for a retry
 *	0 - the MAD could not be accepted; the caller is dropping it
 */
static int
ibcm_recv_add_one(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args)
{
	ibcm_taskq_args_t *req;
	int accepted;

	mutex_enter(&ibcm_recv_mutex);
	ibcm_recv_total++;

	if (ibcm_recv_tasks >= ibcm_max_recv_tasks) {
		/* enough tasks are busy already: just queue this one */
		accepted = ibcm_recv_enqueue(ibmf_handle, msgp, args);
		mutex_exit(&ibcm_recv_mutex);
		return (accepted);
	}

	/* claim a task slot, then dispatch without holding the mutex */
	ibcm_recv_tasks++;
	mutex_exit(&ibcm_recv_mutex);

	req = kmem_alloc(sizeof (*req), KM_NOSLEEP);
	if (req == NULL) {
		mutex_enter(&ibcm_recv_mutex);
		ibcm_recv_tasks--;
		/* don't enqueue if no threads are running */
		if (ibcm_recv_tasks > 0)
			accepted = ibcm_recv_enqueue(ibmf_handle, msgp, args);
		else
			accepted = 0;
		mutex_exit(&ibcm_recv_mutex);
		return (accepted);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*req))
	req->tq_ibmf_handle = ibmf_handle;
	req->tq_ibmf_msgp = msgp;
	req->tq_args = args;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*req))

	if (taskq_dispatch(ibcm_taskq, ibcm_recv_task, req,
	    TQ_NOQUEUE | TQ_NOSLEEP) != 0)
		return (1);

	/* dispatch failed: release the slot claimed above */
	mutex_enter(&ibcm_recv_mutex);
	ibcm_recv_tasks--;
	if (ibcm_recv_tasks == 0) {
		/* try the dispatch again, after a tick */
		(void) timeout(ibcm_recv_timeout_cb, req, 1);
		ibcm_recv_timeouts++;
		accepted = 1;	/* indicate success */
	} else {
		/* some task is running; let it pick this MAD up */
		accepted = ibcm_recv_enqueue(ibmf_handle, msgp, args);
		kmem_free(req, sizeof (*req));
	}
	mutex_exit(&ibcm_recv_mutex);

	return (accepted);
}
/*
 * ibcm_recv_cb:
 *	The CM callback that is invoked by IBMF, when a valid CM MAD arrives
 *	on any of the ibmf handles registered by CM. The MAD is handed to
 *	ibcm_recv_add_one(); if it cannot be accepted there, it is dropped.
 *
 * INPUTS:
 *	ibmf_handle	- IBMF Handle
 *	msgp		- IBMF msg containing the MAD (allocated by IBMF)
 *	args		- callback argument, passed through unchanged;
 *			  ibcm_process_incoming_mad() treats it as a pointer
 *			  to ibcm_qp_list_t
 *
 * RETURN VALUES: NONE
 */
void
ibcm_recv_cb(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args)
{
	int accepted;

	accepted = ibcm_recv_add_one(ibmf_handle, msgp, args);
	if (accepted != 0)
		return;

	/* neither dispatched nor queued: free the message */
	ibcm_drop_msg(ibmf_handle, msgp);
}
/*
* ibcm_process_req_msg:
* PASSIVE SIDE CM
* Called from ibcm_process_incoming_mad on reception of a REQ message
*
* Description:
* If it is a new REQ (not duplicate)
* creates a new state structure in passive connection mode
* populate state structure fields
* inserts state structure in hca active and passive trees
* validates service id
* validates primary and alternate lid/gid in REQ,
* calls QP state transition function
* generates REP/REJ response
* stores the response MAD in state structure for future re-sends
* initializes timers as required
* If a duplicate REQ, action depends upon current state in the state
* structure
*
* INPUTS:
* hcap - HCA entry ptr
* input_madp - CM MAD that is input to this function
* cm_mad_addr - Address information for the MAD
*
* RETURN VALUE:
* NONE
*/
void
ibcm_process_req_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp,
ibcm_mad_addr_t *cm_mad_addr)
{
ibt_priv_data_len_t arej_info_len = 0;
ib_qpn_t remote_qpn;
ib_guid_t remote_hca_guid;
ib_com_id_t remote_comid;
ib_com_id_t local_comid;
ibcm_status_t state_lookup_status;
ibcm_status_t comid_lookup_status;
ibcm_status_t response;
ibcm_req_msg_t *req_msgp =
(ibcm_req_msg_t *)&input_madp[IBCM_MAD_HDR_SIZE];
ibt_cm_reason_t reject_reason = IBT_CM_SUCCESS;
ibcm_state_data_t *statep;
ibcm_state_data_t *stale_statep = NULL;
ibcm_status_t svc_gid_check;
uint32_t psn24_timeout5_retry3;
ibt_tran_srv_t trans;
IBTF_DPRINTF_L5(cmlog, "ibcm_process_req_msg(%p, %p, %p)",
hcap, input_madp, cm_mad_addr);
/*
* Lookup for an existing state structure or create a new state struct
* If there is no entry, the lookup function also allocates a new
* state structure and inserts in the table, initializes remote qpn
* and hca guid from REQ
*/
remote_hca_guid = b2h64(req_msgp->req_local_ca_guid);
remote_qpn = b2h32(req_msgp->req_local_qpn_plus) >> 8;
remote_comid = b2h32(req_msgp->req_local_comm_id);
IBCM_DUMP_RAW_MSG((uchar_t *)input_madp);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_req_msg: remote_comid = %x"
" remote_qpn = %x", remote_comid, remote_qpn);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_req_msg: remote_hcaguid = %llX",
remote_hca_guid);
_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
new_req:
/* allocate the local_comid before proceeding */
if (ibcm_alloc_comid(hcap, &local_comid) != IBCM_SUCCESS) {
ibcm_build_n_post_rej_mad(input_madp,
b2h32(req_msgp->req_local_comm_id), cm_mad_addr,
IBT_CM_FAILURE_REQ, IBT_CM_NO_RESC);
return;
}
/* allocate ibcm_state_data_t before grabbing the WRITER lock */
statep = kmem_zalloc(sizeof (*statep), KM_SLEEP);
rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
/* NOTE that only a writer lock is held here */
state_lookup_status = ibcm_lookup_msg(IBCM_INCOMING_REQ,
local_comid, remote_qpn, remote_hca_guid, hcap, &statep);
if (state_lookup_status == IBCM_LOOKUP_NEW) {
/* seeing the REQ request for the first time */
mutex_enter(&statep->state_mutex);
/* Release the state table lock */
rw_exit(&hcap->hca_state_rwlock);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_req_msg: New statep 0x%p"
" created", statep);
psn24_timeout5_retry3 = b2h32(req_msgp->req_starting_psn_plus);
_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
/* if ibmf msg allocation fails, delete the statep */
if (ibcm_alloc_out_msg(cm_mad_addr->ibmf_hdl,
&statep->stored_msg, MAD_METHOD_SEND) != IBT_SUCCESS) {
IBCM_REF_CNT_DECR(statep);
statep->state = IBCM_STATE_DELETE;
mutex_exit(&statep->state_mutex);
/* HCA res cnt decremented via ibcm_delete_state_data */
ibcm_inc_hca_res_cnt(hcap);
ibcm_delete_state_data(statep);
return;
}
/* Allocate dreq_msg buf to be used during teardown. */
if (ibcm_alloc_out_msg(cm_mad_addr->ibmf_hdl,
&statep->dreq_msg, MAD_METHOD_SEND) != IBT_SUCCESS) {
IBCM_REF_CNT_DECR(statep);
statep->state = IBCM_STATE_DELETE;
mutex_exit(&statep->state_mutex);
IBTF_DPRINTF_L2(cmlog, "ibcm_process_req_msg: "
"statep 0x%p: Failed to allocate dreq_msg", statep);
/* HCA res cnt decremented via ibcm_delete_state_data */
ibcm_inc_hca_res_cnt(hcap);
ibcm_delete_state_data(statep);
return;
}
/* initialize some "statep" fields */
statep->mode = IBCM_PASSIVE_MODE;
statep->hcap = hcap;
statep->remote_comid = remote_comid;
statep->svcid = b2h64(req_msgp->req_svc_id);
/*
* get the remote_ack_delay, etc.
*/
statep->remote_ack_delay =
ibt_ib2usec(req_msgp->req_primary_localtime_plus >> 3);
statep->cep_retry_cnt = psn24_timeout5_retry3 & 0x7;
/*
* get the req_max_cm_retries
*/
statep->max_cm_retries = req_msgp->req_max_cm_retries_plus >> 4;
statep->remaining_retry_cnt = statep->max_cm_retries;
/* Approximate pkt life time for now */
statep->pkt_life_time = statep->remote_ack_delay/2;
/* Passive side timer is set to LocalCMRespTime in REQ */
statep->timer_value =
ibt_ib2usec(psn24_timeout5_retry3 >> 3 & 0x1f);
statep->starting_psn = psn24_timeout5_retry3 >> 8;
IBTF_DPRINTF_L4(cmlog, "ibcm_process_req_msg: statep 0x%p "
"active cep timeout(usec) = %u",
statep, statep->remote_ack_delay);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_req_msg: "
"passive timer(usec) = %u", statep->timer_value);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_req_msg: "
"approx pkt lt(usec)= %u ", statep->pkt_life_time);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_req_msg: "
"max cm retries %u", statep->max_cm_retries);
/* The reply ie., REP/REJ transaction id copied from REQ */
IBCM_OUT_HDRP(statep->stored_msg)->TransactionID =
((ib_mad_hdr_t *)(input_madp))->TransactionID;
/*
* Initialize the stale clock. Any other REQ
* messages on this statep are considered as duplicate
* if they arrive within stale clock
* ibcm_adj_btime is used to offset for retry REQ's
* arriving just after expected retry clock
*/
statep->stale_clock = gethrtime() +
(hrtime_t)(ibcm_adj_btime * 1000000000) +
(hrtime_t)statep->remote_ack_delay *
(statep->max_cm_retries * (1000 / 2));
mutex_exit(&statep->state_mutex);
ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_REQ);
/* Increment the hca's resource count */
ibcm_inc_hca_res_cnt(hcap);
ibcm_build_reply_mad_addr(cm_mad_addr,
&statep->stored_reply_addr);
if (statep->stored_reply_addr.cm_qp_entry == NULL) {
IBTF_DPRINTF_L2(cmlog, "ibcm_process_req_msg: "
"statep 0x%p cm_qp_entry alloc failed", statep);
/*
* Not much choice. CM MADs cannot go on QP1, not even
* REJ. Hence delete state data and go away silently.
* The remote will timeout after repeated attempts
*/
mutex_enter(&statep->state_mutex);
IBCM_REF_CNT_DECR(statep);
statep->state = IBCM_STATE_DELETE;
mutex_exit(&statep->state_mutex);
ibcm_delete_state_data(statep);
return;
}
stale_statep = statep;
rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
comid_lookup_status = ibcm_lookup_msg(IBCM_INCOMING_REQ_STALE,
remote_comid, 0, remote_hca_guid, hcap, &stale_statep);
rw_exit(&hcap->hca_state_rwlock);
if (comid_lookup_status == IBCM_LOOKUP_EXISTS) {
IBTF_DPRINTF_L2(cmlog, "ibcm_process_req_msg: "
"dup comid %x stale_statep 0x%p statep 0x%p",
remote_comid, stale_statep, statep);
ibcm_insert_trace(stale_statep,
IBCM_TRACE_STALE_DETECT);
/* Send a REJ with duplicate com id */
ibcm_post_rej_mad(statep, IBT_CM_DUP_COM_ID,
IBT_CM_FAILURE_REQ, NULL, 0);
/*
* Don't free the ibmf msg, if stale_statep is not in
* ESTABLISHED state, because probability is very less.
* ibmf msg shall be deleted along with statep
*/
/*
* if stale_statep is in established state, process
* stale connection handling on stale_statep
*/
mutex_enter(&stale_statep->state_mutex);
if (stale_statep->state == IBCM_STATE_ESTABLISHED) {
stale_statep->state =
IBCM_STATE_TRANSIENT_DREQ_SENT;
stale_statep->stale = B_TRUE;
/* Cancel pending ibt_set_alt_path */
ibcm_sync_lapr_idle(stale_statep);
/* The above call releases the state mutex */
if (stale_statep->dreq_msg == NULL)
(void) ibcm_alloc_out_msg(stale_statep->
stored_reply_addr.ibmf_hdl,
&stale_statep->dreq_msg,
MAD_METHOD_SEND);
/*
* Spec says, post DREQ MAD on the stale
* channel. This moves channel into timewait
*/
if (stale_statep->dreq_msg != NULL) {
ibcm_post_dreq_mad(stale_statep);
mutex_enter(&stale_statep->state_mutex);
} else {
mutex_enter(&stale_statep->state_mutex);
/* Set it back to original state. */
stale_statep->state =
IBCM_STATE_ESTABLISHED;
cv_broadcast(
&stale_statep->block_mad_cv);
}
}
IBCM_REF_CNT_DECR(stale_statep);
mutex_exit(&stale_statep->state_mutex);
mutex_enter(&statep->state_mutex);
IBCM_REF_CNT_DECR(statep);
mutex_exit(&statep->state_mutex);
return;
}
/* If unknown service type, just post a REJ */
trans = ((uint8_t *)&req_msgp->req_remote_eecn_plus)[3] >> 1 &
0x3;
if ((trans != IBT_RC_SRV) && (trans != IBT_UC_SRV) &&
(trans != IBT_RD_SRV)) {
IBTF_DPRINTF_L2(cmlog, "ibcm_process_req_msg: "
"statep 0x%p invalid transport type %x", statep,
trans);
/* Send a REJ with invalid transport type */
ibcm_post_rej_mad(statep, IBT_CM_INVALID_SRV_TYPE,
IBT_CM_FAILURE_REQ, NULL, 0);
mutex_enter(&statep->state_mutex);
IBCM_REF_CNT_DECR(statep);
mutex_exit(&statep->state_mutex);
return;
}
/* Validate the gids, lids and service id */
svc_gid_check = ibcm_verify_req_gids_and_svcid(statep,
req_msgp);
if (svc_gid_check == IBCM_FAILURE) {
IBTF_DPRINTF_L3(cmlog, "ibcm_process_req_msg: Either "
"gid or sid invalid for statep 0x%p", statep);
mutex_enter(&statep->state_mutex);
IBCM_REF_CNT_DECR(statep);
mutex_exit(&statep->state_mutex);
/* REJ posted from ibcm_verify_req_gids_and_svcid */
return;
}
/* Call the QP state transition processing function */
response = ibcm_cep_state_req(statep, req_msgp,
&reject_reason, &arej_info_len);
/* If defer, return holding the statep ref cnt */
if (response == IBCM_DEFER) {
IBTF_DPRINTF_L4(cmlog, "ibcm_process_req_msg: "
"statep %0xp client returned DEFER response",
statep);
return;
}
/* statep ref cnt decremented in the func below */
ibcm_handle_cep_req_response(statep, response,
reject_reason, arej_info_len);
_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
return;
} else {
rw_exit(&hcap->hca_state_rwlock);
ibcm_free_comid(hcap, local_comid);
}
if (state_lookup_status == IBCM_LOOKUP_EXISTS) {
hrtime_t cur_time;
mutex_enter(&statep->state_mutex);
/*
* There is an existing state structure entry
* with the same active comid
* Resending REP MAD is necessary only for REP/REJ/MRA Sent
* states
* Any other state implies the active has already received
* the REP/REJ response, and this REQ is an old MAD popping
* out of the fabric, hence no resend is required
*/
cur_time = gethrtime();
if ((remote_comid == statep->remote_comid) &&
(IBCM_OUT_HDRP(statep->stored_msg)->TransactionID ==
((ib_mad_hdr_t *)(input_madp))->TransactionID) &&
(cur_time <= statep->stale_clock)) {
ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_REQ);
if (statep->state == IBCM_STATE_REP_SENT)
ibcm_resend_rep_mad(statep);
else if (statep->state == IBCM_STATE_REJ_SENT)
ibcm_resend_rej_mad(statep);
else if (statep->state == IBCM_STATE_MRA_SENT)
ibcm_resend_mra_mad(statep);
/* decrementing ref cnt and returning from below */
} else if ((statep->state == IBCM_STATE_REJ_SENT) &&
remote_comid != statep->remote_comid) {
timeout_id_t timer_val;
IBTF_DPRINTF_L2(cmlog, "ibcm_process_req_msg: "
"statep 0x%p being retired, REMOTE_QPN %x",
statep, remote_qpn);
/*
* OK, this is reuse of the QPN on the active side
* that was not connected last time. This REQ is
* considered NEW. We delete the statep here,
* then start over from the top.
*/
statep->state = IBCM_STATE_DELETE;
timer_val = statep->timerid;
statep->timerid = 0;
mutex_exit(&statep->state_mutex);
if (timer_val)
(void) untimeout(timer_val);
IBCM_REF_CNT_DECR(statep);
ibcm_delete_state_data(statep);
goto new_req;
/*
* The statep is stale in the following cases :-
* 1) if incoming REQ's comid's doesn't match with what is
* stored in statep
* 2) incoming REQ's local comid matches with statep's
* remote comid, but the REQ is for a new connection.
* This is verified that by comparing the current time
* with stale clock in statep
*/
} else {
/* This is a stale connection on passive side */
ibcm_insert_trace(statep, IBCM_TRACE_STALE_DETECT);
IBTF_DPRINTF_L2(cmlog, "ibcm_process_req_msg: "
"stale detected statep %p state %x",
statep, statep->state);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_req_msg: "
"cur_time 0x%llX stale_clock 0x%llX", cur_time,
statep->stale_clock);
if (statep->state == IBCM_STATE_ESTABLISHED) {
statep->state = IBCM_STATE_TRANSIENT_DREQ_SENT;
statep->stale = B_TRUE;
/* Cancel pending ibt_set_alt_path */
ibcm_sync_lapr_idle(statep);
/* The above call releases the state mutex */
if (statep->dreq_msg == NULL)
(void) ibcm_alloc_out_msg(
statep->stored_reply_addr.ibmf_hdl,
&statep->dreq_msg, MAD_METHOD_SEND);
/*
* Spec says, post DREQ MAD on the stale
* channel. This moves channel into timewait
*/
if (statep->dreq_msg != NULL)
ibcm_post_dreq_mad(statep);
else {
mutex_enter(&statep->state_mutex);
statep->state = IBCM_STATE_ESTABLISHED;
cv_broadcast(&statep->block_mad_cv);
mutex_exit(&statep->state_mutex);
}
} else {
/*
* If not in established state, the CM
* protocol would timeout and delete the
* statep that is stale, eventually
*/
mutex_exit(&statep->state_mutex);
}
/* Post a REJ MAD to the incoming REQ's sender */
ibcm_build_n_post_rej_mad(input_madp,
b2h32(req_msgp->req_local_comm_id),
cm_mad_addr, IBT_CM_FAILURE_REQ, IBT_CM_CONN_STALE);
mutex_enter(&statep->state_mutex);
}
IBCM_REF_CNT_DECR(statep); /* decrement the ref count */
mutex_exit(&statep->state_mutex);
}
}
/*
 * ibcm_handle_cep_req_response:
 *	Acts on the verdict that ibcm_cep_state_req produced for an incoming
 *	REQ. A response of IBCM_SEND_REP posts the REP MAD; any other
 *	response is expected to be IBCM_SEND_REJ (asserted) and posts a REJ
 *	MAD carrying reject_reason and arej_info_len.
 *
 *	Must be called holding a statep ref cnt. That ref cnt is dropped,
 *	under state_mutex, before returning.
 */
void
ibcm_handle_cep_req_response(ibcm_state_data_t *statep, ibcm_status_t response,
    ibt_cm_reason_t reject_reason, uint8_t arej_info_len)
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))

	if (response != IBCM_SEND_REP) {
		/* Only REP and REJ are valid verdicts at this point */
		ASSERT(response == IBCM_SEND_REJ);
		IBTF_DPRINTF_L4(cmlog, "ibcm_handle_cep_req_response: statep %p"
		    " posting REJ reject_reason = %d", statep, reject_reason);
		ibcm_post_rej_mad(statep, reject_reason, IBT_CM_FAILURE_REQ,
		    NULL, arej_info_len);
	} else {
		ibcm_post_rep_mad(statep);
	}

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))

	/* Release the caller's reference on statep */
	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
* ibcm_process_rep_msg:
* ACTIVE SIDE CM
* Called from ibcm_process_incoming_mad on reception of a REP message
*
* Outline of the processing done below:
* o The statep is looked up by the comid found in the REP's
* rep_remote_comm_id field. If the lookup fails, a REJ with
* IBT_CM_INVALID_CID is posted and nothing else is done.
* o A REP whose transaction id differs from the one in the stored
* MAD is dropped.
* o ESTABLISHED/DREQ_SENT, REJ_SENT and MRA_REP_SENT states re-send
* the RTU, REJ and MRA respectively.
* o REQ_SENT/REP_WAIT states accept the REP: the REQ timer is
* cancelled, the remote QPN, comid and HCA guid are recorded in statep,
* stale connection detection is run, and the REP is handed to
* ibcm_cep_state_rep().
* o DELETE state posts a REJ with IBT_CM_INVALID_CID.
* o The statep ref cnt is decremented here, or by a helper called from
* here, on every path except when ibcm_cep_state_rep() returns
* IBCM_DEFER; in that case the function returns without decrementing it.
*
* INPUTS:
* hcap - HCA entry pointer
* input_madp - CM MAD that is input to this function
* cm_mad_addr - Address information for the MAD
*
* RETURN VALUE: NONE
*/
void
ibcm_process_rep_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp,
ibcm_mad_addr_t *cm_mad_addr)
{
ibt_priv_data_len_t arej_info_len = 0;
ib_com_id_t local_comid;
timeout_id_t timer_val;
ibcm_status_t lookup_status; /* state lookup status */
ibcm_status_t stale_lookup_status;
ibcm_status_t stale_comid_lookup_status;
ibcm_status_t response;
ibcm_rep_msg_t *rep_msgp; /* Response REP mesg */
ibt_cm_reason_t reject_reason;
ibcm_state_data_t *statep = NULL;
ibcm_state_data_t *stale_qpn = NULL;
ibcm_state_data_t *stale_comid = NULL;
ib_guid_t remote_ca_guid;
IBTF_DPRINTF_L3(cmlog, "ibcm_process_rep_msg:");
/* The REP payload starts right after the MAD header */
rep_msgp = (ibcm_rep_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]);
/*
* NOTE(review): the comid is printed here as read from the MAD, i.e.
* before the b2h32() conversion applied on the next statement -
* confirm whether printing the unconverted value is intended.
*/
IBTF_DPRINTF_L5(cmlog, "ibcm_process_rep_msg: active comid: %x",
rep_msgp->rep_remote_comm_id);
/* The sender's "remote" comid is this (active) side's comid */
local_comid = b2h32(rep_msgp->rep_remote_comm_id);
/* lookup message holding a reader lock */
rw_enter(&hcap->hca_state_rwlock, RW_READER);
lookup_status = ibcm_lookup_msg(IBCM_INCOMING_REP, local_comid, 0, 0,
hcap, &statep);
rw_exit(&hcap->hca_state_rwlock);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_rep_msg: lkup status %x, "
"statep 0x%p active comid %x", lookup_status, statep, local_comid);
/* No statep for this comid: REJ the sender with "invalid comid" */
if (lookup_status == IBCM_LOOKUP_FAIL) {
ibcm_build_n_post_rej_mad(input_madp,
b2h32(rep_msgp->rep_local_comm_id), cm_mad_addr,
IBT_CM_FAILURE_REP, IBT_CM_INVALID_CID);
return;
}
/* if transaction id is not as expected, drop the REP mad */
if (IBCM_OUT_HDRP(statep->stored_msg)->TransactionID !=
((ib_mad_hdr_t *)(input_madp))->TransactionID) {
/* The mismatch is logged before the ref cnt is dropped below */
IBTF_DPRINTF_L3(cmlog, "ibcm_process_rep_msg: statep 0x%p, "
"An REP MAD with tid expected 0x%llX tid found 0x%llX ",
statep,
b2h64(IBCM_OUT_HDRP(statep->stored_msg)->TransactionID),
b2h64(((ib_mad_hdr_t *)(input_madp))->TransactionID));
mutex_enter(&statep->state_mutex);
IBCM_REF_CNT_DECR(statep);
mutex_exit(&statep->state_mutex);
return;
}
ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_REP);
/* grab mutex first */
mutex_enter(&statep->state_mutex);
/*
* There is a state structure entry with active comid
* First, handle the re-send cases
* The resend routines below release the state mutex
*
* NOTE(review): these three branches fall through to the
* IBCM_REF_CNT_DECR/mutex_exit at the bottom of this function, so the
* resend routines presumably re-acquire state_mutex before returning -
* confirm against their definitions.
*/
if (statep->state == IBCM_STATE_ESTABLISHED ||
statep->state == IBCM_STATE_DREQ_SENT)
ibcm_resend_rtu_mad(statep);
else if (statep->state == IBCM_STATE_REJ_SENT)
ibcm_resend_rej_mad(statep);
else if (statep->state == IBCM_STATE_MRA_REP_SENT)
ibcm_resend_mra_mad(statep);
else if ((statep->state == IBCM_STATE_REQ_SENT) ||
(statep->state == IBCM_STATE_REP_WAIT)) {
/* This is the REP being waited for: change state */
statep->state = IBCM_STATE_REP_RCVD;
statep->clnt_proceed = IBCM_BLOCK;
/*
* cancel the REQ timer; untimeout() is called only after
* state_mutex has been released
*/
if (statep->timerid != 0) {
timer_val = statep->timerid;
statep->timerid = 0;
mutex_exit(&statep->state_mutex);
(void) untimeout(timer_val);
} else {
mutex_exit(&statep->state_mutex);
}
_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
/* Initialize the remote destination QPN for further MADs */
statep->stored_reply_addr.rcvd_addr.ia_remote_qno =
cm_mad_addr->rcvd_addr.ia_remote_qno;
/* Record the passive side's QPN, comid and HCA guid from the REP */
statep->remote_qpn = b2h32(rep_msgp->rep_local_qpn_plus) >> 8;
statep->remote_comid = b2h32(rep_msgp->rep_local_comm_id);
bcopy(rep_msgp->rep_local_ca_guid, &remote_ca_guid,
sizeof (ib_guid_t));
statep->remote_hca_guid = b2h64(remote_ca_guid);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_rep_msg: statep 0x%p "
"passive cid = %x passive qpn = %x", statep,
statep->remote_comid, statep->remote_qpn);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_rep_msg: statep 0x%p "
"passive hcaguid = %llX", statep, statep->remote_hca_guid);
/* Both lookup results start out pointing at statep itself */
stale_qpn = statep;
stale_comid = statep;
/*
* Handle stale connection detection on active side: under the
* writer lock, look for an entry with the same remote QPN + HCA guid
* and for one with the same remote comid + HCA guid
*/
rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
stale_lookup_status = ibcm_lookup_msg(IBCM_INCOMING_REP_STALE,
0, statep->remote_qpn, statep->remote_hca_guid, hcap,
&stale_qpn);
stale_comid_lookup_status = ibcm_lookup_msg(
IBCM_INCOMING_REQ_STALE, statep->remote_comid, 0,
statep->remote_hca_guid, hcap, &stale_comid);
rw_exit(&hcap->hca_state_rwlock);
/*
* Check for other side reusing QPN that was attempted
* to be used, but somehow we sent a REJ.
*/
mutex_enter(&stale_qpn->state_mutex);
if ((stale_lookup_status == IBCM_LOOKUP_EXISTS) &&
(stale_comid_lookup_status != IBCM_LOOKUP_EXISTS) &&
(stale_qpn->state == IBCM_STATE_REJ_SENT)) {
timeout_id_t timer_val;
IBTF_DPRINTF_L3(cmlog, "ibcm_process_rep_msg: "
"statep 0x%p being retired, REMOTE_QPN %x",
stale_qpn, statep->remote_qpn);
/*
* OK, this is reuse by the remote side of a QPN whose
* previous connection attempt ended in REJ_SENT state here.
* The old statep (stale_qpn) is deleted, and the stale QPN
* lookup is then repeated for the current statep.
*/
stale_qpn->state = IBCM_STATE_DELETE;
timer_val = stale_qpn->timerid;
stale_qpn->timerid = 0;
mutex_exit(&stale_qpn->state_mutex);
if (timer_val)
(void) untimeout(timer_val);
/*
* NOTE(review): this ref cnt decrement happens after
* state_mutex was released above, unlike the other decrements
* in this function - confirm that this is safe.
*/
IBCM_REF_CNT_DECR(stale_qpn);
ibcm_delete_state_data(stale_qpn);
/* Redo the stale QPN lookup now that the old entry is gone */
stale_qpn = statep;
rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
stale_lookup_status = ibcm_lookup_msg(
IBCM_INCOMING_REP_STALE, 0, statep->remote_qpn,
statep->remote_hca_guid, hcap, &stale_qpn);
rw_exit(&hcap->hca_state_rwlock);
/* OK to continue now */
} else
mutex_exit(&stale_qpn->state_mutex);
/*
* lookup exists implies that there is already an entry with
* the remote qpn/comid and remote hca guid
*/
if ((stale_lookup_status == IBCM_LOOKUP_EXISTS) ||
(stale_comid_lookup_status == IBCM_LOOKUP_EXISTS)) {
IBTF_DPRINTF_L2(cmlog, "ibcm_process_rep_msg: "
"statep 0x%p stale detected "
"qpn_lkup %d comid_lkup %d", statep,
stale_lookup_status, stale_comid_lookup_status);
/* Disassociate statep and QP */
IBCM_SET_CHAN_PRIVATE(statep->channel, NULL);
/* A QPN clash takes precedence over a comid clash */
if (stale_lookup_status == IBCM_LOOKUP_EXISTS)
reject_reason = IBT_CM_CONN_STALE;
else
reject_reason = IBT_CM_DUP_COM_ID;
/* Report the failure for statep through the conn fail handler */
ibcm_handler_conn_fail(statep,
IBT_CM_FAILURE_REJ_SENT, IBT_CM_FAILURE_REP,
reject_reason,
IBCM_REJ_PRIV(statep->stored_msg),
IBT_REJ_PRIV_DATA_SZ);
/* Send a REJ with the reason chosen above for statep */
ibcm_post_rej_mad(statep, reject_reason,
IBT_CM_FAILURE_REP, NULL, 0);
/* Now let's handle the logic for stale connections */
/* If in established state, stale_qpn is the stale connection */
if (stale_lookup_status == IBCM_LOOKUP_EXISTS) {
IBTF_DPRINTF_L2(cmlog, "ibcm_process_rep_msg: "
"state_qpn 0x%p stale QPN detected "
"state %X", stale_qpn, stale_qpn->state);
ibcm_insert_trace(stale_qpn,
IBCM_TRACE_STALE_DETECT);
mutex_enter(&stale_qpn->state_mutex);
if (stale_qpn->state ==
IBCM_STATE_ESTABLISHED) {
/* change state to DREQ sent */
stale_qpn->state =
IBCM_STATE_TRANSIENT_DREQ_SENT;
stale_qpn->stale = B_TRUE;
/* wait for/cancel pending LAP/APR */
ibcm_sync_lapr_idle(stale_qpn);
/* above call releases state mutex */
if (stale_qpn->dreq_msg == NULL)
(void) ibcm_alloc_out_msg(
stale_qpn->
stored_reply_addr.ibmf_hdl,
&stale_qpn->dreq_msg,
MAD_METHOD_SEND);
if (stale_qpn->dreq_msg != NULL) {
ibcm_post_dreq_mad(stale_qpn);
mutex_enter(
&stale_qpn->state_mutex);
} else {
/*
* No DREQ MAD could be allocated: put the
* connection back to ESTABLISHED and wake
* threads waiting on block_mad_cv
*/
mutex_enter(
&stale_qpn->state_mutex);
stale_qpn->state =
IBCM_STATE_ESTABLISHED;
cv_broadcast(
&stale_qpn->block_mad_cv);
}
}
/* Done with stale_qpn: drop its ref cnt */
IBCM_REF_CNT_DECR(stale_qpn);
mutex_exit(&stale_qpn->state_mutex);
}
if (stale_comid_lookup_status == IBCM_LOOKUP_EXISTS) {
IBTF_DPRINTF_L2(cmlog, "ibcm_process_rep_msg: "
"state_comid 0x%p stale COMID detected "
"state %X", stale_comid,
stale_comid->state);
mutex_enter(&stale_comid->state_mutex);
/*
* Skip the DREQ handling when the comid lookup found the
* same statep as the QPN lookup, since that statep was
* already processed above
*/
if (!((stale_lookup_status ==
IBCM_LOOKUP_EXISTS) &&
(stale_qpn == stale_comid)) &&
(stale_comid->state ==
IBCM_STATE_ESTABLISHED)) {
ibcm_insert_trace(stale_comid,
IBCM_TRACE_STALE_DETECT);
/* change state to DREQ sent */
stale_comid->state =
IBCM_STATE_TRANSIENT_DREQ_SENT;
stale_comid->stale = B_TRUE;
/* wait for/cancel pending LAP/APR */
ibcm_sync_lapr_idle(stale_comid);
/* above call releases state mutex */
if (stale_comid->dreq_msg == NULL)
(void) ibcm_alloc_out_msg(
stale_comid->
stored_reply_addr.ibmf_hdl,
&stale_comid->dreq_msg,
MAD_METHOD_SEND);
if (stale_comid->dreq_msg != NULL) {
ibcm_post_dreq_mad(stale_comid);
mutex_enter(
&stale_comid->state_mutex);
} else {
/*
* No DREQ MAD could be allocated: put the
* connection back to ESTABLISHED and wake
* threads waiting on block_mad_cv
*/
mutex_enter(
&stale_comid->state_mutex);
stale_comid->state =
IBCM_STATE_ESTABLISHED;
cv_broadcast(
&stale_comid->block_mad_cv);
}
}
/* Done with stale_comid: drop its ref cnt */
IBCM_REF_CNT_DECR(stale_comid);
mutex_exit(&stale_comid->state_mutex);
}
/*
* Fill in the open return data with the reject reason; the call
* below also decrements the statep ref cnt
*/
ibcm_return_open_data(statep, rep_msgp, reject_reason);
return;
}
/*
* No need to handle out of memory conditions as we called
* ibcm_lookup_msg() with IBT_CHAN_BLOCKING flags.
*/
ASSERT(stale_lookup_status == IBCM_LOOKUP_NEW);
/* Initialize the remote ack delay */
statep->remote_ack_delay =
ibt_ib2usec(rep_msgp->rep_target_delay_plus >> 3);
IBTF_DPRINTF_L4(cmlog, "ibcm_process_rep_msg: statep 0x%p"
" passive hca_ack_delay= %x ", statep,
statep->remote_ack_delay);
/* Call the REP state transition processing function */
response = ibcm_cep_state_rep(statep, rep_msgp,
&reject_reason, &arej_info_len);
/* If defer, return without decrementing the statep ref cnt */
if (response == IBCM_DEFER) {
IBTF_DPRINTF_L4(cmlog, "ibcm_process_rep_msg: "
"statep 0x%p client returned DEFER response",
statep);
return;
}
/* statep ref cnt is decremented in the func below */
ibcm_handle_cep_rep_response(statep, response,
reject_reason, arej_info_len, rep_msgp);
_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
return;
} else if (statep->state == IBCM_STATE_DELETE) {
/* statep is being deleted: REJ the sender with "invalid comid" */
mutex_exit(&statep->state_mutex);
ibcm_build_n_post_rej_mad(input_madp,
b2h32(rep_msgp->rep_local_comm_id), cm_mad_addr,
IBT_CM_FAILURE_REP, IBT_CM_INVALID_CID);
mutex_enter(&statep->state_mutex);
} else {
#ifdef DEBUG
/* Any other state: the REP is ignored, and only logged in test mode */
if (ibcm_test_mode > 0)
if (statep->state == IBCM_STATE_REP_RCVD)
IBTF_DPRINTF_L2(cmlog, "ibcm_process_rep_msg: "
"REP re-send from passive for statep 0x%p"
" in state %d", statep, statep->state);
else
IBTF_DPRINTF_L2(cmlog, "ibcm_process_rep_msg: "
"Unexpected REP for statep 0x%p in "
"state %d", statep, statep->state);
#endif
}
/* decrement ref count and return for LOOKUP_EXISTS */
IBCM_REF_CNT_DECR(statep);
mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_handle_cep_rep_response:
 *	Acts on the verdict that ibcm_cep_state_rep produced for an incoming
 *	REP. It first waits for the REQ send completion (IBCM_REQ_POST_BUSY
 *	cleared), then either posts an RTU and notifies the client through
 *	ibcm_cep_send_rtu(), or posts a REJ carrying reject_reason.
 *
 *	Must be called holding a statep ref cnt. That ref cnt is dropped
 *	before returning: directly here when the RTU post fails, otherwise
 *	by ibcm_return_open_data().
 */
void
ibcm_handle_cep_rep_response(ibcm_state_data_t *statep, ibcm_status_t response,
    ibt_cm_reason_t reject_reason, uint8_t arej_info_len,
    ibcm_rep_msg_t *rep_msgp)
{
	/* wait until the send completion callback is invoked for REQ post */
	mutex_enter(&statep->state_mutex);
	while (statep->send_mad_flags & IBCM_REQ_POST_BUSY) {
		cv_wait(&statep->block_mad_cv, &statep->state_mutex);
	}
	mutex_exit(&statep->state_mutex);

	if (response != IBCM_SEND_RTU) {
		IBTF_DPRINTF_L4(cmlog, "ibcm_handle_cep_rep_response: statep %p"
		    " posting REJ reject_reason = %d", statep, reject_reason);
		ASSERT(response == IBCM_SEND_REJ);
		ibcm_post_rej_mad(statep, reject_reason, IBT_CM_FAILURE_REP,
		    NULL, arej_info_len);
	} else if (ibcm_post_rtu_mad(statep) == IBCM_SUCCESS) {
		/*
		 * Call client handler with cm event IBT_CM_EVENT_CONN_EST to
		 * indicate RTU posted
		 */
		ibcm_cep_send_rtu(statep);
	} else {
		/* connection aborted: just drop the reference and return */
		mutex_enter(&statep->state_mutex);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;
	}

	/* Fills in the open return data and drops the statep ref cnt */
	ibcm_return_open_data(statep, rep_msgp, reject_reason);
}
/*
 * ibcm_return_open_data:
 *	Initializes the ibt_open_rc_channel return data from the received REP
 *	and wakes up the thread blocked on block_client_cv. The statep ref cnt
 *	is decremented before returning.
 *
 * INPUTS:
 *	statep		- connection state; its open_return_data, when not
 *			  NULL, is the structure that gets filled in
 *	rep_msgp	- the received REP message
 *	reject_reason	- value stored into rc_status
 *
 * RETURN VALUE: NONE
 */
static void
ibcm_return_open_data(ibcm_state_data_t *statep, ibcm_rep_msg_t *rep_msgp,
    ibt_cm_reason_t reject_reason)
{
	/* signal waiting CV - blocking in ibt_open_channel() */
	if (statep->open_return_data != NULL) {
		/*
		 * Never copy more than the REP's private data field holds,
		 * whatever length the client asked for. rep_msgp overlays
		 * the received MAD, so a larger length would read past the
		 * field. The REJ path clamps its copy the same way.
		 */
		if (statep->open_return_data->rc_priv_data_len > 0)
			bcopy(rep_msgp->rep_private_data,
			    statep->open_return_data->rc_priv_data,
			    min(statep->open_return_data->rc_priv_data_len,
			    sizeof (rep_msgp->rep_private_data)));
		statep->open_return_data->rc_rdma_ra_in =
		    rep_msgp->rep_initiator_depth;
		statep->open_return_data->rc_rdma_ra_out =
		    rep_msgp->rep_resp_resources;
		/* failover status: bits 1-2 of rep_target_delay_plus */
		statep->open_return_data->rc_failover_status =
		    (rep_msgp->rep_target_delay_plus >> 1) & 3;
		statep->open_return_data->rc_status = reject_reason;

		/* open_done is set and broadcast under state_mutex */
		mutex_enter(&statep->state_mutex);
		statep->open_done = B_TRUE;
		cv_broadcast(&statep->block_client_cv);
	} else {
		mutex_enter(&statep->state_mutex);
	}

	/* decrement ref count and return for LOOKUP_EXISTS */
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_process_mra_msg:
 *	Called from ibcm_process_incoming_mad on reception of a MRA message
 *
 *	Cancels existing timer, and sets a new timer based on timeout
 *	value from MRA message. The remaining retry count of statep is
 *	reset to max_cm_retries, and the timer value used for the first
 *	retry becomes pkt_life_time plus the MRA service timeout.
 *
 *	If no statep matches the MRA's comid, or the matching statep is in
 *	DELETE state, a REJ with IBT_CM_INVALID_CID is posted to the sender.
 *	A MRA whose transaction id differs from the stored MAD's is dropped.
 *
 * INPUTS:
 *	hcap		- HCA entry pointer
 *	input_madp	- CM MAD that is input to this function
 *	cm_mad_addr	- Address information for the MAD
 *
 * RETURN VALUE: NONE
 */
void
ibcm_process_mra_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp,
    ibcm_mad_addr_t *cm_mad_addr)
{
	ibcm_status_t		state_lookup_status;
	ibcm_mra_msg_t		*mra_msgp =
	    (ibcm_mra_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]);
	ibcm_state_data_t	*statep = NULL;
	uint8_t			mra_msg;

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_mra_msg:");

	/* Lookup for an existing state structure (as a READER) */
	rw_enter(&hcap->hca_state_rwlock, RW_READER);
	state_lookup_status = ibcm_lookup_msg(IBCM_INCOMING_MRA,
	    b2h32(mra_msgp->mra_remote_comm_id), 0, 0, hcap, &statep);
	rw_exit(&hcap->hca_state_rwlock);

	/* if state doesn't exist, REJ the sender (invalid comid) and return */
	if (state_lookup_status != IBCM_LOOKUP_EXISTS) {
		ibcm_build_n_post_rej_mad(input_madp,
		    b2h32(mra_msgp->mra_local_comm_id), cm_mad_addr,
		    IBT_CM_FAILURE_UNKNOWN, IBT_CM_INVALID_CID);
		return;
	}

	/* if transaction id is not as expected, drop the MRA mad */
	if (IBCM_OUT_HDRP(statep->stored_msg)->TransactionID !=
	    ((ib_mad_hdr_t *)(input_madp))->TransactionID) {
		/*
		 * Log while the statep ref cnt is still held: the message
		 * dereferences statep->stored_msg, which must not be touched
		 * once this thread has given up its reference.
		 */
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_mra_msg: statep 0x%p "
		    "MRA MAD with tid expected 0x%llX tid found 0x%llX "
		    "com id 0x%x arrived", statep,
		    b2h64(IBCM_OUT_HDRP(statep->stored_msg)->TransactionID),
		    b2h64(((ib_mad_hdr_t *)(input_madp))->TransactionID),
		    b2h32(mra_msgp->mra_local_comm_id));
		mutex_enter(&statep->state_mutex);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;
	}

	ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_MRA);

	mutex_enter(&statep->state_mutex);

	/*
	 * Validate the received MRA: only the REQ, REP and LAP message
	 * types are accepted.
	 */
	mra_msg = mra_msgp->mra_message_type_plus >> 6;
	if ((mra_msg != IBT_CM_MRA_TYPE_REQ) &&
	    (mra_msg != IBT_CM_MRA_TYPE_REP) &&
	    (mra_msg != IBT_CM_MRA_TYPE_LAP)) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_mra_msg: statep 0x%p "
		    "Unexpected MRA MSG Type %x", statep, mra_msg);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;
	}

	if ((statep->state == IBCM_STATE_REQ_SENT) ||
	    (statep->state == IBCM_STATE_REP_SENT) ||
	    ((statep->state == IBCM_STATE_ESTABLISHED) &&
	    (statep->ap_state == IBCM_AP_STATE_LAP_SENT))) {
		timeout_id_t	timer_val = statep->timerid;
		clock_t		service_timeout;

		/*
		 * The message type reported to the client is derived from
		 * the current state, not from the type field of the MRA.
		 */
		if (statep->state == IBCM_STATE_REQ_SENT) {
			mra_msg = IBT_CM_MRA_TYPE_REQ;
			statep->state = IBCM_STATE_REP_WAIT;
		} else if (statep->state == IBCM_STATE_REP_SENT) {
			mra_msg = IBT_CM_MRA_TYPE_REP;
			statep->state = IBCM_STATE_MRA_REP_RCVD;
		} else { /* statep->ap_state == IBCM_AP_STATE_LAP_SENT */
			mra_msg = IBT_CM_MRA_TYPE_LAP;
			statep->ap_state = IBCM_AP_STATE_MRA_LAP_RCVD;
		}

		/* cancel the timer, after releasing state_mutex */
		statep->timerid = 0;
		mutex_exit(&statep->state_mutex);

		(void) untimeout(timer_val);

		service_timeout =
		    ibt_ib2usec(mra_msgp->mra_service_timeout_plus >> 3);

		/*
		 * If tunable MAX MRA Service Timeout parameter is set, then
		 * verify whether the requested timer value exceeds the MAX
		 * value and reset the timer value to the MAX value.
		 */
		if (ibcm_mra_service_timeout_max &&
		    ibcm_mra_service_timeout_max < service_timeout) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_mra_msg: "
			    "Unexpected MRA Service Timeout value (%ld), Max "
			    "allowed is (%ld)", service_timeout,
			    ibcm_mra_service_timeout_max);
			service_timeout = ibcm_mra_service_timeout_max;
		}

		/*
		 * Invoke client handler to pass the MRA private data
		 */
		if (statep->cm_handler != NULL) {
			ibt_cm_event_t	event;

			bzero(&event, sizeof (event));

			event.cm_type = IBT_CM_EVENT_MRA_RCV;
			event.cm_channel = statep->channel;
			event.cm_session_id = NULL;
			event.cm_priv_data = mra_msgp->mra_private_data;
			event.cm_priv_data_len = IBT_MRA_PRIV_DATA_SZ;

			event.cm_event.mra.mra_msg_type = mra_msg;

			event.cm_event.mra.mra_service_time = service_timeout;

			/* Client cannot return private data */
			(void) statep->cm_handler(statep->state_cm_private,
			    &event, NULL, NULL, 0);
		}

		/*
		 * Must re-check state, as an RTU could have come
		 * after the above mutex_exit and mutex_enter below
		 */
		mutex_enter(&statep->state_mutex);
		if ((statep->state == IBCM_STATE_REP_WAIT) ||
		    (statep->state == IBCM_STATE_MRA_REP_RCVD) ||
		    (statep->ap_state == IBCM_AP_STATE_MRA_LAP_RCVD)) {

			statep->remaining_retry_cnt = statep->max_cm_retries;

			/*
			 * The timeout interval is changed only for the first
			 * retry.  The later retries use the timeout from
			 * statep->timer_value
			 */
			statep->timer_stored_state = statep->state;
			statep->timer_value = statep->pkt_life_time +
			    service_timeout;
			statep->timerid = IBCM_TIMEOUT(statep,
			    statep->timer_value);
		}

	} else if (statep->state == IBCM_STATE_DELETE) {
		/* statep is being deleted: REJ the sender (invalid comid) */
		mutex_exit(&statep->state_mutex);
		ibcm_build_n_post_rej_mad(input_madp,
		    b2h32(mra_msgp->mra_local_comm_id), cm_mad_addr,
		    IBT_CM_FAILURE_UNKNOWN, IBT_CM_INVALID_CID);
		mutex_enter(&statep->state_mutex);
	} else {
#ifdef DEBUG
		if (ibcm_test_mode > 0)
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_mra_msg: "
			    "Unexpected mra for statep 0x%p in state %d",
			    statep, statep->state);
#endif
	}

	/* state_mutex is held on every path that reaches here */
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_process_rtu_msg:
 *	Called from ibcm_process_incoming_mad on reception of a RTU message
 *
 *	In REP SENT or MRA REP RCVD state, cancels the timer, moves the
 *	connection to the transient established state and hands the RTU to
 *	ibcm_cep_state_rtu(). In REJ SENT state the REJ is re-sent.
 *
 *	If no statep matches the RTU's comid, or the matching statep is in
 *	DELETE state, a REJ with IBT_CM_INVALID_CID is posted to the sender.
 *	A RTU whose transaction id differs from the stored MAD's is dropped.
 *
 * INPUTS:
 *	hcap		- HCA entry pointer
 *	input_madp	- CM MAD that is input to this function
 *	cm_mad_addr	- Address information for the MAD
 *
 * RETURN VALUE: NONE
 */
void
ibcm_process_rtu_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp,
    ibcm_mad_addr_t *cm_mad_addr)
{
	timeout_id_t		timer_val;
	ibcm_status_t		status;
	ibcm_rtu_msg_t		*rtu_msg =
	    (ibcm_rtu_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]);
	ibcm_state_data_t	*statep = NULL;

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_rtu_msg:");

	/* Lookup for an existing state structure - using a reader lock */
	rw_enter(&hcap->hca_state_rwlock, RW_READER);
	status = ibcm_lookup_msg(IBCM_INCOMING_RTU,
	    b2h32(rtu_msg->rtu_remote_comm_id), 0, 0, hcap, &statep);
	rw_exit(&hcap->hca_state_rwlock);

	/* if state doesn't exist, REJ the sender (invalid comid) and return */
	if (status != IBCM_LOOKUP_EXISTS) {
		ibcm_build_n_post_rej_mad(input_madp,
		    b2h32(rtu_msg->rtu_local_comm_id), cm_mad_addr,
		    IBT_CM_FAILURE_UNKNOWN, IBT_CM_INVALID_CID);
		return;
	}

	/* if transaction id is not as expected, drop the RTU mad */
	if (IBCM_OUT_HDRP(statep->stored_msg)->TransactionID !=
	    ((ib_mad_hdr_t *)(input_madp))->TransactionID) {
		/*
		 * Log while the statep ref cnt is still held: the message
		 * dereferences statep->stored_msg, which must not be touched
		 * once this thread has given up its reference.
		 */
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_rtu_msg: statep 0x%p "
		    "An RTU MAD with tid expected 0x%llX tid found 0x%llX "
		    "com id 0x%x arrived", statep,
		    b2h64(IBCM_OUT_HDRP(statep->stored_msg)->TransactionID),
		    b2h64(((ib_mad_hdr_t *)(input_madp))->TransactionID),
		    b2h32(rtu_msg->rtu_remote_comm_id));
		mutex_enter(&statep->state_mutex);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;
	}

	ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_RTU);

	mutex_enter(&statep->state_mutex);

	if ((statep->state == IBCM_STATE_REP_SENT) ||
	    (statep->state == IBCM_STATE_MRA_REP_RCVD)) {

		/* transient until ibt_modify_qp succeeds to RTS */
		statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED;

		/* cancel the timer, after releasing state_mutex */
		timer_val = statep->timerid;
		statep->timerid = 0;
		mutex_exit(&statep->state_mutex);

		(void) untimeout(timer_val);

		ibcm_cep_state_rtu(statep, rtu_msg);

		mutex_enter(&statep->state_mutex);

	} else if (statep->state == IBCM_STATE_REJ_SENT) {
		ibcm_resend_rej_mad(statep);
	} else if (statep->state == IBCM_STATE_DELETE) {
		/* statep is being deleted: REJ the sender (invalid comid) */
		mutex_exit(&statep->state_mutex);
		ibcm_build_n_post_rej_mad(input_madp,
		    b2h32(rtu_msg->rtu_local_comm_id), cm_mad_addr,
		    IBT_CM_FAILURE_UNKNOWN, IBT_CM_INVALID_CID);
		mutex_enter(&statep->state_mutex);
	} else {
#ifdef DEBUG
		/* A RTU in ESTABLISHED state is not reported as unexpected */
		if ((ibcm_test_mode > 0) &&
		    (statep->state != IBCM_STATE_ESTABLISHED))
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rtu_msg: "
			    "Unexpected rtu for statep 0x%p in state %d",
			    statep, statep->state);
#endif
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_process_rej_msg:
 *	Called from ibcm_process_incoming_mad on reception of a REJ message.
 *
 *	What is done depends on the state of the matching statep:
 *	o REP_SENT, REQ_SENT, REP_WAIT, MRA_REP_RCVD: the connection attempt
 *	  is over. The timer is cancelled, ibcm_cep_state_rej() is called,
 *	  a blocked opener (if any) is handed the reject reason and private
 *	  data, and the statep is deleted.
 *	o ESTABLISHED on the active side: the connection goes to TIMEWAIT
 *	  and the timewait timer is started.
 *	o REQ_RCVD, REP_RCVD, MRA_SENT, MRA_REP_SENT with a reject reason of
 *	  IBT_CM_TIMEOUT: the abort flag is set to IBCM_ABORT_REJ if it is
 *	  still IBCM_ABORT_INIT.
 *	o Anything else: the REJ is ignored.
 *	No REJ is ever sent in response to a REJ.
 *
 * INPUTS:
 *	hcap		- HCA entry pointer
 *	input_madp	- CM MAD that is input to this function
 *	cm_mad_addr	- Address information for the MAD
 *
 * RETURN VALUE: NONE
 */
/* ARGSUSED */
void
ibcm_process_rej_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp,
    ibcm_mad_addr_t *cm_mad_addr)
{
	ibcm_rej_msg_t		*rej_msg =
	    (ibcm_rej_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]);
	ibcm_state_data_t	*statep = NULL;
	ibcm_status_t		state_lookup_status;
	ibcm_conn_state_t	rej_state;
	ib_guid_t		remote_hca_guid;

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_rej_msg:");

	/* Lookup for an existing state structure, as a READER */
	rw_enter(&hcap->hca_state_rwlock, RW_READER);

	if ((b2h32(rej_msg->rej_remote_comm_id) != 0) ||
	    ((rej_msg->rej_reject_info_len_plus >> 1) < sizeof (ib_guid_t)) ||
	    (b2h16(rej_msg->rej_rejection_reason) != IBT_CM_TIMEOUT)) {
		/* Usual case: look up by the comid the sender calls remote */
		state_lookup_status = ibcm_lookup_msg(IBCM_INCOMING_REJ,
		    b2h32(rej_msg->rej_remote_comm_id), 0, 0, hcap, &statep);
	} else {
		/*
		 * Timeout REJ with no remote comid but with an HCA guid in
		 * the additional reject info: look up by the sender's own
		 * comid together with that guid.
		 */
		bcopy(rej_msg->rej_addl_rej_info, &remote_hca_guid,
		    sizeof (ib_guid_t));
		remote_hca_guid = b2h64(remote_hca_guid);

		IBTF_DPRINTF_L4(cmlog, "ibcm_process_rej_msg: "
		    "hca guid in REJ's ARI =  %llX", remote_hca_guid);

		state_lookup_status = ibcm_lookup_msg(IBCM_INCOMING_REJ_RCOMID,
		    b2h32(rej_msg->rej_local_comm_id), 0, remote_hca_guid,
		    hcap, &statep);
	}

	rw_exit(&hcap->hca_state_rwlock);

	/* if state doesn't exist just return */
	if (state_lookup_status != IBCM_LOOKUP_EXISTS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_rej_msg: no statep with "
		    "local com id %x remote com id %x reason %d",
		    b2h32(rej_msg->rej_remote_comm_id),
		    b2h32(rej_msg->rej_local_comm_id),
		    b2h16(rej_msg->rej_rejection_reason));

		/* Do NOT respond with invalid comid REJ */
		return;
	}

	IBTF_DPRINTF_L2(cmlog, "ibcm_process_rej_msg: statep 0x%p INCOMING_REJ",
	    statep);
	ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_REJ);
	if (ibcm_enable_trace & 2)
		ibcm_dump_conn_trace(statep);

	mutex_enter(&statep->state_mutex);

	/* Sample the state under the mutex; it selects the handling below */
	rej_state = statep->state;

	switch (rej_state) {
	case IBCM_STATE_REP_SENT:
	case IBCM_STATE_REQ_SENT:
	case IBCM_STATE_REP_WAIT:
	case IBCM_STATE_MRA_REP_RCVD: {
		timeout_id_t	pending = statep->timerid;

		statep->state = IBCM_STATE_DELETE;

		/* cancel the REQ/REP timer, outside of state_mutex */
		if (pending != 0)
			statep->timerid = 0;
		mutex_exit(&statep->state_mutex);
		if (pending != 0)
			(void) untimeout(pending);

		/*
		 * Call the QP state transition processing function
		 * NOTE: Input MAD is the REJ received, there is no output MAD
		 */
		ibcm_cep_state_rej(statep, rej_msg, rej_state);

		/* signal waiting CV - blocking in ibt_open_channel() */
		if (statep->open_return_data != NULL) {
			statep->open_return_data->rc_status =
			    b2h16(rej_msg->rej_rejection_reason);

			if (statep->open_return_data->rc_priv_data_len > 0)
				bcopy(rej_msg->rej_private_data,
				    statep->open_return_data->rc_priv_data,
				    min(
				    statep->open_return_data->rc_priv_data_len,
				    IBT_REJ_PRIV_DATA_SZ));

			mutex_enter(&statep->state_mutex);
			statep->open_done = B_TRUE;
			cv_broadcast(&statep->block_client_cv);
		} else {
			mutex_enter(&statep->state_mutex);
		}

		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);

		/* Now delete the statep */
		ibcm_delete_state_data(statep);
		return;
	}

	case IBCM_STATE_ESTABLISHED:
		/* Only the active side acts on a REJ in established state */
		if (statep->mode != IBCM_ACTIVE_MODE)
			break;

		IBTF_DPRINTF_L4(cmlog, "ibcm_process_rej_msg: statep 0x%p "
		    "REJ in established state", statep);

		statep->state = IBCM_STATE_TIMEWAIT;

		/* wait for/cancel pending LAP/APR, release state mutex */
		ibcm_sync_lapr_idle(statep);

		/* wait until client is informed CONN EST event */
		mutex_enter(&statep->state_mutex);
		while (statep->cep_in_rts == IBCM_BLOCK) {
			cv_wait(&statep->block_mad_cv, &statep->state_mutex);
		}
		mutex_exit(&statep->state_mutex);

		/*
		 * Call the QP state transition processing function
		 * NOTE: Input MAD is the REJ received, there is no output MAD
		 */
		ibcm_cep_state_rej_est(statep);

		/*
		 * Start the timewait state timer, as connection is in
		 * established state
		 *
		 * For passive side CM set it to remote_ack_delay
		 * For active side CM add the pkt_life_time * 2
		 * (only the active side gets here)
		 */
		mutex_enter(&statep->state_mutex);
		statep->timer_value = statep->remote_ack_delay;
		statep->timer_value += (2 * statep->pkt_life_time);
		statep->remaining_retry_cnt = 0;
		statep->timer_stored_state = statep->state;
		statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value);

		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;

	case IBCM_STATE_REQ_RCVD:
	case IBCM_STATE_REP_RCVD:
	case IBCM_STATE_MRA_SENT:
	case IBCM_STATE_MRA_REP_SENT:
		/* Only a timeout REJ is of interest in these states */
		if (b2h16(rej_msg->rej_rejection_reason) != IBT_CM_TIMEOUT)
			break;

		if (statep->abort_flag == IBCM_ABORT_INIT)
			statep->abort_flag = IBCM_ABORT_REJ;

		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;

	default:
		break;
	}

	/* The REJ is not acted upon in the current state */
#ifdef DEBUG
	if ((ibcm_test_mode > 0) &&
	    (statep->state != IBCM_STATE_DELETE))
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_rej_msg: "
		    "Unexpected rej for statep 0x%p in state %d",
		    statep, statep->state);
#endif
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_process_dreq_msg:
 *	Processes incoming DREQ message on active/passive side
 *
 *	The DREQ is matched to a statep through its remote comm id and is
 *	then validated against the statep's local QPN and remote comid.
 *	Only the ESTABLISHED, DREQ_SENT and TIMEWAIT states act on a DREQ;
 *	every other state drops it.
 *
 *	o ESTABLISHED / DREQ_SENT: state moves to DREQ_RCVD, the client
 *	  handler (if any) is called with IBT_CM_EVENT_CONN_CLOSED, and a
 *	  DREP is posted unless the handler returns IBT_CM_DEFER.
 *	o TIMEWAIT: the DREP is posted again, unless a DREP post is
 *	  already outstanding (IBCM_DREP_POST_BUSY).
 *
 *	Every early-exit path after a successful lookup drops the statep
 *	reference with IBCM_REF_CNT_DECR while holding state_mutex. The
 *	paths that post a DREP, and the IBT_CM_DEFER path, return without
 *	dropping it here.
 *
 * INPUTS:
 *	hcap		- HCA entry pointer
 *	input_madp	- CM MAD that is input to this function
 *	cm_mad_addr	- Address information for the MAD
 *
 * RETURN VALUE: NONE
 */
/*ARGSUSED*/
void
ibcm_process_dreq_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp,
    ibcm_mad_addr_t *cm_mad_addr)
{
	void			*priv_data = NULL;
	ibcm_status_t		state_lookup_status;
	ib_qpn_t		local_qpn;
	ibcm_dreq_msg_t		*dreq_msgp =
	    (ibcm_dreq_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]);
	ibcm_state_data_t	*statep = NULL;
	uint8_t			close_event_type;
	ibt_cm_status_t		cb_status;

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_dreq_msg:");

	/* Lookup for an existing state structure */
	rw_enter(&hcap->hca_state_rwlock, RW_READER);

	state_lookup_status = ibcm_lookup_msg(IBCM_INCOMING_DREQ,
	    b2h32(dreq_msgp->dreq_remote_comm_id), 0, 0, hcap, &statep);
	rw_exit(&hcap->hca_state_rwlock);

	/* The QPN occupies the upper 24 bits of the 32-bit field */
	local_qpn = b2h32(dreq_msgp->dreq_remote_qpn_eecn_plus) >> 8;

	if (state_lookup_status != IBCM_LOOKUP_EXISTS) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_dreq_msg: no statep with"
		    " com id %x", b2h32(dreq_msgp->dreq_remote_comm_id));
		/* implies a bogus message */
		return;
	}

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_dreq_msg: statep 0x%p "
	    "lookup status %x dreq qpn = %x", statep, state_lookup_status,
	    local_qpn);

	/*
	 * Local QPN check is necessary. There could be a DREQ from
	 * a remote stale connection processing with the same com id, but
	 * not intended for this statep
	 */
	mutex_enter(&statep->state_mutex);
	if ((statep->local_qpn != local_qpn) ||
	    (statep->remote_comid != b2h32(dreq_msgp->dreq_local_comm_id))) {

		IBTF_DPRINTF_L3(cmlog, "ibcm_process_dreq_msg: "
		    "statep->local_qpn = %x qpn in dreq = %x "
		    "statep->remote_comid = %x local comid in dreq = %x",
		    statep->local_qpn, local_qpn, statep->remote_comid,
		    b2h32(dreq_msgp->dreq_local_comm_id));
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;
	}

	/*
	 * If another thread is processing a copy of this same DREQ,
	 * bail out here.
	 */
	if (statep->state == IBCM_STATE_TRANSIENT_DREQ_SENT ||
	    statep->drep_in_progress) {
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;
	}

	switch (statep->state) {
	case IBCM_STATE_ESTABLISHED:
	case IBCM_STATE_DREQ_SENT:
	case IBCM_STATE_TIMEWAIT:
		break;
	default:
		/* All other states ignore DREQ */
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;
	}

	/* Fence off other threads handling a duplicate of this DREQ */
	statep->drep_in_progress = 1;

	/*
	 * If drep msg wasn't really required, it shall be deleted finally
	 * when statep goes away
	 */
	if (statep->drep_msg == NULL) {
		/* The allocation is done with state_mutex dropped */
		mutex_exit(&statep->state_mutex);
		if (ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl,
		    &statep->drep_msg, MAD_METHOD_SEND) != IBT_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_dreq_msg: "
			    "statep 0x%p ibcm_alloc_out_msg failed", statep);
			mutex_enter(&statep->state_mutex);
			statep->drep_in_progress = 0;
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);
			return;
		}
		mutex_enter(&statep->state_mutex);
	}

	/* Re-check: the state may have changed while the mutex was dropped */
	if (statep->state == IBCM_STATE_TRANSIENT_DREQ_SENT) {
		IBCM_REF_CNT_DECR(statep);
		statep->drep_in_progress = 0;
		mutex_exit(&statep->state_mutex);
		return;
	}

	/*
	 * Need to generate drep, as time wait can be reached either by an
	 * outgoing dreq or an incoming dreq
	 */
	if ((statep->state == IBCM_STATE_ESTABLISHED) ||
	    (statep->state == IBCM_STATE_DREQ_SENT)) {
		timeout_id_t	timer_val = statep->timerid;

		if (statep->state == IBCM_STATE_DREQ_SENT) {
			/* DREQs crossed: both sides initiated the close */
			statep->state = IBCM_STATE_DREQ_RCVD;
			statep->timerid = 0;
			ibcm_close_done(statep, 0);
			mutex_exit(&statep->state_mutex);

			close_event_type = IBT_CM_CLOSED_DUP;
			if (timer_val != 0) {
				/* Cancel the timer set for DREP reception */
				(void) untimeout(timer_val);
			}
		} else {	/* In ESTABLISHED State */
			statep->state = IBCM_STATE_DREQ_RCVD;
			statep->clnt_proceed = IBCM_BLOCK;

			/* Cancel or wait for LAP/APR to complete */
			ibcm_sync_lapr_idle(statep);
			/* The above function releases the state mutex */

			/* wait until client knows CONN EST event */
			mutex_enter(&statep->state_mutex);
			while (statep->cep_in_rts == IBCM_BLOCK)
				cv_wait(&statep->block_mad_cv,
				    &statep->state_mutex);
			mutex_exit(&statep->state_mutex);

			close_event_type = IBT_CM_CLOSED_DREQ_RCVD;
			/* Move CEP to error state */
			(void) ibcm_cep_to_error_state(statep);
		}

		mutex_enter(&statep->state_mutex);
		statep->drep_in_progress = 0;

		/* The DREP echoes the transaction id of the DREQ */
		IBCM_OUT_HDRP(statep->drep_msg)->TransactionID =
		    ((ib_mad_hdr_t *)(input_madp))->TransactionID;

		/* The client handler writes DREP private data in place */
		priv_data = &(((ibcm_drep_msg_t *)
		    IBCM_OUT_MSGP(statep->drep_msg))->drep_private_data[0]);

		if (statep->close_ret_status)
			*statep->close_ret_status = close_event_type;

		if (statep->close_nocb_state != IBCM_FAIL) {
			ibtl_cm_chan_is_closing(statep->channel);
			statep->close_nocb_state = IBCM_BLOCK;
		}
		mutex_exit(&statep->state_mutex);

		/*
		 * if close_nocb_state is IBCM_FAIL, then cm_handler is NULL
		 * if close_nocb_state is IBCM_BLOCK, client cannot go away
		 */
		if (statep->cm_handler != NULL) {
			ibt_cm_event_t		event;
			ibt_cm_return_args_t	ret_args;

			bzero(&event, sizeof (event));
			bzero(&ret_args, sizeof (ret_args));

			event.cm_type = IBT_CM_EVENT_CONN_CLOSED;
			event.cm_channel = statep->channel;
			event.cm_session_id = statep;
			event.cm_priv_data = dreq_msgp->dreq_private_data;
			event.cm_priv_data_len = IBT_DREQ_PRIV_DATA_SZ;
			event.cm_event.closed = close_event_type;

			ibcm_insert_trace(statep,
			    IBCM_TRACE_CALLED_CONN_CLOSE_EVENT);

			cb_status = statep->cm_handler(statep->state_cm_private,
			    &event, &ret_args, priv_data,
			    IBT_DREP_PRIV_DATA_SZ);

			ibcm_insert_trace(statep,
			    IBCM_TRACE_RET_CONN_CLOSE_EVENT);

			if (cb_status == IBT_CM_DEFER) {
				/*
				 * The client will respond later: unblock
				 * the proceed api and return without
				 * posting the DREP.
				 */
				mutex_enter(&statep->state_mutex);
				statep->clnt_proceed =
				    IBCM_UNBLOCK;
				cv_broadcast(&statep->block_client_cv);
				mutex_exit(&statep->state_mutex);

				IBTF_DPRINTF_L4(cmlog, "ibcm_process_dreq_msg:"
				    " statep 0x%p client returned DEFER "
				    "response", statep);
				return;
			}
		}

		/* fail/resume any blocked cm api call */
		mutex_enter(&statep->state_mutex);

		/* Signal for cm proceed api */
		statep->clnt_proceed = IBCM_FAIL;

		/* Signal for close with no callbacks */
		statep->close_nocb_state = IBCM_FAIL;

		/* Signal any waiting close channel thread */
		statep->close_done = B_TRUE;

		cv_broadcast(&statep->block_client_cv);
		mutex_exit(&statep->state_mutex);

		ibcm_handle_cep_dreq_response(statep, NULL, 0);

	} else if (statep->state == IBCM_STATE_TIMEWAIT) {
		statep->drep_in_progress = 0;
		if (statep->send_mad_flags & IBCM_DREP_POST_BUSY) {
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);
			return;
		}
		statep->send_mad_flags |= IBCM_DREP_POST_BUSY;

		/* Release statep mutex before posting the MAD */
		mutex_exit(&statep->state_mutex);

		IBCM_OUT_HDRP(statep->drep_msg)->TransactionID =
		    ((ib_mad_hdr_t *)(input_madp))->TransactionID;

		ibcm_post_drep_mad(statep);
		/* ref cnt decremented in ibcm_post_drep_complete */
	} else {
		/* State changed while state_mutex was dropped above */
#ifdef DEBUG
		if ((ibcm_test_mode > 0) &&
		    (statep->state != IBCM_STATE_DELETE))
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_dreq_msg: "
			    "Unexpected dreq for statep 0x%p in state %d",
			    statep, statep->state);
#endif
		IBCM_REF_CNT_DECR(statep);
		statep->drep_in_progress = 0;
		mutex_exit(&statep->state_mutex);
	}
}
/*
 * ibcm_handle_cep_dreq_response:
 *	Finishes handling of an incoming DREQ once the client's response is
 *	known: optionally copies the client's private data into the DREP
 *	message held in statep->drep_msg, then posts the DREP.
 *
 * INPUTS:
 *	statep		- connection state pointer
 *	priv_data	- private data for the DREP, may be NULL
 *	priv_data_len	- length of priv_data; no more than
 *			  IBT_DREP_PRIV_DATA_SZ bytes are copied
 *
 * RETURN VALUE: NONE
 *
 * NOTE(review): this function does not drop the statep reference itself;
 * presumably it is released when the DREP post completes - confirm.
 */
void
ibcm_handle_cep_dreq_response(ibcm_state_data_t *statep, void *priv_data,
    ibt_priv_data_len_t priv_data_len)
{
	if (priv_data != NULL && priv_data_len > 0) {
		ibcm_drep_msg_t	*drep =
		    (ibcm_drep_msg_t *)IBCM_OUT_MSGP(statep->drep_msg);

		bcopy(priv_data, &drep->drep_private_data[0],
		    min(priv_data_len, IBT_DREP_PRIV_DATA_SZ));
	}

	ibcm_post_drep_mad(statep);
}
/*
* ibcm_post_dreq_mad:
* Posts a DREQ MAD
* Post DREQ now for TIMEWAIT state and DREQ_RCVD
*
* INPUTS:
* statep - state pointer
*
* RETURN VALUE:
* NONE
*/
void
ibcm_post_dreq_mad(void *vstatep)
{
ibcm_state_data_t *statep = vstatep;
ibcm_dreq_msg_t *dreq_msgp;
ASSERT(statep->dreq_msg != NULL);
_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dreq_msgp))
/* Fill in the DREQ message */
dreq_msgp = (ibcm_dreq_msg_t *)IBCM_OUT_MSGP(statep->dreq_msg);
dreq_msgp->dreq_local_comm_id = h2b32(statep->local_comid);
dreq_msgp->dreq_remote_comm_id = h2b32(statep->remote_comid);
dreq_msgp->dreq_remote_qpn_eecn_plus = h2b32(statep->remote_qpn << 8);
IBCM_OUT_HDRP(statep->dreq_msg)->AttributeID =
h2b16(IBCM_INCOMING_DREQ + IBCM_ATTR_BASE_ID);
/* wait until client knows CONN EST event */
mutex_enter(&statep->state_mutex);
while (statep->cep_in_rts == IBCM_BLOCK)
cv_wait(&statep->block_mad_cv, &statep->state_mutex);
mutex_exit(&statep->state_mutex);
/* Transition QP/EEC state to ERROR state */
(void) ibcm_cep_to_error_state(statep);
IBCM_OUT_HDRP(statep->dreq_msg)->TransactionID =
h2b64(ibcm_generate_tranid(IBCM_INCOMING_DREQ, statep->local_comid,
0));
_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*dreq_msgp))
/* post the first DREQ via timeout callback */
mutex_enter(&statep->state_mutex);
statep->state = IBCM_STATE_DREQ_SENT;
cv_broadcast(&statep->block_mad_cv);
statep->timer_stored_state = statep->state;
/* client cannot specify more than 16 retries */
statep->timer_value = statep->remote_ack_delay;
if (statep->mode == IBCM_ACTIVE_MODE) {
statep->timer_value += (2 * statep->pkt_life_time);
}
statep->remaining_retry_cnt = statep->max_cm_retries + 1;
statep->timerid = IBCM_TIMEOUT(statep, 0);
mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_drep_mad:
 *	Fills in the comm ids and attribute id of the DREP held in
 *	statep->drep_msg and posts it, with ibcm_post_drep_complete as the
 *	completion callback. The transaction id and any private data must
 *	already have been set by the caller.
 *
 * INPUTS:
 *	statep	- state pointer
 *
 * RETURN VALUE:
 *	NONE
 */
static void
ibcm_post_drep_mad(ibcm_state_data_t *statep)
{
	ibcm_drep_msg_t	*drep =
	    (ibcm_drep_msg_t *)IBCM_OUT_MSGP(statep->drep_msg);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*drep))

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_drep_mad:");

	/* Fill up DREP fields */
	drep->drep_local_comm_id = h2b32(statep->local_comid);
	drep->drep_remote_comm_id = h2b32(statep->remote_comid);
	IBCM_OUT_HDRP(statep->drep_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_DREP + IBCM_ATTR_BASE_ID);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*drep))

	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_DREP);

	/* Hand the DREP to the RC MAD post routine */
	ibcm_post_rc_mad(statep, statep->drep_msg, ibcm_post_drep_complete,
	    statep);
}
/*
 * ibcm_process_drep_msg:
 *	Processes incoming DREP message on active/passive side
 *
 *	The DREP is matched to a statep through its remote comm id and is
 *	dropped unless its transaction id equals that of the stored DREQ.
 *	Only a statep in IBCM_STATE_DREQ_SENT acts on the DREP: the DREQ
 *	retry timer is cancelled, the client handler (if any) is called with
 *	IBT_CM_EVENT_CONN_CLOSED, blocked close-channel callers are woken,
 *	and the statep enters IBCM_STATE_TIMEWAIT with the timewait timer
 *	armed. In all other states the DREP is ignored.
 *
 *	The statep reference obtained by the lookup is dropped before
 *	returning on every path after a successful lookup.
 *
 * INPUTS:
 *	hcap		- HCA entry pointer
 *	input_madp	- CM MAD that is input to this function
 *	cm_mad_addr	- Address information for the MAD
 *
 * RETURN VALUE: NONE
 */
/* ARGSUSED */
void
ibcm_process_drep_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp,
    ibcm_mad_addr_t *cm_mad_addr)
{
	ibcm_status_t		state_lookup_status;
	ibcm_drep_msg_t		*drep_msgp =
	    (ibcm_drep_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]);
	ibcm_state_data_t	*statep = NULL;

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_drep_msg:");

	/* Lookup for an existing state structure */
	rw_enter(&hcap->hca_state_rwlock, RW_READER);

	state_lookup_status = ibcm_lookup_msg(IBCM_INCOMING_DREP,
	    b2h32(drep_msgp->drep_remote_comm_id), 0, 0, hcap, &statep);
	rw_exit(&hcap->hca_state_rwlock);

	if (state_lookup_status != IBCM_LOOKUP_EXISTS) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_drep_msg: no statep with"
		    " com id %x", b2h32(drep_msgp->drep_remote_comm_id));
		return;
	}

	/* if transaction id is not as expected, drop the DREP mad */
	if (IBCM_OUT_HDRP(statep->dreq_msg)->TransactionID !=
	    ((ib_mad_hdr_t *)(input_madp))->TransactionID) {
		mutex_enter(&statep->state_mutex);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_drep_msg: statep 0x%p "
		    "DREP with tid expected 0x%llX tid found 0x%llX", statep,
		    b2h64(IBCM_OUT_HDRP(statep->dreq_msg)->TransactionID),
		    b2h64(((ib_mad_hdr_t *)(input_madp))->TransactionID));
		return;
	}

	ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_DREP);

	mutex_enter(&statep->state_mutex);

	if (statep->state == IBCM_STATE_DREQ_SENT) {
		timeout_id_t	timer_val = statep->timerid;

		statep->state = IBCM_STATE_DREP_RCVD;

		statep->timerid = 0;
		mutex_exit(&statep->state_mutex);

		/*
		 * Cancel the DREQ retry timer, if one is recorded. The
		 * DREQ-receive path applies the same timer_val != 0 check.
		 */
		if (timer_val != 0)
			(void) untimeout(timer_val);

		if (statep->stale == B_TRUE)
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_drep_msg: "
			    "statep 0x%p Unexpected DREP received for a stale "
			    "DREQ sent", statep);

		mutex_enter(&statep->state_mutex);
		/* allow free qp, if close channel with NOCALLBACKS didn't */
		if (statep->close_nocb_state != IBCM_FAIL) {
			ibtl_cm_chan_is_closing(statep->channel);
			statep->close_nocb_state = IBCM_BLOCK;
		}
		mutex_exit(&statep->state_mutex);

		/* if close_nocb_state is IBCM_FAIL, then cm_handler is NULL */
		if (statep->cm_handler != NULL) {
			ibt_cm_event_t		event;
			ibt_cm_return_args_t	ret_args;

			bzero(&event, sizeof (event));
			bzero(&ret_args, sizeof (ret_args));

			event.cm_type = IBT_CM_EVENT_CONN_CLOSED;
			event.cm_channel = statep->channel;
			event.cm_session_id = NULL;

			if (statep->stale == B_TRUE) {
				/* stale close: no private data is passed */
				event.cm_event.closed = IBT_CM_CLOSED_STALE;
				event.cm_priv_data = NULL;
				event.cm_priv_data_len = 0;
			} else {
				event.cm_event.closed = IBT_CM_CLOSED_DREP_RCVD;
				event.cm_priv_data =
				    drep_msgp->drep_private_data;
				event.cm_priv_data_len = IBT_DREP_PRIV_DATA_SZ;
			}

			ibcm_insert_trace(statep,
			    IBCM_TRACE_CALLED_CONN_CLOSE_EVENT);

			(void) statep->cm_handler(statep->state_cm_private,
			    &event, &ret_args, NULL, 0);

			ibcm_insert_trace(statep,
			    IBCM_TRACE_RET_CONN_CLOSE_EVENT);
		}

		/* copy the private to close channel, if specified */
		if ((statep->close_ret_priv_data != NULL) &&
		    (statep->close_ret_priv_data_len != NULL) &&
		    (*statep->close_ret_priv_data_len > 0)) {
			bcopy(drep_msgp->drep_private_data,
			    statep->close_ret_priv_data,
			    min(*statep->close_ret_priv_data_len,
			    IBT_DREP_PRIV_DATA_SZ));
		}

		mutex_enter(&statep->state_mutex);

		if (statep->close_ret_status)
			*statep->close_ret_status = IBT_CM_CLOSED_DREP_RCVD;
		/* signal waiting CV - blocking in ibt_close_channel() */
		statep->close_done = B_TRUE;

		/* signal any blocked close channels with no callbacks */
		statep->close_nocb_state = IBCM_FAIL;

		cv_broadcast(&statep->block_client_cv);

		/* Set the timer wait state timer */
		statep->state = statep->timer_stored_state =
		    IBCM_STATE_TIMEWAIT;
		ibcm_close_done(statep, 0);

		statep->remaining_retry_cnt = 0;
		/*
		 * For passive side CM set it to remote_ack_delay
		 * For active side CM add the pkt_life_time * 2
		 */
		statep->timer_value = statep->remote_ack_delay;
		if (statep->mode == IBCM_ACTIVE_MODE) {
			statep->timer_value += (2 * statep->pkt_life_time);
		}

		/* start TIMEWAIT processing */
		statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value);
	}

	/* There is no processing required for other states */
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * Following are the routines used to resend various CM MADs as a response to
 * incoming MADs
 */

/*
 * ibcm_resend_rtu_mad:
 *	Re-posts the RTU held in statep->stored_msg, unless an RTU post is
 *	already outstanding (IBCM_RTU_POST_BUSY).
 *
 *	Called with statep->state_mutex held. The mutex is dropped around
 *	the post and is held again on return.
 */
void
ibcm_resend_rtu_mad(ibcm_state_data_t *statep)
{
	ASSERT(MUTEX_HELD(&statep->state_mutex));

	IBTF_DPRINTF_L3(cmlog, "ibcm_resend_rtu_mad statep %p ", statep);

	/* don't care, if timer is running or not. Timer may be from LAP */

	/* An RTU post is already in flight; do not start another */
	if (statep->send_mad_flags & IBCM_RTU_POST_BUSY)
		return;

	statep->send_mad_flags |= IBCM_RTU_POST_BUSY;
	IBCM_REF_CNT_INCR(statep);	/* for non-blocking RTU post */
	mutex_exit(&statep->state_mutex);

	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_RTU);

	/* ref cnt is decremented in ibcm_post_rtu_complete */
	ibcm_post_rc_mad(statep, statep->stored_msg,
	    ibcm_post_rtu_complete, statep);

	mutex_enter(&statep->state_mutex);
}
/*
 * ibcm_resend_rej_mad:
 *	Re-posts the REJ held in statep->stored_msg. Nothing is done when no
 *	timer is recorded in statep->timerid. Otherwise the recorded timer id
 *	is cleared, and unless a REJ post is already outstanding
 *	(IBCM_REJ_POST_BUSY) the timer is cancelled and the REJ is posted
 *	again.
 *
 *	Called with statep->state_mutex held. The mutex is dropped around
 *	the post and is held again on return.
 */
void
ibcm_resend_rej_mad(ibcm_state_data_t *statep)
{
	timeout_id_t	rej_timer;

	ASSERT(MUTEX_HELD(&statep->state_mutex));

	rej_timer = statep->timerid;

	IBTF_DPRINTF_L3(cmlog, "ibcm_resend_rej_mad statep %p ", statep);

	/* It's a too fast of a REQ or REP */
	if (rej_timer == 0)
		return;

	statep->timerid = 0;

	/* A REJ post is already in flight; return holding the mutex */
	if (statep->send_mad_flags & IBCM_REJ_POST_BUSY)
		return;

	statep->send_mad_flags |= IBCM_REJ_POST_BUSY;
	IBCM_REF_CNT_INCR(statep);	/* for nonblocking REJ post */
	mutex_exit(&statep->state_mutex);

	(void) untimeout(rej_timer);

	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REJ);
	if ((ibcm_enable_trace & 2) == 0) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_resend_rej_mad statep %p "
		    "OUTGOING_REJ", statep);
	} else {
		ibcm_dump_conn_trace(statep);
	}

	ibcm_post_rc_mad(statep, statep->stored_msg,
	    ibcm_post_rej_complete, statep);

	/* return, holding the state mutex */
	mutex_enter(&statep->state_mutex);
}
/*
 * ibcm_resend_rep_mad:
 *	Re-posts the REP held in statep->stored_msg. Nothing is done when no
 *	timer is recorded in statep->timerid. Otherwise the retry count is
 *	reset to max_cm_retries, and unless a REP post is already outstanding
 *	(IBCM_REP_POST_BUSY) the REP is posted again.
 *
 *	Called with statep->state_mutex held. The mutex is dropped around
 *	the post and is held again on return.
 */
void
ibcm_resend_rep_mad(ibcm_state_data_t *statep)
{
	timeout_id_t	rep_timer;

	ASSERT(MUTEX_HELD(&statep->state_mutex));

	rep_timer = statep->timerid;

	IBTF_DPRINTF_L3(cmlog, "ibcm_resend_rep_mad statep %p ", statep);

	/*
	 * Timer is not yet set by ibcm_post_rep_mad. This is too fast
	 * of a REQ being re-transmitted.
	 */
	if (rep_timer == 0)
		return;

	/* REP timer that is set by ibcm_post_rep_mad: re-start REP timeout */
	statep->remaining_retry_cnt = statep->max_cm_retries;

	/* A REP post is already in flight; do not start another */
	if (statep->send_mad_flags & IBCM_REP_POST_BUSY)
		return;

	statep->send_mad_flags |= IBCM_REP_POST_BUSY;
	IBCM_REF_CNT_INCR(statep);	/* for nonblocking REP post */
	mutex_exit(&statep->state_mutex);

	ibcm_insert_trace(statep, IBCM_TRACE_OUT_REP_RETRY);

	ibcm_post_rc_mad(statep, statep->stored_msg,
	    ibcm_resend_post_rep_complete, statep);

	mutex_enter(&statep->state_mutex);
}
/*
 * ibcm_resend_mra_mad:
 *	Re-posts the MRA held in statep->mra_msg, unless an MRA post is
 *	already outstanding (IBCM_MRA_POST_BUSY). The time of the resend is
 *	recorded in statep->mra_time.
 *
 *	Called with statep->state_mutex held. The mutex is dropped around
 *	the post and is held again on return.
 */
void
ibcm_resend_mra_mad(ibcm_state_data_t *statep)
{
	ASSERT(MUTEX_HELD(&statep->state_mutex));

	IBTF_DPRINTF_L3(cmlog, "ibcm_resend_mra_mad statep %p ", statep);

	if (!(statep->send_mad_flags & IBCM_MRA_POST_BUSY)) {
		statep->send_mad_flags |= IBCM_MRA_POST_BUSY;
		statep->mra_time = gethrtime();

		IBCM_REF_CNT_INCR(statep);	/* for non-blocking MRA post */

		/* Exit the statep mutex, before sending the MAD */
		mutex_exit(&statep->state_mutex);

		ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_MRA);

		/*
		 * Always resend the response MAD to the original reply
		 * destination
		 */
		ibcm_post_rc_mad(statep, statep->mra_msg,
		    ibcm_post_mra_complete, statep);

		mutex_enter(&statep->state_mutex);
	}

	/* return, holding the state mutex */
}
/*
 * ibcm_post_rej_mad:
 *	Builds a REJ MAD in statep->stored_msg, moves the statep to
 *	IBCM_STATE_REJ_SENT, wakes any threads blocked on the client
 *	condition variable, and posts the REJ with ibcm_post_rej_complete
 *	as the completion callback.
 *
 * INPUTS:
 *	statep		- state pointer
 *	reject_reason	- Rejection reason See Section 12.6.7.2 rev1.0a IB Spec
 *	which_msg	- which message is being rejected (only the low
 *			  2 bits are used)
 *	addl_rej_info	- Additional rej Information (may be NULL)
 *	arej_info_len	- Additional rej Info length
 *
 * RETURN VALUE:
 *	NONE
 * Notes
 *	There is no need to hold the statep->mutex and call ibcm_post_rej_mad
 *	REJ can be posted either in IBCM_STATE_REQ_RCVD or IBCM_STATE_REP_RCVD
 *	In these states, there is no timer active, and an incoming REJ shall
 *	not modify the state or cancel timers
 *	An incoming REJ doesn't affect statep in state = IBCM_STATE_REJ_SENT/BUSY
 */
void
ibcm_post_rej_mad(ibcm_state_data_t *statep, ibt_cm_reason_t reject_reason,
    int which_msg, void *addl_rej_info, ibt_priv_data_len_t arej_info_len)
{
	ibcm_rej_msg_t	*rej_msg =
	    (ibcm_rej_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);

	/* Message printed if connection gets REJed */
	IBTF_DPRINTF_L3(cmlog, "ibcm_post_rej_mad: "
	    "statep = %p, reject_reason = %d", statep, reject_reason);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rej_msg))

	/* Initialize rej_msg fields */
	rej_msg->rej_local_comm_id = h2b32(statep->local_comid);
	rej_msg->rej_remote_comm_id = h2b32(statep->remote_comid);
	/* message type lives in the top 2 bits, info length in the top 7 */
	rej_msg->rej_msg_type_plus = (which_msg & 0x3) << 6;
	rej_msg->rej_reject_info_len_plus = arej_info_len << 1;
	rej_msg->rej_rejection_reason = h2b16((uint16_t)reject_reason);

	if ((arej_info_len != 0) && (addl_rej_info != NULL))
		bcopy(addl_rej_info, rej_msg->rej_addl_rej_info, arej_info_len);

	IBCM_OUT_HDRP(statep->stored_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_REJ + IBCM_ATTR_BASE_ID);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rej_msg))

	mutex_enter(&statep->state_mutex);

	/* signal any waiting close channels with blocking or no callbacks */
	statep->close_done = B_TRUE;
	statep->close_nocb_state = IBCM_FAIL;

	/*
	 * Two distinct wake-up conditions are set above, and
	 * block_client_cv is also waited on for other conditions, so wake
	 * every waiter and let each re-check its own predicate. A single
	 * cv_signal() could be consumed by a thread waiting for something
	 * else, leaving a close waiter asleep.
	 */
	cv_broadcast(&statep->block_client_cv);

	statep->timer_stored_state = statep->state = IBCM_STATE_REJ_SENT;
	statep->send_mad_flags |= IBCM_REJ_POST_BUSY;

	IBCM_REF_CNT_INCR(statep); /* for non-blocking post */
	mutex_exit(&statep->state_mutex);

	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REJ);
	if (ibcm_enable_trace & 2)
		ibcm_dump_conn_trace(statep);
	else
		IBTF_DPRINTF_L2(cmlog, "ibcm_post_rej_mad statep %p "
		    "OUTGOING_REJ", statep);

	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_rej_complete,
	    statep);
}
/*
 * ibcm_build_n_post_rej_mad:
 *	Builds and posts a REJ MAD for "reject_reason"
 *	Doesn't set a timer, and doesn't need statep. A temporary output
 *	message is allocated, posted with no completion callback and freed
 *	before returning. Nothing is sent if the allocation fails or if no
 *	CM QP entry is available for the reply address.
 *
 * INPUTS:
 *	input_madp	- Incoming MAD; its transaction id is echoed in the REJ
 *	remote_comid	- Local comid in the message being rejected
 *	cm_mad_addr	- Address information for the MAD to be posted
 *	which_msg	- REJ message type ie., REJ for REQ/REP
 *	reject_reason	- Rejection reason code placed in the REJ
 *
 * RETURN VALUE:
 *	NONE
 */
static void
ibcm_build_n_post_rej_mad(uint8_t *input_madp, ib_com_id_t remote_comid,
    ibcm_mad_addr_t *cm_mad_addr, int which_msg, uint16_t reject_reason)
{
	ibmf_msg_t	*out_msg;
	ibcm_rej_msg_t	*rejp;
	ibcm_mad_addr_t	reply_addr;

	IBTF_DPRINTF_L3(cmlog, "ibcm_build_n_post_rej_mad: "
	    "remote_comid: %x reject_reason %d", remote_comid, reject_reason);

	if (ibcm_alloc_out_msg(cm_mad_addr->ibmf_hdl, &out_msg,
	    MAD_METHOD_SEND) != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_build_n_post_rej_mad: "
		    "ibcm_alloc_out_msg failed");
		return;
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rejp))

	/* MAD header: echo the incoming transaction id */
	IBCM_OUT_HDRP(out_msg)->TransactionID =
	    ((ib_mad_hdr_t *)(input_madp))->TransactionID;
	IBCM_OUT_HDRP(out_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_REJ + IBCM_ATTR_BASE_ID);

	/* REJ payload: no local comid and no additional reject info */
	rejp = (ibcm_rej_msg_t *)IBCM_OUT_MSGP(out_msg);
	rejp->rej_local_comm_id = 0;
	rejp->rej_remote_comm_id = h2b32(remote_comid);
	rejp->rej_msg_type_plus = (which_msg & 0x3) << 6;
	rejp->rej_reject_info_len_plus = 0;
	rejp->rej_rejection_reason = h2b16(reject_reason);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rejp))

	ibcm_build_reply_mad_addr(cm_mad_addr, &reply_addr);

	/* Post only when a CM QP entry was found for the reply address */
	if (reply_addr.cm_qp_entry != NULL) {
		(void) ibcm_post_mad(out_msg, &reply_addr, NULL, NULL);
		ibcm_release_qp(reply_addr.cm_qp_entry);
	}

	(void) ibcm_free_out_msg(cm_mad_addr->ibmf_hdl, &out_msg);
}
/*
 * ibcm_post_rej_ver_mismatch:
 *	Posts a REJ for an incoming REQ with unsupported class version.
 *	The REJ carries reason IBT_CM_CLASS_NO_SUPPORT, one byte of
 *	additional reject info holding IBCM_MAD_CLASS_VERSION, and a MAD
 *	header status of MAD_STATUS_BAD_VERSION. No statep is involved; a
 *	temporary output message is allocated and freed here.
 *
 * INPUTS:
 *	input_madp	- Incoming REQ MAD
 *	cm_mad_addr	- Address information for the MAD to be posted
 *
 * RETURN VALUE:
 *	NONE
 */
static void
ibcm_post_rej_ver_mismatch(uint8_t *input_madp, ibcm_mad_addr_t *cm_mad_addr)
{
	ibcm_req_msg_t	*reqp =
	    (ibcm_req_msg_t *)&input_madp[IBCM_MAD_HDR_SIZE];
	ibmf_msg_t	*out_msg;
	ibcm_rej_msg_t	*rejp;
	ibcm_mad_addr_t	reply_addr;

	IBTF_DPRINTF_L3(cmlog, "ibcm_post_rej_ver_mismatch: remote comid %x",
	    b2h32(reqp->req_local_comm_id));

	if (ibcm_alloc_out_msg(cm_mad_addr->ibmf_hdl, &out_msg,
	    MAD_METHOD_SEND) != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_post_rej_ver_mismatch: "
		    "ibcm_alloc_out_msg failed");
		return;
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rejp))

	/* MAD header: echo the transaction id and flag the bad version */
	IBCM_OUT_HDRP(out_msg)->TransactionID =
	    ((ib_mad_hdr_t *)(input_madp))->TransactionID;
	IBCM_OUT_HDRP(out_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_REJ + IBCM_ATTR_BASE_ID);
	IBCM_OUT_HDRP(out_msg)->Status = h2b16(MAD_STATUS_BAD_VERSION);

	/* REJ payload */
	rejp = (ibcm_rej_msg_t *)IBCM_OUT_MSGP(out_msg);
	rejp->rej_local_comm_id = 0;
	/* the comid is copied as received, without a byte swap */
	rejp->rej_remote_comm_id = reqp->req_local_comm_id;
	rejp->rej_msg_type_plus = IBT_CM_FAILURE_REQ << 6;
	rejp->rej_rejection_reason = h2b16(IBT_CM_CLASS_NO_SUPPORT);
	/* one byte of additional info: the class version */
	rejp->rej_reject_info_len_plus = 1 << 1;
	rejp->rej_addl_rej_info[0] = IBCM_MAD_CLASS_VERSION;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rejp))

	ibcm_build_reply_mad_addr(cm_mad_addr, &reply_addr);

	/* Post only when a CM QP entry was found for the reply address */
	if (reply_addr.cm_qp_entry != NULL) {
		(void) ibcm_post_mad(out_msg, &reply_addr, NULL, NULL);
		ibcm_release_qp(reply_addr.cm_qp_entry);
	}

	(void) ibcm_free_out_msg(cm_mad_addr->ibmf_hdl, &out_msg);
}
/*
 * ibcm_post_rep_mad:
 *	Completes the REP held in statep->stored_msg and posts it, with
 *	ibcm_post_rep_complete as the completion callback, after moving the
 *	statep to IBCM_STATE_REP_SENT.
 *
 *	No REP is posted if the connection attempt has been aborted:
 *	o abort_flag == IBCM_ABORT_CLIENT: the abort is processed and a REJ
 *	  (IBT_CM_CONSUMER) is posted instead.
 *	o abort_flag has IBCM_ABORT_REJ set: the abort is processed and the
 *	  statep is handed to ibcm_delete_state_data().
 *
 *	A previously allocated MRA message is freed here, or marked for
 *	deletion if an MRA post is still outstanding.
 *
 * INPUTS:
 *	statep	- state pointer
 *
 * RETURN VALUE:
 *	NONE
 */
void
ibcm_post_rep_mad(ibcm_state_data_t *statep)
{
	ibcm_rep_msg_t	*rep =
	    (ibcm_rep_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);
	ibmf_msg_t	*free_mra = NULL;
	boolean_t	aborted = B_FALSE;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_rep_mad: statep 0x%p", statep);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rep))

	/*
	 * All other REP fields, other that the 2 below, are filled in
	 * the ibcm_cep_state_req() function.
	 */
	rep->rep_local_comm_id = h2b32(statep->local_comid);
	rep->rep_remote_comm_id = h2b32(statep->remote_comid);
	IBCM_OUT_HDRP(statep->stored_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_REP + IBCM_ATTR_BASE_ID);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rep))

	/*
	 * Changing state and attempt to delete the mra msg must be done
	 * together holding the state_mutex
	 */
	mutex_enter(&statep->state_mutex);

	/* Now, attempt to delete the mra_msg, if there is one allocated */
	if (statep->mra_msg != NULL) {
		if (statep->send_mad_flags & IBCM_MRA_POST_BUSY) {
			/* MRA post still outstanding: defer the delete */
			statep->delete_mra_msg = B_TRUE;
		} else {
			free_mra = statep->mra_msg;
			statep->mra_msg = NULL;
		}
	}

	if (statep->abort_flag == IBCM_ABORT_CLIENT) {
		statep->state = IBCM_STATE_ABORTED;
		mutex_exit(&statep->state_mutex);
		ibcm_process_abort(statep);

		/* Now post a REJ MAD, rej reason consumer abort */
		ibcm_post_rej_mad(statep, IBT_CM_CONSUMER, IBT_CM_FAILURE_REQ,
		    NULL, 0);
		aborted = B_TRUE;
	} else if (statep->abort_flag & IBCM_ABORT_REJ) {
		statep->state = IBCM_STATE_DELETE;
		mutex_exit(&statep->state_mutex);

		ibcm_process_abort(statep);
		ibcm_delete_state_data(statep);
		aborted = B_TRUE;
	} else {
		statep->state = statep->timer_stored_state =
		    IBCM_STATE_REP_SENT;
		statep->remaining_retry_cnt = statep->max_cm_retries;
		statep->send_mad_flags |= IBCM_REP_POST_BUSY;
		IBCM_REF_CNT_INCR(statep);	/* for nonblocking REP Post */
		mutex_exit(&statep->state_mutex);
	}

	/*
	 * NOTE(review): statep is still dereferenced here after
	 * ibcm_delete_state_data() in the IBCM_ABORT_REJ case; presumably a
	 * reference held by the caller keeps it alive - confirm.
	 */
	if (free_mra != NULL) {
		(void) ibcm_free_out_msg(statep->stored_reply_addr.ibmf_hdl,
		    &free_mra);
	}

	if (aborted)
		return;

	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REP);

	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_rep_complete,
	    statep);
}
/*
 * ibcm_post_rtu_mad:
 *	From active side post RTU MAD. The RTU is built in
 *	statep->stored_msg, the statep moves to IBCM_STATE_ESTABLISHED and
 *	the RTU is posted with ibcm_post_rtu_complete as the completion
 *	callback.
 *
 *	No RTU is posted if the connection attempt has been aborted:
 *	o abort_flag == IBCM_ABORT_CLIENT: the abort is processed and a REJ
 *	  (IBT_CM_CONSUMER) is posted instead.
 *	o abort_flag has IBCM_ABORT_REJ set: the abort is processed and the
 *	  statep is handed to ibcm_delete_state_data().
 *
 *	A previously allocated MRA message is freed here, or marked for
 *	deletion if an MRA post is still outstanding.
 *
 * INPUTS:
 *	statep	- state pointer
 *
 * RETURN VALUE:
 *	IBCM_SUCCESS	- the RTU was handed to the post routine
 *	IBCM_FAILURE	- the connection was aborted, no RTU posted
 *
 * NOTE: No timer set after posting RTU
 */
ibcm_status_t
ibcm_post_rtu_mad(ibcm_state_data_t *statep)
{
	ibcm_rtu_msg_t	*rtu;
	ibmf_msg_t	*free_mra = NULL;
	boolean_t	aborted = B_FALSE;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_rtu_mad: statep 0x%p", statep);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rtu))

	/* Fill in the RTU comm ids and the attribute id */
	rtu = (ibcm_rtu_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);
	rtu->rtu_local_comm_id = h2b32(statep->local_comid);
	rtu->rtu_remote_comm_id = h2b32(statep->remote_comid);
	IBCM_OUT_HDRP(statep->stored_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_RTU + IBCM_ATTR_BASE_ID);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rtu))

	mutex_enter(&statep->state_mutex);

	/* Now, attempt to delete the mra_msg, if there is one allocated */
	if (statep->mra_msg != NULL) {
		if (statep->send_mad_flags & IBCM_MRA_POST_BUSY) {
			/* MRA post still outstanding: defer the delete */
			statep->delete_mra_msg = B_TRUE;
		} else {
			free_mra = statep->mra_msg;
			statep->mra_msg = NULL;
		}
	}

	if (statep->abort_flag == IBCM_ABORT_CLIENT) {
		statep->state = IBCM_STATE_ABORTED;
		mutex_exit(&statep->state_mutex);

		ibcm_process_abort(statep);

		/* Now post a REJ MAD */
		ibcm_post_rej_mad(statep, IBT_CM_CONSUMER, IBT_CM_FAILURE_REP,
		    NULL, 0);
		aborted = B_TRUE;
	} else if (statep->abort_flag & IBCM_ABORT_REJ) {
		statep->state = IBCM_STATE_DELETE;
		mutex_exit(&statep->state_mutex);

		ibcm_process_abort(statep);
		ibcm_delete_state_data(statep);
		aborted = B_TRUE;
	} else {
		statep->state = IBCM_STATE_ESTABLISHED;
		ibtl_cm_chan_is_open(statep->channel);
		statep->send_mad_flags |= IBCM_RTU_POST_BUSY;
		IBCM_REF_CNT_INCR(statep);	/* for nonblocking RTU post */
		mutex_exit(&statep->state_mutex);
	}

	/*
	 * NOTE(review): statep is still dereferenced here after
	 * ibcm_delete_state_data() in the IBCM_ABORT_REJ case; presumably a
	 * reference held by the caller keeps it alive - confirm.
	 */
	if (free_mra != NULL) {
		(void) ibcm_free_out_msg(statep->stored_reply_addr.ibmf_hdl,
		    &free_mra);
	}

	if (aborted)	/* Abort case, no RTU posted */
		return (IBCM_FAILURE);

	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_RTU);

	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_rtu_complete,
	    statep);
	return (IBCM_SUCCESS);
}
/*
 * ibcm_process_abort:
 *	Processes abort, if client requested abort connection attempt
 *
 *	The CEP is moved to the error state and the channel's CM private
 *	pointer is cleared. If a client handler is registered it is
 *	notified: through ibcm_handler_conn_fail() when IBCM_ABORT_REJ is
 *	set in abort_flag, otherwise with an IBT_CM_EVENT_CONN_CLOSED /
 *	IBT_CM_CLOSED_ABORT event. Finally any thread blocked on open or
 *	close of the channel is woken, with rc_status set to
 *	IBT_CM_TIMEOUT (REJ case) or IBT_CM_ABORT when open return data
 *	is present.
 *
 * INPUTS:
 *	statep	- pointer to ibcm_state_data_t is passed
 *
 * RETURN VALUES: None
 */
void
ibcm_process_abort(ibcm_state_data_t *statep)
{
	IBTF_DPRINTF_L3(cmlog, "ibcm_process_abort: statep 0x%p", statep);

	/* move CEP to error state, before calling client handler */
	(void) ibcm_cep_to_error_state(statep);

	/* Now disassociate the link between statep and qp */
	IBCM_SET_CHAN_PRIVATE(statep->channel, NULL);

	/* invoke cm handler, for non-blocking open/close rc channel calls */
	if (statep->cm_handler != NULL) { /* cannot be NULL, but still .. */
		if (statep->abort_flag & IBCM_ABORT_REJ) {
			ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_RCV,
			    IBT_CM_FAILURE_UNKNOWN, IBT_CM_TIMEOUT, NULL, 0);
		} else {
			ibt_cm_event_t		event;
			ibt_cm_return_args_t	ret_args;

			bzero(&event, sizeof (event));
			bzero(&ret_args, sizeof (ret_args));

			ibcm_path_cache_purge();

			event.cm_type = IBT_CM_EVENT_CONN_CLOSED;
			event.cm_channel = statep->channel;
			event.cm_event.closed = IBT_CM_CLOSED_ABORT;

			ibcm_insert_trace(statep,
			    IBCM_TRACE_CALLED_CONN_CLOSE_EVENT);

			(void) statep->cm_handler(statep->state_cm_private,
			    &event, &ret_args, NULL, 0);

			ibcm_insert_trace(statep,
			    IBCM_TRACE_RET_CONN_CLOSE_EVENT);

			mutex_enter(&statep->state_mutex);
			ibcm_open_done(statep);
			mutex_exit(&statep->state_mutex);
		}
	}

	/*
	 * Unblock an ibt_open_rc_channel called in a blocking mode, though
	 * it is an unlikely scenario
	 */
	mutex_enter(&statep->state_mutex);

	statep->cm_retries++;	/* cause connection trace to be printed */
	statep->open_done = B_TRUE;
	statep->close_done = B_TRUE;
	statep->close_nocb_state = IBCM_FAIL;	/* sanity sake */

	if (statep->open_return_data != NULL) {
		if (statep->abort_flag & IBCM_ABORT_REJ) {
			/* REJ came first, and then client aborted connection */
			statep->open_return_data->rc_status = IBT_CM_TIMEOUT;
		} else {
			statep->open_return_data->rc_status = IBT_CM_ABORT;
		}
	}

	cv_broadcast(&statep->block_client_cv);
	mutex_exit(&statep->state_mutex);

	if (ibcm_enable_trace != 0) {
		ibcm_dump_conn_trace(statep);
	}
}
/*
 * ibcm_timeout_cb:
 *	Called when the timer expires on an RC state structure.
 *	Depending on the current state it either deletes the statep
 *	(REJ_SENT, TIMEWAIT), re-posts the stored MAD while retries remain,
 *	or marks the connection / alternate-path exchange as timed out and
 *	hands it to the timeout thread or completes it inline.
 *
 * INPUTS:
 *	arg	- ibcm_state_data_t is passed
 *
 * RETURN VALUES: NONE
 *
 * LOCKING:
 *	Acquires statep->state_mutex on entry. Every path below releases it
 *	before returning; several paths drop it before calling out.
 */
void
ibcm_timeout_cb(void *arg)
{
ibcm_state_data_t *statep = (ibcm_state_data_t *)arg;
mutex_enter(&statep->state_mutex);
/*
 * The blocking operations are handled in a separate thread.
 * All other non-blocking operations, including ibmf non-blocking
 * posts are done from timeout context
 */
/*
 * Ignore this expiry if the state no longer matches the state recorded
 * in timer_stored_state, or (when ESTABLISHED) if ap_state no longer
 * matches timer_stored_ap_state.
 */
if ((statep->timer_stored_state != statep->state) ||
((statep->timer_stored_state == IBCM_STATE_ESTABLISHED) &&
(statep->ap_state != statep->timer_stored_ap_state))) {
mutex_exit(&statep->state_mutex);
return;
}
IBTF_DPRINTF_L3(cmlog, "ibcm_timeout_cb: statep 0x%p state %x "
"ap_state %x", statep, statep->state, statep->ap_state);
/* Processing depends upon current state */
if (statep->state == IBCM_STATE_REJ_SENT) {
statep->state = IBCM_STATE_DELETE;
mutex_exit(&statep->state_mutex);
/* Deallocate the CM state structure */
ibcm_delete_state_data(statep);
return;
} else if (statep->state == IBCM_STATE_TIMEWAIT) {
statep->state = IBCM_STATE_DELETE;
/* TIME_WAIT timer expired, so cleanup */
mutex_exit(&statep->state_mutex);
if (statep->channel)
ibtl_cm_chan_is_closed(statep->channel);
/*
 * A pending recycle request is detached from statep and dispatched to
 * ibcm_taskq without sleeping or queueing. If the dispatch fails
 * (returns 0), the request is re-attached and statep is placed on the
 * timeout list instead of being deleted here.
 */
if (statep->recycle_arg) {
struct ibcm_taskq_recycle_arg_s *recycle_arg;
recycle_arg = statep->recycle_arg;
_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(
statep->recycle_arg))
statep->recycle_arg = NULL;
_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
/* if possible, do not slow down calling recycle func */
if (taskq_dispatch(ibcm_taskq, ibcm_process_rc_recycle,
recycle_arg, TQ_NOQUEUE | TQ_NOSLEEP) == 0) {
_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(
statep->recycle_arg))
statep->recycle_arg = recycle_arg;
_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(
statep->recycle_arg))
ibcm_add_tlist(statep);
return;
}
}
ibcm_delete_state_data(statep);
return;
} else if (statep->remaining_retry_cnt > 0) {
/* Retries remain: consume one and re-post the appropriate stored MAD */
ibcm_conn_state_t stored_state;
ibcm_ap_state_t stored_ap_state;
statep->remaining_retry_cnt--;
IBTF_DPRINTF_L3(cmlog, "ibcm_timeout_cb: statep 0x%p "
"attr-id= 0x%x, retries remaining = 0x%x", statep,
b2h16(IBCM_OUT_HDRP(statep->stored_msg)->AttributeID),
statep->remaining_retry_cnt);
/*
 * REP could be resent, either because of timeout or an
 * incoming REQ. Any other MAD below can be resent, because
 * of timeout only, hence send_mad_flag manipulation not
 * required for those cases.
 * If REP is already being retransmitted, then just set the
 * timer and return. Else post REP in non-blocking mode
 */
if (statep->timer_stored_state == IBCM_STATE_REP_SENT) {
if (statep->send_mad_flags & IBCM_REP_POST_BUSY) {
statep->timerid = IBCM_TIMEOUT(statep,
statep->timer_value);
mutex_exit(&statep->state_mutex);
ibcm_insert_trace(statep,
IBCM_TRACE_TIMEOUT_REP);
return;
}
/*
 * Set REP busy flag, so any incoming REQ's will not
 * initiate new REP transmissions
 */
statep->send_mad_flags |= IBCM_REP_POST_BUSY;
/* Since REQ/RTU/REJ on active side use same MAD, synchronize */
} else if (statep->timer_stored_state == IBCM_STATE_REQ_SENT) {
ASSERT((statep->send_mad_flags & IBCM_REQ_POST_BUSY)
== 0);
statep->send_mad_flags |= IBCM_REQ_POST_BUSY;
}
/*
 * Take a reference for the post and snapshot the timer-stored states
 * while the mutex is still held; the dispatch below runs unlocked and
 * selects the MAD and completion callback from these snapshots.
 */
IBCM_REF_CNT_INCR(statep); /* for non-blocking post */
stored_state = statep->timer_stored_state;
stored_ap_state = statep->timer_stored_ap_state;
mutex_exit(&statep->state_mutex);
/* Post REQ MAD in non-blocking mode */
if (stored_state == IBCM_STATE_REQ_SENT) {
ibcm_insert_trace(statep, IBCM_TRACE_OUT_REQ_RETRY);
ibcm_post_rc_mad(statep, statep->stored_msg,
ibcm_post_req_complete, statep);
/* Post REQ MAD in non-blocking mode */
} else if (stored_state == IBCM_STATE_REP_WAIT) {
ibcm_insert_trace(statep, IBCM_TRACE_OUT_REQ_RETRY);
ibcm_post_rc_mad(statep, statep->stored_msg,
ibcm_post_rep_wait_complete, statep);
/* Post REP MAD in non-blocking mode */
} else if (stored_state == IBCM_STATE_REP_SENT) {
ibcm_insert_trace(statep, IBCM_TRACE_OUT_REP_RETRY);
ibcm_post_rc_mad(statep, statep->stored_msg,
ibcm_post_rep_complete, statep);
/* Post REP MAD in non-blocking mode */
} else if (stored_state == IBCM_STATE_MRA_REP_RCVD) {
ibcm_insert_trace(statep, IBCM_TRACE_OUT_REP_RETRY);
/* mra_time is what ibcm_post_mra_rep_complete passes to ibcm_flow_dec */
mutex_enter(&statep->state_mutex);
statep->mra_time = gethrtime();
mutex_exit(&statep->state_mutex);
ibcm_post_rc_mad(statep, statep->stored_msg,
ibcm_post_mra_rep_complete, statep);
/* Post DREQ MAD in non-blocking mode */
} else if (stored_state == IBCM_STATE_DREQ_SENT) {
/*
 * Trace as an outgoing DREQ when the already-decremented retry count
 * equals max_cm_retries; otherwise trace a retry, count it in
 * cm_retries and call ibcm_close_done(statep, 0).
 */
mutex_enter(&statep->state_mutex);
if (statep->remaining_retry_cnt ==
statep->max_cm_retries)
ibcm_insert_trace(statep,
IBCM_TRACE_OUTGOING_DREQ);
else {
ibcm_insert_trace(statep,
IBCM_TRACE_OUT_DREQ_RETRY);
statep->cm_retries++;
ibcm_close_done(statep, 0);
}
mutex_exit(&statep->state_mutex);
ibcm_post_rc_mad(statep, statep->dreq_msg,
ibcm_post_dreq_complete, statep);
/* post LAP MAD in non-blocking mode */
} else if (stored_ap_state == IBCM_AP_STATE_LAP_SENT) {
ibcm_insert_trace(statep, IBCM_TRACE_OUT_LAP_RETRY);
ibcm_post_rc_mad(statep, statep->lapr_msg,
ibcm_post_lap_complete, statep);
/* post LAP MAD in non-blocking mode */
} else if (stored_ap_state == IBCM_AP_STATE_MRA_LAP_RCVD) {
ibcm_insert_trace(statep, IBCM_TRACE_OUT_LAP_RETRY);
/* mra_time is what ibcm_post_mra_lap_complete passes to ibcm_flow_dec */
mutex_enter(&statep->state_mutex);
statep->mra_time = gethrtime();
mutex_exit(&statep->state_mutex);
ibcm_post_rc_mad(statep, statep->lapr_msg,
ibcm_post_mra_lap_complete, statep);
}
/*
 * NOTE(review): if none of the cases above matches, the reference taken
 * before the dispatch is not released in this function - confirm that
 * this combination of states cannot occur.
 */
return;
} else if ((statep->state == IBCM_STATE_REQ_SENT) ||
(statep->state == IBCM_STATE_REP_SENT) ||
(statep->state == IBCM_STATE_MRA_REP_RCVD) ||
(statep->state == IBCM_STATE_REP_WAIT)) {
/*
 * MAX retries reached, send a REJ to the remote,
 * and close the connection
 */
/* Remember which state timed out; later code keys off timedout_state */
statep->timedout_state = statep->state;
statep->state = IBCM_STATE_TIMED_OUT;
IBTF_DPRINTF_L3(cmlog, "ibcm_timeout_cb: "
"max retries done for statep 0x%p", statep);
statep->cm_retries++; /* cause conn trace to print */
mutex_exit(&statep->state_mutex);
/* Only the REP_SENT / MRA_REP_RCVD timeouts move the CEP to error state */
if ((statep->timedout_state == IBCM_STATE_REP_SENT) ||
(statep->timedout_state == IBCM_STATE_MRA_REP_RCVD))
(void) ibcm_cep_to_error_state(statep);
/* Disassociate statep from QP */
IBCM_SET_CHAN_PRIVATE(statep->channel, NULL);
/*
 * statep is in REJ SENT state, the only way to get deleted is
 * the timeout callback that is set after posting REJ
 * The thread processing is required where cm handler is
 * specified
 */
if (statep->cm_handler != NULL) {
/* Attach the statep to timeout list */
ibcm_add_tlist(statep);
} else {
ib_guid_t local_hca_guid;
mutex_enter(&statep->state_mutex);
/*
 * statep->open_return_data is set for blocking
 * No handler specified, hence signal blocked
 * ibt_open_rc_channel from here
 */
if (statep->open_return_data != NULL) {
statep->open_return_data->rc_status =
IBT_CM_TIMEOUT;
statep->open_done = B_TRUE;
cv_broadcast(&statep->block_client_cv);
}
mutex_exit(&statep->state_mutex);
/* The REJ carries the local HCA GUID (via h2b64) as its additional data */
local_hca_guid = h2b64(statep->local_hca_guid);
ibcm_post_rej_mad(statep, IBT_CM_TIMEOUT,
(statep->timedout_state == IBCM_STATE_REP_SENT ||
statep->timedout_state == IBCM_STATE_MRA_REP_RCVD) ?
IBT_CM_FAILURE_REP: IBT_CM_FAILURE_REQ,
&local_hca_guid, sizeof (ib_guid_t));
}
} else if ((statep->ap_state == IBCM_AP_STATE_LAP_SENT) ||
(statep->ap_state == IBCM_AP_STATE_MRA_LAP_RCVD)) {
/* LAP retries exhausted; state_mutex stays held for this whole branch */
IBTF_DPRINTF_L4(cmlog, "ibcm_timeout_cb: statep 0x%p "
"LAP timed out", statep);
statep->timedout_state = statep->state;
/*
 * This state setting ensures that the processing of DREQ is
 * sequentialized, once this ap_state is set. If statep is
 * attached to timeout list, it cannot be re-attached as long
 * as in this state
 */
statep->ap_state = IBCM_AP_STATE_TIMED_OUT;
ibcm_open_done(statep);
if (statep->cm_handler != NULL) {
/* Attach statep to timeout list - thread handling */
ibcm_add_tlist(statep);
} else if (statep->ap_return_data != NULL) {
/*
 * statep->ap_return_data is initialized for blocking in
 * ibt_set_alt_path(), signal the waiting CV
 */
statep->ap_return_data->ap_status = IBT_CM_AP_TIMEOUT;
statep->ap_done = B_TRUE;
cv_broadcast(&statep->block_client_cv);
statep->ap_state = IBCM_AP_STATE_IDLE;
/* Wake up threads waiting for LAP/APR to complete */
cv_broadcast(&statep->block_mad_cv);
}
mutex_exit(&statep->state_mutex);
} else if (statep->state == IBCM_STATE_DREQ_SENT) {
/* DREQ retries exhausted */
statep->timedout_state = statep->state;
statep->state = IBCM_STATE_TIMED_OUT;
/*
 * The logic below is necessary, for a race situation between
 * ibt_close_rc_channel with no callbacks option and CM's
 * internal stale connection handling on the same connection
 */
if (statep->close_nocb_state != IBCM_FAIL) {
ASSERT(statep->close_nocb_state == IBCM_UNBLOCK);
ibtl_cm_chan_is_closing(statep->channel);
statep->close_nocb_state = IBCM_BLOCK;
}
mutex_exit(&statep->state_mutex);
/*
 * If cm handler is specified, then invoke handler for
 * the DREQ timeout
 */
if (statep->cm_handler != NULL) {
ibcm_add_tlist(statep);
return;
}
/* No handler: finish the DREQ timeout processing inline */
ibcm_process_dreq_timeout(statep);
} else {
/* Nothing to do for this state; logged only on DEBUG builds in test mode */
#ifdef DEBUG
if (ibcm_test_mode > 0)
IBTF_DPRINTF_L2(cmlog, "ibcm_timeout_cb: "
"Unexpected unhandled timeout for statep 0x%p "
"state %d", statep, statep->state);
#endif
mutex_exit(&statep->state_mutex);
}
}
/*
 * The following are the ibmf send-completion callback routines that are
 * used when posting the various CM MADs in non-blocking post mode
 */
/*
 * ibcm_post_req_complete:
 *	ibmf send-completion callback for a REQ MAD posted in non-blocking
 *	mode. Clears the REQ busy flag, signals block_mad_cv, re-arms the
 *	timer if the connection is still in REQ_SENT, and drops the
 *	reference held on statep for the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_req_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_req_complete statep %p ", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->post_time, "REQ");
	ibcm_insert_trace(statep, IBCM_TRACE_REQ_POST_COMPLETE);

	/* The REQ MAD is free again; let one waiter on it proceed */
	statep->send_mad_flags &= ~IBCM_REQ_POST_BUSY;
	cv_signal(&statep->block_mad_cv);

	/* Restart the timer only if the state did not change meanwhile */
	if (statep->state == IBCM_STATE_REQ_SENT) {
		statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_rep_wait_complete:
 *	ibmf send-completion callback for a REQ re-posted while the
 *	connection is in REP_WAIT. Re-arms the timer if the state is still
 *	REP_WAIT and drops the reference held on statep for the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_rep_wait_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_rep_wait_complete statep %p", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->post_time, "REQ_RETRY");
	ibcm_insert_trace(statep, IBCM_TRACE_REQ_POST_COMPLETE);

	/* Restart the timer only if the state did not change meanwhile */
	if (statep->state == IBCM_STATE_REP_WAIT) {
		statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_rep_complete:
 *	ibmf send-completion callback for a REP MAD posted in non-blocking
 *	mode. Clears the REP busy flag, re-arms the timer if the connection
 *	is still in REP_SENT, and drops the reference held on statep for
 *	the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_rep_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_rep_complete statep %p", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->post_time, "REP");
	ibcm_insert_trace(statep, IBCM_TRACE_REP_POST_COMPLETE);

	/* REP transmission finished; new REP posts are allowed again */
	statep->send_mad_flags &= ~IBCM_REP_POST_BUSY;

	/* Restart the timer only if the state did not change meanwhile */
	if (statep->state == IBCM_STATE_REP_SENT) {
		statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_resend_post_rep_complete:
 *	ibmf send-completion callback for a REP MAD that was resent in
 *	response to an incoming REQ. Clears the REP busy flag and drops the
 *	reference held on statep for the post. Unlike
 *	ibcm_post_rep_complete(), no timer is started here.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_resend_post_rep_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_resend_post_rep_complete(%p)", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->post_time, "REP_RETRY");
	ibcm_insert_trace(statep, IBCM_TRACE_REP_POST_COMPLETE);

	/*
	 * Release the REP MAD. A REP resent because of an incoming REQ
	 * does not get a new timeout.
	 */
	statep->send_mad_flags &= ~IBCM_REP_POST_BUSY;

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_mra_rep_complete:
 *	ibmf send-completion callback for a REP re-posted while the
 *	connection is in MRA_REP_RCVD. Flow accounting uses mra_time rather
 *	than post_time. Re-arms the timer if the state is still
 *	MRA_REP_RCVD and drops the reference held on statep for the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_mra_rep_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_mra_rep_complete statep %p", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->mra_time, "MRA_REP");
	ibcm_insert_trace(statep, IBCM_TRACE_REP_POST_COMPLETE);

	/* Restart the timer only if the state did not change meanwhile */
	if (statep->state == IBCM_STATE_MRA_REP_RCVD) {
		statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_mra_complete:
 *	ibmf send-completion callback for an MRA MAD. If delete_mra_msg is
 *	set, the MRA message is detached from statep and freed with the
 *	state mutex dropped. Finally clears the MRA busy flag and drops the
 *	reference held on statep for the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_mra_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_mra_complete statep %p", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->mra_time, "MRA");
	ibcm_insert_trace(statep, IBCM_TRACE_MRA_POST_COMPLETE);

	if (statep->delete_mra_msg == B_TRUE) {
		/* Unhook the message first, then free it without the lock */
		ibmf_msg_t	*doomed_msg = statep->mra_msg;

		statep->mra_msg = NULL;
		mutex_exit(&statep->state_mutex);

		(void) ibcm_free_out_msg(statep->stored_reply_addr.ibmf_hdl,
		    &doomed_msg);

		mutex_enter(&statep->state_mutex);
	}

	statep->send_mad_flags &= ~IBCM_MRA_POST_BUSY;

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_dreq_complete:
 *	ibmf send-completion callback for a DREQ MAD. Re-arms the timer if
 *	the connection is still in DREQ_SENT, calls ibcm_close_done() with
 *	a second argument of 1, and drops the reference held on statep for
 *	the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_dreq_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_dreq_complete statep %p", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->post_time, "DREQ");
	ibcm_insert_trace(statep, IBCM_TRACE_DREQ_POST_COMPLETE);

	/* Restart the timer only if the state did not change meanwhile */
	if (statep->state == IBCM_STATE_DREQ_SENT) {
		statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value);
	}

	ibcm_close_done(statep, 1);

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_lap_complete:
 *	ibmf send-completion callback for a LAP MAD. Re-arms the timer if
 *	the alternate-path state is still LAP_SENT and drops the reference
 *	held on statep for the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_lap_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_lap_complete statep %p", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->post_time, "LAP");
	ibcm_insert_trace(statep, IBCM_TRACE_LAP_POST_COMPLETE);

	/* Restart the timer only if ap_state did not change meanwhile */
	if (statep->ap_state == IBCM_AP_STATE_LAP_SENT) {
		statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_mra_lap_complete:
 *	ibmf send-completion callback for a LAP re-posted while the
 *	alternate-path state is MRA_LAP_RCVD. Flow accounting uses mra_time
 *	rather than post_time. Re-arms the timer if ap_state is still
 *	MRA_LAP_RCVD and drops the reference held on statep for the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_mra_lap_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_mra_lap_complete statep %p", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->mra_time, "MRA_LAP");
	ibcm_insert_trace(statep, IBCM_TRACE_LAP_POST_COMPLETE);

	/* Restart the timer only if ap_state did not change meanwhile */
	if (statep->ap_state == IBCM_AP_STATE_MRA_LAP_RCVD) {
		statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_rej_complete:
 *	ibmf send-completion callback for a REJ MAD. Clears the REJ busy
 *	flag. If the connection is in REJ_SENT, zeroes the remaining retry
 *	count and starts a timer of timer_value * max_cm_retries. Drops
 *	the reference held on statep for the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_rej_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_rej_complete statep %p", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->post_time, "REJ");
	ibcm_insert_trace(statep, IBCM_TRACE_REJ_POST_COMPLETE);

	statep->send_mad_flags &= ~IBCM_REJ_POST_BUSY;

	if (statep->state == IBCM_STATE_REJ_SENT) {
		/*
		 * No retransmits of the REJ itself. Keep statep around
		 * until every possible REQ/REP retransmit has happened.
		 */
		statep->remaining_retry_cnt = 0;
		statep->timerid = IBCM_TIMEOUT(statep,
		    statep->timer_value * statep->max_cm_retries);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_rtu_complete:
 *	ibmf send-completion callback for an RTU MAD. Clears the RTU busy
 *	flag, drops the reference held on statep for the post, and then
 *	calls ibcm_open_done().
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_rtu_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_rtu_complete statep %p", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->post_time, "RTU");
	ibcm_insert_trace(statep, IBCM_TRACE_RTU_POST_COMPLETE);

	statep->send_mad_flags &= ~IBCM_RTU_POST_BUSY;

	/* The reference is released before ibcm_open_done() is called */
	IBCM_REF_CNT_DECR(statep);
	ibcm_open_done(statep);

	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_apr_complete:
 *	ibmf send-completion callback for an APR MAD. Returns the
 *	alternate-path state to IDLE, wakes every waiter on block_mad_cv,
 *	and drops the reference held on statep for the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_apr_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_apr_complete statep %p", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->post_time, "APR");
	ibcm_insert_trace(statep, IBCM_TRACE_APR_POST_COMPLETE);

	/*
	 * Retransmits are held off while an APR is in transit; it has now
	 * completed, so go back to IDLE.
	 */
	statep->ap_state = IBCM_AP_STATE_IDLE;

	/* Release any DREQ threads and channel closes that were waiting */
	cv_broadcast(&statep->block_mad_cv);

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_stored_apr_complete:
 *	ibmf send-completion callback for a resent APR MAD. There is no
 *	state structure here: args is the outgoing ibmf message itself,
 *	which is freed against ibmf_handle after the flow accounting.
 *
 * INPUTS:
 *	ibmf_handle	- ibmf handle the message is freed against
 *	msgp		- unused
 *	args		- the ibmf_msg_t to free
 */
/*ARGSUSED*/
void
ibcm_post_stored_apr_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibmf_msg_t	*ibmf_apr_msg = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_stored_apr_complete args %p", args);

	ibcm_flow_dec(0, "APR_RESEND");

	(void) ibcm_free_out_msg(ibmf_handle, &ibmf_apr_msg);
}
/*
 * ibcm_post_drep_complete:
 *	ibmf send-completion callback for a DREP MAD. If the connection is
 *	in DREQ_RCVD it is moved to TIMEWAIT and the TIMEWAIT timer is
 *	started. Drops the reference held on statep for the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_drep_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibcm_state_data_t	*statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_drep_complete statep %p", statep);

	mutex_enter(&statep->state_mutex);

	ibcm_flow_dec(statep->post_time, "DREP");
	ibcm_insert_trace(statep, IBCM_TRACE_DREP_POST_COMPLETE);

	/*
	 * NOTE(review): this clears the REJ busy flag although the MAD is a
	 * DREP - presumably the two share the flag; confirm against the code
	 * that posts the DREP.
	 */
	statep->send_mad_flags &= ~IBCM_REJ_POST_BUSY;

	if (statep->state == IBCM_STATE_DREQ_RCVD) {
		ibcm_close_done(statep, 1);

		statep->state = IBCM_STATE_TIMEWAIT;
		statep->timer_stored_state = IBCM_STATE_TIMEWAIT;
		statep->remaining_retry_cnt = 0;

		/*
		 * TIMEWAIT duration: remote_ack_delay on the passive side,
		 * plus twice the packet life time on the active side.
		 */
		statep->timer_value = statep->remote_ack_delay;
		if (statep->mode == IBCM_ACTIVE_MODE) {
			statep->timer_value += (2 * statep->pkt_life_time);
		}

		statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_sidr_rep_complete:
 *	ibmf send-completion callback for a SIDR REP MAD. Clears the SIDR
 *	REP busy flag, zeroes the remaining retry count, starts the UD
 *	timer if the state is SIDR_REP_SENT, and drops the reference held
 *	on ud_statep for the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_ud_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_sidr_rep_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibcm_ud_state_data_t	*ud_statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_sidr_rep_complete ud_statep %p",
	    ud_statep);

	/* Flow accounting is done before the UD state mutex is taken */
	ibcm_flow_dec(0, "SIDR_REP");

	mutex_enter(&ud_statep->ud_state_mutex);

	ud_statep->ud_send_mad_flags &= ~IBCM_SREP_POST_BUSY;
	ud_statep->ud_remaining_retry_cnt = 0;

	if (ud_statep->ud_state == IBCM_STATE_SIDR_REP_SENT) {
		ud_statep->ud_timerid = IBCM_UD_TIMEOUT(ud_statep,
		    ud_statep->ud_timer_value);
	}

	IBCM_UD_REF_CNT_DECR(ud_statep);
	mutex_exit(&ud_statep->ud_state_mutex);
}
/*
 * ibcm_post_sidr_req_complete:
 *	ibmf send-completion callback for a SIDR REQ MAD. Starts the UD
 *	timer if the state is still SIDR_REQ_SENT and drops the reference
 *	held on ud_statep for the post.
 *
 * INPUTS:
 *	ibmf_handle	- unused
 *	msgp		- unused
 *	args		- the ibcm_ud_state_data_t the MAD was posted for
 */
/*ARGSUSED*/
void
ibcm_post_sidr_req_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
    void *args)
{
	ibcm_ud_state_data_t	*ud_statep = args;

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_sidr_req_complete ud_statep %p",
	    ud_statep);

	/* Flow accounting is done before the UD state mutex is taken */
	ibcm_flow_dec(0, "SIDR_REQ");

	mutex_enter(&ud_statep->ud_state_mutex);

	if (ud_statep->ud_state == IBCM_STATE_SIDR_REQ_SENT) {
		ud_statep->ud_timerid = IBCM_UD_TIMEOUT(ud_statep,
		    ud_statep->ud_timer_value);
	}

	IBCM_UD_REF_CNT_DECR(ud_statep);
	mutex_exit(&ud_statep->ud_state_mutex);
}
/*
 * ibcm_process_dreq_timeout:
 *	Completes a DREQ whose retries have all timed out: moves the
 *	connection to TIMEWAIT, starts the TIMEWAIT timer, fills in the
 *	caller-supplied close status (if any) and wakes threads blocked on
 *	block_client_cv.
 *
 * INPUTS:
 *	statep	- connection state structure whose DREQ timed out
 *
 * RETURN VALUES: NONE
 */
void
ibcm_process_dreq_timeout(ibcm_state_data_t *statep)
{
	mutex_enter(&statep->state_mutex);

	/* Out of retries: the connection goes to TIMEWAIT */
	statep->timer_stored_state = IBCM_STATE_TIMEWAIT;
	statep->state = IBCM_STATE_TIMEWAIT;
	ibcm_close_done(statep, 0);

	/*
	 * TIMEWAIT duration: remote_ack_delay, plus twice the packet life
	 * time when this is the active side.
	 */
	statep->timer_value = statep->remote_ack_delay;
	if (statep->mode == IBCM_ACTIVE_MODE)
		statep->timer_value += (2 * statep->pkt_life_time);

	statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value);

	/* Report why the channel closed, if a status pointer was supplied */
	if (statep->close_ret_status) {
		if (statep->stale == B_TRUE) {
			*statep->close_ret_status = IBT_CM_CLOSED_STALE;
		} else {
			*statep->close_ret_status = IBT_CM_CLOSED_DREQ_TIMEOUT;
		}
	}

	/* The close is complete; no private data is returned */
	statep->close_done = B_TRUE;
	if (statep->close_ret_priv_data_len != NULL)
		*statep->close_ret_priv_data_len = 0;

	/* Release any close-channel call made with the no-callbacks option */
	statep->close_nocb_state = IBCM_FAIL;

	/* Wake threads blocked in ibt_close_channel() */
	cv_broadcast(&statep->block_client_cv);

	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_add_tlist:
 *	Appends the given RC statep to the tail of the RC timeout list and
 *	signals ibcm_timeout_list_cv. The list is protected by
 *	ibcm_timeout_list_lock.
 *
 * INPUTS:
 *	statep	- RC state structure to queue
 *
 * RETURN VALUES: NONE
 */
void
ibcm_add_tlist(ibcm_state_data_t *statep)
{
	mutex_enter(&ibcm_timeout_list_lock);

	/* The new entry always becomes the last element */
	statep->timeout_next = NULL;
	if (ibcm_timeout_list_hdr != NULL)
		ibcm_timeout_list_tail->timeout_next = statep;
	else
		ibcm_timeout_list_hdr = statep;
	ibcm_timeout_list_tail = statep;

	cv_signal(&ibcm_timeout_list_cv);

	mutex_exit(&ibcm_timeout_list_lock);

	IBTF_DPRINTF_L3(cmlog, "ibcm_add_tlist: "
	    "attached state = %p to timeout list", statep);
}
/*
 * ibcm_run_tlist_thread:
 *	Signals ibcm_timeout_list_cv, under ibcm_timeout_list_lock, without
 *	queueing anything. ibcm_process_tlist() waits on this condition
 *	variable.
 *
 * RETURN VALUES: NONE
 */
void
ibcm_run_tlist_thread(void)
{
	mutex_enter(&ibcm_timeout_list_lock);

	cv_signal(&ibcm_timeout_list_cv);

	mutex_exit(&ibcm_timeout_list_lock);
}
/*
 * ibcm_add_ud_tlist:
 *	Appends the given UD statep to the tail of the UD timeout list and
 *	signals ibcm_timeout_list_cv. The UD list shares
 *	ibcm_timeout_list_lock with the RC list.
 *
 * INPUTS:
 *	ud_statep	- UD state structure to queue
 *
 * RETURN VALUES: NONE
 */
void
ibcm_add_ud_tlist(ibcm_ud_state_data_t *ud_statep)
{
	mutex_enter(&ibcm_timeout_list_lock);

	/* The new entry always becomes the last element */
	ud_statep->ud_timeout_next = NULL;
	if (ibcm_ud_timeout_list_hdr != NULL)
		ibcm_ud_timeout_list_tail->ud_timeout_next = ud_statep;
	else
		ibcm_ud_timeout_list_hdr = ud_statep;
	ibcm_ud_timeout_list_tail = ud_statep;

	cv_signal(&ibcm_timeout_list_cv);

	mutex_exit(&ibcm_timeout_list_lock);

	IBTF_DPRINTF_L3(cmlog, "ibcm_add_ud_tlist: "
	    "attached state = %p to ud timeout list", ud_statep);
}
/*
 * ibcm_process_tlist:
 *	Body of the thread that services the RC and UD timeout lists.
 *	On each pass it runs ibcm_check_for_opens() and
 *	ibcm_check_for_async_close() with the list lock dropped, then
 *	handles one queued RC statep if there is one, otherwise one queued
 *	UD statep, otherwise sleeps on ibcm_timeout_list_cv. The loop ends
 *	when IBCM_TIMEOUT_THREAD_EXIT is set in ibcm_timeout_list_flags.
 *
 * INPUTS:
 *	NONE
 *
 * RETURN VALUES: NONE
 */
void
ibcm_process_tlist()
{
	ibcm_state_data_t	*statep;
	ibcm_ud_state_data_t	*ud_statep;
	callb_cpr_t		cprinfo;

	IBTF_DPRINTF_L5(cmlog, "ibcm_process_tlist: thread started");

	mutex_enter(&ibcm_timeout_list_lock);

	CALLB_CPR_INIT(&cprinfo, &ibcm_timeout_list_lock, callb_generic_cpr,
	    "ibcm_process_tlist");

	/* ibcm_timeout_list_lock is held at the top of every iteration */
	while ((ibcm_timeout_list_flags & IBCM_TIMEOUT_THREAD_EXIT) == 0) {
		mutex_exit(&ibcm_timeout_list_lock);

		ibcm_check_for_opens();
		ibcm_check_for_async_close();

		mutex_enter(&ibcm_timeout_list_lock);

		/* RC entries are served before UD entries */
		if (ibcm_timeout_list_hdr != NULL) {
			/* Unlink the head of the RC list */
			statep = ibcm_timeout_list_hdr;
			ibcm_timeout_list_hdr = statep->timeout_next;
			if (ibcm_timeout_list_hdr == NULL)
				ibcm_timeout_list_tail = NULL;
			statep->timeout_next = NULL;

			/* Process it with the list lock dropped */
			mutex_exit(&ibcm_timeout_list_lock);
			IBTF_DPRINTF_L3(cmlog, "ibcm_process_tlist: "
			    "scheduling state = %p", statep);
			ibcm_timeout_client_cb(statep);
			mutex_enter(&ibcm_timeout_list_lock);
		} else if (ibcm_ud_timeout_list_hdr != NULL) {
			/* Unlink the head of the UD list */
			ud_statep = ibcm_ud_timeout_list_hdr;
			ibcm_ud_timeout_list_hdr = ud_statep->ud_timeout_next;
			if (ibcm_ud_timeout_list_hdr == NULL)
				ibcm_ud_timeout_list_tail = NULL;
			ud_statep->ud_timeout_next = NULL;

			/* Process it with the list lock dropped */
			mutex_exit(&ibcm_timeout_list_lock);
			IBTF_DPRINTF_L3(cmlog, "ibcm_process_tlist: "
			    "ud scheduling state = %p", ud_statep);
			ibcm_ud_timeout_client_cb(ud_statep);
			mutex_enter(&ibcm_timeout_list_lock);
		} else {
			/* Both lists are empty: sleep until signalled */
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&ibcm_timeout_list_cv, &ibcm_timeout_list_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &ibcm_timeout_list_lock);
		}
	}

	/* The thread needs to exit; signal the thread-done condvar */
	cv_signal(&ibcm_timeout_thread_done_cv);

#ifndef __lock_lint
	CALLB_CPR_EXIT(&cprinfo);	/* mutex_exit */
#endif
}
/*
 * ibcm_timeout_client_cb:
 *	Called from timeout thread processing
 *	Primary purpose is to call client handler
 *
 *	Handles, in this order: a DELETE statep with a pending recycle
 *	request, a DELETE statep marked delete_state_data, a TIMED_OUT
 *	connection (DREQ timeout, or REQ/REP timeout), and a timed out
 *	LAP (ap_state TIMED_OUT).
 *
 * INPUTS:
 *	arg	- ibcm_state_data_t is passed
 *
 * RETURN VALUES: NONE
 *
 * LOCKING:
 *	Acquires statep->state_mutex on entry; it is released before every
 *	call into the client handler and before returning.
 */
void
ibcm_timeout_client_cb(ibcm_state_data_t *statep)
{
mutex_enter(&statep->state_mutex);
/*
 * DELETE with a recycle request still attached: detach the request,
 * run ibcm_process_rc_recycle() directly in this thread and then
 * delete the statep.
 */
if ((statep->state == IBCM_STATE_DELETE) &&
(statep->recycle_arg != NULL)) {
struct ibcm_taskq_recycle_arg_s *recycle_arg;
recycle_arg = statep->recycle_arg;
_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
statep->recycle_arg = NULL;
_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
mutex_exit(&statep->state_mutex);
(void) ibcm_process_rc_recycle(recycle_arg);
ibcm_delete_state_data(statep);
return;
}
/* DELETE and marked for deallocation: free the state structure */
if ((statep->state == IBCM_STATE_DELETE) &&
(statep->delete_state_data == B_TRUE)) {
mutex_exit(&statep->state_mutex);
ibcm_dealloc_state_data(statep);
return;
}
/* Else, it must be in TIMEOUT state, do the necessary processing */
if (statep->state == IBCM_STATE_TIMED_OUT) {
void *data;
uint8_t cf_msg;
ib_guid_t local_hca_guid;
mutex_exit(&statep->state_mutex);
/*
 * DREQ timeout: deliver IBT_CM_EVENT_CONN_CLOSED to the client and
 * then finish with ibcm_process_dreq_timeout().
 */
if (statep->timedout_state == IBCM_STATE_DREQ_SENT) {
ibt_cm_event_t event;
ibt_cm_return_args_t ret_args;
bzero(&event, sizeof (event));
bzero(&ret_args, sizeof (ret_args));
event.cm_type = IBT_CM_EVENT_CONN_CLOSED;
event.cm_channel = statep->channel;
event.cm_session_id = NULL;
event.cm_priv_data = NULL;
event.cm_priv_data_len = 0;
if (statep->stale == B_TRUE)
event.cm_event.closed = IBT_CM_CLOSED_STALE;
else event.cm_event.closed = IBT_CM_CLOSED_DREQ_TIMEOUT;
/*
 * cm handler cannot be NULL here, as ibcm_timeout_cb only
 * queues a DREQ timeout on the timeout list when a handler
 * is specified
 */
ibcm_insert_trace(statep,
IBCM_TRACE_CALLED_CONN_CLOSE_EVENT);
(void) statep->cm_handler(statep->state_cm_private,
&event, &ret_args, NULL, 0);
ibcm_insert_trace(statep,
IBCM_TRACE_RET_CONN_CLOSE_EVENT);
ibcm_process_dreq_timeout(statep);
return;
}
/*
 * REQ/REP timeout. The private-data area of the stored message,
 * viewed as a REJ message, is handed to the client handler.
 */
data = ((ibcm_rej_msg_t *)
IBCM_OUT_MSGP(statep->stored_msg))->rej_private_data;
/* Tell the handler which message went unanswered */
if ((statep->timedout_state == IBCM_STATE_REQ_SENT) ||
(statep->timedout_state == IBCM_STATE_REP_WAIT)) {
cf_msg = IBT_CM_FAILURE_REQ;
} else {
ASSERT(
(statep->timedout_state == IBCM_STATE_REP_SENT) ||
(statep->timedout_state ==
IBCM_STATE_MRA_REP_RCVD));
cf_msg = IBT_CM_FAILURE_REP;
}
/*
 * Invoke the CM handler w/ event IBT_CM_EVENT_TIMEOUT
 * This callback happens for only active non blocking or
 * passive client
 */
ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_TIMEOUT,
cf_msg, IBT_CM_TIMEOUT, data, IBT_REJ_PRIV_DATA_SZ);
/* signal the blocked ibt_open_rc_channel */
mutex_enter(&statep->state_mutex);
/*
 * statep->open_return_data is set for blocking
 * signal the blocked ibt_open_rc_channel
 */
if (statep->open_return_data != NULL) {
statep->open_return_data->rc_status = IBT_CM_TIMEOUT;
statep->open_done = B_TRUE;
cv_broadcast(&statep->block_client_cv);
}
mutex_exit(&statep->state_mutex);
/*
 * Send a REJ (timeout) carrying the local HCA GUID. Note that the
 * REJ is posted with IBT_CM_FAILURE_UNKNOWN rather than cf_msg.
 */
local_hca_guid = h2b64(statep->local_hca_guid);
ibcm_post_rej_mad(statep, IBT_CM_TIMEOUT,
IBT_CM_FAILURE_UNKNOWN, &local_hca_guid,
sizeof (ib_guid_t));
} else if (statep->ap_state == IBCM_AP_STATE_TIMED_OUT) {
/* LAP timeout: notify the client with the mutex dropped */
mutex_exit(&statep->state_mutex);
ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_TIMEOUT,
IBT_CM_FAILURE_LAP, IBT_CM_TIMEOUT, NULL, 0);
/* Now wake up threads waiting for LAP/APR to complete */
mutex_enter(&statep->state_mutex);
/*
 * statep->ap_return_data is initialized for blocking in
 * ibt_set_alt_path(), signal the waiting CV
 */
if (statep->ap_return_data != NULL) {
statep->ap_return_data->ap_status = IBT_CM_AP_TIMEOUT;
statep->ap_done = B_TRUE;
cv_broadcast(&statep->block_client_cv);
}
statep->ap_state = IBCM_AP_STATE_IDLE;
cv_broadcast(&statep->block_mad_cv);
mutex_exit(&statep->state_mutex);
} else {
/* None of the expected states: log it and do nothing else */
IBTF_DPRINTF_L2(cmlog, "ibcm_timeout_client_cb "
"Unexpected else path statep %p state %d ap_state %d",
statep, statep->state, statep->ap_state);
mutex_exit(&statep->state_mutex);
}
}
/*
 * ibcm_ud_timeout_client_cb:
 *	Called from UD timeout thread processing
 *	Primary purpose is to call client handler
 *
 *	If the UD statep is in DELETE state and marked for deallocation it
 *	is simply freed. Otherwise the client's UD handler is called with a
 *	SIDR REP event whose status is IBT_CM_SREP_TIMEOUT, and the UD
 *	statep is then deleted.
 *
 * INPUTS:
 *	ud_statep	- ibcm_ud_state_data_t is passed
 *
 * RETURN VALUES: NONE
 */
void
ibcm_ud_timeout_client_cb(ibcm_ud_state_data_t *ud_statep)
{
	ibt_cm_ud_event_t	ud_event;

	mutex_enter(&ud_statep->ud_state_mutex);
	if ((ud_statep->ud_state == IBCM_STATE_DELETE) &&
	    (ud_statep->ud_delete_state_data == B_TRUE)) {
		mutex_exit(&ud_statep->ud_state_mutex);
		ibcm_dealloc_ud_state_data(ud_statep);
		return;
	}
	mutex_exit(&ud_statep->ud_state_mutex);

	/*
	 * Zero the whole event first, as the RC path in
	 * ibcm_timeout_client_cb() does, so that the fields not set below
	 * reach the client handler as zero/NULL instead of stack garbage.
	 */
	bzero(&ud_event, sizeof (ud_event));

	/* Fill in ibt_cm_ud_event_t */
	ud_event.cm_type = IBT_CM_UD_EVENT_SIDR_REP;
	ud_event.cm_session_id = NULL;
	ud_event.cm_event.sidr_rep.srep_status = IBT_CM_SREP_TIMEOUT;

	(void) ud_statep->ud_cm_handler(ud_statep->ud_state_cm_private,
	    &ud_event, NULL, NULL, 0);

	/* Delete UD state data now, finally done with it */
	ibcm_delete_ud_state_data(ud_statep);
}
/*
 * ibcm_process_sidr_req_msg:
 *	This call processes an incoming SIDR REQ
 *
 *	For a new request a UD state structure is created, the matching
 *	service/bind entry is looked up, the client's UD handler (if any)
 *	is called, and a SIDR REP is posted unless the client deferred the
 *	response. For a request that matches an existing UD state structure
 *	in SIDR_REP_SENT state, the stored SIDR REP is resent.
 *
 * INPUTS:
 *	hcap		- HCA entry pointer
 *	input_madp	- Incoming CM SIDR REQ MAD
 *	cm_mad_addr	- Address information for the MAD to be posted
 *
 * RETURN VALUE:
 *	NONE
 */
void
ibcm_process_sidr_req_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp,
    ibcm_mad_addr_t *cm_mad_addr)
{
	ib_gid_t		gid;
	ib_lid_t		lid;
	uint32_t		req_id;
	ibcm_status_t		state_lookup_status;
	ibcm_status_t		cm_status;
	ibt_sidr_status_t	sidr_status;
	ibcm_svc_info_t		*svc_infop;
	ibcm_svc_bind_t		*svc_bindp = NULL;
	ibcm_svc_bind_t		*tmp_bindp;
	ibcm_sidr_req_msg_t	*sidr_reqp = (ibcm_sidr_req_msg_t *)
	    (&input_madp[IBCM_MAD_HDR_SIZE]);
	ibcm_ud_state_data_t	*ud_statep = NULL;
	ibcm_sidr_srch_t	srch_sidr;
	ib_pkey_t		pkey;
	uint8_t			port_num;
	ib_guid_t		hca_guid;

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_sidr_req_msg:");

	hca_guid = hcap->hca_guid;
	port_num = cm_mad_addr->port_num;

	/* Figure out LID, GID, RequestId for svc_id lookup */
	lid = cm_mad_addr->rcvd_addr.ia_remote_lid;
	req_id = b2h32(sidr_reqp->sidr_req_request_id);
	pkey = b2h16(sidr_reqp->sidr_req_pkey);
	if (cm_mad_addr->grh_exists == B_TRUE)
		gid = cm_mad_addr->grh_hdr.ig_sender_gid;
	else
		gid.gid_prefix = gid.gid_guid = 0;

	/*
	 * Lookup for an existing state structure
	 * - if lookup fails it creates a new ud_state struct
	 * The HCA's SIDR list lock is taken as writer around the call,
	 * because the lookup may add a new entry to the list.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(srch_sidr))

	srch_sidr.srch_lid = lid;
	srch_sidr.srch_gid = gid;
	srch_sidr.srch_grh_exists = cm_mad_addr->grh_exists;
	srch_sidr.srch_req_id = req_id;
	srch_sidr.srch_mode = IBCM_PASSIVE_MODE;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(srch_sidr))

	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
	state_lookup_status = ibcm_find_sidr_entry(&srch_sidr, hcap, &ud_statep,
	    IBCM_FLAG_LOOKUP_AND_ADD);
	rw_exit(&hcap->hca_sidr_list_lock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_sidr_req_msg: ud_statep 0x%p "
	    "lookup status %x", ud_statep, state_lookup_status);

	if (state_lookup_status == IBCM_LOOKUP_NEW) {

		/* Increment hca's resource count */
		ibcm_inc_hca_res_cnt(hcap);

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ud_statep))

		/*
		 * Allocate CM MAD for a response
		 * This MAD is deallocated on state structure delete
		 * and re-used for all outgoing MADs for this connection.
		 * If MAD allocation fails, delete the ud statep
		 */
		if (ibcm_alloc_out_msg(cm_mad_addr->ibmf_hdl,
		    &ud_statep->ud_stored_msg, MAD_METHOD_SEND) !=
		    IBT_SUCCESS) {
			mutex_enter(&ud_statep->ud_state_mutex);
			IBCM_UD_REF_CNT_DECR(ud_statep);
			mutex_exit(&ud_statep->ud_state_mutex);
			ibcm_dec_hca_res_cnt(hcap);
			ibcm_delete_ud_state_data(ud_statep);
			return;
		}

		/* Lookup for service */
		ud_statep->ud_svc_id = b2h64(sidr_reqp->sidr_req_service_id);
		ud_statep->ud_state  = IBCM_STATE_SIDR_REQ_RCVD;
		ud_statep->ud_clnt_proceed = IBCM_BLOCK;
		ud_statep->ud_hcap = hcap;

		mutex_enter(&ibcm_svc_info_lock);

		svc_infop = ibcm_find_svc_entry(ud_statep->ud_svc_id);

		IBTF_DPRINTF_L4(cmlog, "ibcm_process_sidr_req_msg: "
		    " ud_statep 0x%p svc_info %p", ud_statep, svc_infop);

		/*
		 * No need to hold the ud state mutex, as no other thread
		 * modifies ud statep in IBCM_STATE_SIDR_REQ_RCVD state
		 */

		if (svc_infop != NULL) {
			/* find the "bind" entry that enables this port */
			svc_bindp = NULL;
			tmp_bindp = svc_infop->svc_bind_list;
			while (tmp_bindp) {
				if (tmp_bindp->sbind_hcaguid == hca_guid &&
				    tmp_bindp->sbind_port == port_num) {
					if (gid.gid_guid ==
					    tmp_bindp->sbind_gid.gid_guid &&
					    gid.gid_prefix ==
					    tmp_bindp->sbind_gid.gid_prefix) {
						/* a really good match */
						svc_bindp = tmp_bindp;
						if (pkey ==
						    tmp_bindp->sbind_pkey)
							/* absolute best */
							break;
					} else if (svc_bindp == NULL) {
						/* port match => a good match */
						svc_bindp = tmp_bindp;
					}
				}
				tmp_bindp = tmp_bindp->sbind_link;
			}
			/* A service without a bind on this port is no match */
			if (svc_bindp == NULL) {
				svc_infop = NULL;
			}
		}

		IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->TransactionID =
		    ((ib_mad_hdr_t *)(input_madp))->TransactionID;

		ibcm_build_reply_mad_addr(cm_mad_addr,
		    &ud_statep->ud_stored_reply_addr);

		if (ud_statep->ud_stored_reply_addr.cm_qp_entry == NULL) {

			mutex_exit(&ibcm_svc_info_lock);

			/* Not much choice. CM MADs cannot go on QP1 */
			mutex_enter(&ud_statep->ud_state_mutex);
			IBCM_UD_REF_CNT_DECR(ud_statep);
			ud_statep->ud_state = IBCM_STATE_DELETE;
			mutex_exit(&ud_statep->ud_state_mutex);

			ibcm_delete_ud_state_data(ud_statep);
			return;
		}

		if (svc_infop == NULL || svc_infop->svc_ud_handler == NULL) {
			/*
			 * Don't have a record of Service ID in CM's
			 * internal list registered at this gid/lid.
			 * So, send out Service ID not supported SIDR REP msg
			 */
			sidr_status = IBT_CM_SREP_SID_INVALID;
			mutex_exit(&ibcm_svc_info_lock);
		} else {
			ud_statep->ud_cm_handler = svc_infop->svc_ud_handler;
			ud_statep->ud_state_cm_private =
			    svc_bindp->sbind_cm_private;
			/* Hold the service entry while the lock is dropped */
			IBCM_SVC_INCR(svc_infop);
			mutex_exit(&ibcm_svc_info_lock);

			/* Call Client's UD handler */
			cm_status = ibcm_sidr_req_ud_handler(ud_statep,
			    sidr_reqp, cm_mad_addr, &sidr_status);

			mutex_enter(&ibcm_svc_info_lock);
			IBCM_SVC_DECR(svc_infop);
			mutex_exit(&ibcm_svc_info_lock);

			/*
			 * cm_status is only meaningful on this path, where
			 * the handler was called, so the DEFER test lives
			 * here and never reads an uninitialized value.
			 */
			if (cm_status == IBCM_DEFER) {
				IBTF_DPRINTF_L4(cmlog,
				    "ibcm_process_sidr_req_msg: "
				    "ud_statep 0x%p client returned DEFER "
				    "response", ud_statep);
				return;
			}
		}

		ibcm_post_sidr_rep_mad(ud_statep, sidr_status);

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ud_statep))

		mutex_enter(&ud_statep->ud_state_mutex);
		IBCM_UD_REF_CNT_DECR(ud_statep);
		mutex_exit(&ud_statep->ud_state_mutex);
	} else {
		ASSERT(state_lookup_status == IBCM_LOOKUP_EXISTS);

		/* Duplicate SIDR REQ: resend the REP if one was already sent */
		mutex_enter(&ud_statep->ud_state_mutex);

		if (ud_statep->ud_state == IBCM_STATE_SIDR_REP_SENT)
			ibcm_resend_srep_mad(ud_statep);

		IBCM_UD_REF_CNT_DECR(ud_statep);
		mutex_exit(&ud_statep->ud_state_mutex);
	}
}
/*
 * ibcm_process_sidr_rep_msg:
 *	This call processes an incoming SIDR REP.
 *
 *	The UD state structure is looked up on the HCA's SIDR list using the
 *	local LID/GID of the received MAD and the request id carried in the
 *	REP. The REP is dropped unless both its transaction id and service id
 *	match what is recorded in the state structure. If the state is
 *	IBCM_STATE_SIDR_REQ_SENT, the pending timer is cancelled, the client's
 *	UD handler is called, a caller waiting on ud_block_client_cv (if
 *	ud_return_data is set) is given the result, and the state structure is
 *	deleted. In any other state the REP is ignored.
 *
 *	Every exit path taken after a successful lookup drops one reference
 *	on ud_statep.
 *
 * INPUTS:
 *	hcap		- HCA entry pointer
 *	input_madp	- incoming CM SIDR REP MAD
 *	cm_mad_addr	- Address information of the received MAD
 *
 * RETURN VALUE:
 *	NONE
 */
void
ibcm_process_sidr_rep_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp,
    ibcm_mad_addr_t *cm_mad_addr)
{
	ib_lid_t		lid;
	ib_gid_t		gid;
	ibcm_status_t		status;
	ib_svc_id_t		tmp_svc_id;
	ibcm_sidr_rep_msg_t	*sidr_repp = (ibcm_sidr_rep_msg_t *)
	    (&input_madp[IBCM_MAD_HDR_SIZE]);
	ibcm_ud_state_data_t	*ud_statep = NULL;
	ibcm_sidr_srch_t	srch_sidr;

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_sidr_rep_msg:");

	lid = cm_mad_addr->rcvd_addr.ia_local_lid;
	if (cm_mad_addr->grh_exists == B_TRUE)
		gid = cm_mad_addr->grh_hdr.ig_recver_gid;
	else
		gid.gid_prefix = gid.gid_guid = 0;

	/* sidr_rep_qpn_plus is a wire field: convert big-endian to host */
	IBTF_DPRINTF_L3(cmlog, "ibcm_process_sidr_rep_msg: QPN rcvd = %x",
	    b2h32(sidr_repp->sidr_rep_qpn_plus) >> 8);

	/*
	 * Lookup for an existing state structure.
	 * The SIDR list lock is held as reader across the search.
	 */
	IBTF_DPRINTF_L4(cmlog, "ibcm_process_sidr_rep: lid=%x, (%llX, %llX), "
	    "grh = %x, id = %x", lid, gid.gid_prefix, gid.gid_guid,
	    cm_mad_addr->grh_exists, b2h32(sidr_repp->sidr_rep_request_id));

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(srch_sidr))
	srch_sidr.srch_lid = lid;
	srch_sidr.srch_gid = gid;
	srch_sidr.srch_grh_exists = cm_mad_addr->grh_exists;
	srch_sidr.srch_req_id = b2h32(sidr_repp->sidr_rep_request_id);
	srch_sidr.srch_mode = IBCM_ACTIVE_MODE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(srch_sidr))

	rw_enter(&hcap->hca_sidr_list_lock, RW_READER);
	status = ibcm_find_sidr_entry(&srch_sidr, hcap, &ud_statep,
	    IBCM_FLAG_LOOKUP);
	rw_exit(&hcap->hca_sidr_list_lock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_sidr_rep_msg: ud_statep 0x%p "
	    "find sidr entry status = %x", ud_statep, status);

	if (status != IBCM_LOOKUP_EXISTS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_sidr_rep_msg: "
		    "No matching ud_statep for SIDR REP");
		return;
	}

	if (IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->TransactionID !=
	    ((ib_mad_hdr_t *)(input_madp))->TransactionID) {
		/*
		 * Log before giving up the reference: the message reads
		 * ud_statep->ud_stored_msg, and ud_statep is only guaranteed
		 * to stay around while this thread's reference is held.
		 */
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_sidr_rep_msg: "
		    "ud_statep 0x%p. A SIDR REP MAD with tid expected 0x%llX "
		    "tid found 0x%llX req_id %x arrived", ud_statep,
		    b2h64(
		    IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->TransactionID),
		    b2h64(((ib_mad_hdr_t *)(input_madp))->TransactionID),
		    b2h32(sidr_repp->sidr_rep_request_id));

		mutex_enter(&ud_statep->ud_state_mutex);
		IBCM_UD_REF_CNT_DECR(ud_statep);
		mutex_exit(&ud_statep->ud_state_mutex);
		return;
	}

	mutex_enter(&ud_statep->ud_state_mutex);

	/*
	 * The service ID received must match the one sent in the SIDR REQ.
	 * If they don't match just return. The ID is copied out of the MAD
	 * with bcopy before it is compared.
	 */
	bcopy(sidr_repp->sidr_rep_service_id, &tmp_svc_id, sizeof (tmp_svc_id));
	if (ud_statep->ud_svc_id != b2h64(tmp_svc_id)) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_sidr_rep_msg: "
		    "ud_statep -0x%p svcids do not match %llx %llx",
		    ud_statep, ud_statep->ud_svc_id, b2h64(tmp_svc_id));

		IBCM_UD_REF_CNT_DECR(ud_statep);
		mutex_exit(&ud_statep->ud_state_mutex);
		return;
	}

	if (ud_statep->ud_state == IBCM_STATE_SIDR_REQ_SENT) {
		timeout_id_t	timer_val = ud_statep->ud_timerid;

		ud_statep->ud_state = IBCM_STATE_SIDR_REP_RCVD;
		ud_statep->ud_timerid = 0;
		mutex_exit(&ud_statep->ud_state_mutex);

		/* Cancel timer set after sending SIDR REQ */
		(void) untimeout(timer_val);

		/*
		 * Call Client's UD handler
		 */
		ibcm_sidr_rep_ud_handler(ud_statep, sidr_repp);

		mutex_enter(&ud_statep->ud_state_mutex);

		ud_statep->ud_state = IBCM_STATE_DELETE;

		/*
		 * ud_statep->ud_return_data is initialized for blocking in
		 * ibt_ud_get_dqpn(). Initialize its fields and
		 * signal the blocking call in ibt_ud_get_dqpn().
		 */
		if (ud_statep->ud_return_data != NULL) {
			/* get rep_qpn and rep_status */
			ibt_priv_data_len_t	len;

			/* Copy the SIDR private data, bounded by both sizes */
			len = min(ud_statep->ud_return_data->ud_priv_data_len,
			    IBT_SIDR_REP_PRIV_DATA_SZ);

			if ((ud_statep->ud_return_data->ud_priv_data != NULL) &&
			    (len > 0)) {
				bcopy(sidr_repp->sidr_rep_private_data,
				    ud_statep->ud_return_data->ud_priv_data,
				    len);
			}

			/* get status first */
			ud_statep->ud_return_data->ud_status =
			    sidr_repp->sidr_rep_rep_status;

			/* QPN and QKey are only reported for a valid REP */
			if (ud_statep->ud_return_data->ud_status ==
			    IBT_CM_SREP_QPN_VALID) {
				ud_statep->ud_return_data->ud_dqpn =
				    b2h32(sidr_repp->sidr_rep_qpn_plus) >> 8;
				ud_statep->ud_return_data->ud_qkey =
				    b2h32(sidr_repp->sidr_rep_qkey);
			}

			ud_statep->ud_blocking_done = B_TRUE;
			cv_broadcast(&ud_statep->ud_block_client_cv);
		}

		IBCM_UD_REF_CNT_DECR(ud_statep);
		mutex_exit(&ud_statep->ud_state_mutex);

		/* Delete UD state data now, finally done with it */
		ibcm_delete_ud_state_data(ud_statep);
	} else {
		/* Not waiting for a REP in this state: ignore the MAD */
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_sidr_rep_msg: "
		    "ud state is = 0x%x", ud_statep->ud_state);
		IBCM_UD_REF_CNT_DECR(ud_statep);
		mutex_exit(&ud_statep->ud_state_mutex);
	}
}
/*
 * ibcm_post_sidr_rep_mad:
 *	Finishes the SIDR REP kept in ud_statep->ud_stored_msg, posts it,
 *	moves the state to IBCM_STATE_SIDR_REP_SENT and starts the timeout
 *	that bounds how long the reply stays stored.
 *
 * INPUTS:
 *	ud_statep	- pointer to ibcm_ud_state_data_t
 *	status		- Status value written into the REP
 *
 * RETURN VALUE: NONE
 */
void
ibcm_post_sidr_rep_mad(ibcm_ud_state_data_t *ud_statep,
    ibt_sidr_status_t status)
{
	ib_svc_id_t		wire_svc_id;
	clock_t			store_time;
	ibcm_sidr_rep_msg_t	*repp;

	repp = (ibcm_sidr_rep_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg);

	IBTF_DPRINTF_L5(cmlog, "ibcm_post_sidr_rep_mad:");

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*repp))

	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_SIDR_REP + IBCM_ATTR_BASE_ID);

	/*
	 * Fill in the remaining SIDR REP fields. (The others were already
	 * filled up in ibcm_sidr_req_ud_handler().)
	 */
	repp->sidr_rep_request_id = h2b32(ud_statep->ud_req_id);

	/* service id goes out big-endian, copied bytewise into the MAD */
	wire_svc_id = h2b64(ud_statep->ud_svc_id);
	bcopy(&wire_svc_id, repp->sidr_rep_service_id, sizeof (wire_svc_id));

	repp->sidr_rep_rep_status = (uint8_t)status;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*repp))

	/* post the SIDR REP MAD (no completion callback) */
	ibcm_post_ud_mad(ud_statep, ud_statep->ud_stored_msg, NULL, NULL);

	store_time = ibt_ib2usec(ibcm_max_sidr_rep_store_time);

	/*
	 * The state change and the start of the timeout are done under the
	 * state mutex. Otherwise a SIDR REQ arriving after the state is set
	 * but before the timeout is started could not cancel that timeout,
	 * leaving it dangling.
	 */
	mutex_enter(&ud_statep->ud_state_mutex);
	ud_statep->ud_remaining_retry_cnt = 1;
	ud_statep->ud_timer_value = store_time;
	ud_statep->ud_state = IBCM_STATE_SIDR_REP_SENT;
	ud_statep->ud_timer_stored_state = IBCM_STATE_SIDR_REP_SENT;
	ud_statep->ud_timerid = IBCM_UD_TIMEOUT(ud_statep,
	    ud_statep->ud_timer_value);
	mutex_exit(&ud_statep->ud_state_mutex);
}
/*
 * ibcm_sidr_timeout_cb:
 *	Timer expiry handler for a UD (SIDR) state structure. What happens
 *	depends on the state found under ud_state_mutex:
 *
 *	IBCM_STATE_SIDR_REP_SENT
 *		The stored REP has been kept long enough; the state structure
 *		is marked IBCM_STATE_DELETE and deleted.
 *	IBCM_STATE_SIDR_REQ_SENT, retries remaining
 *		The retry count is decremented and the stored MAD is posted
 *		again with ibcm_post_sidr_req_complete as the callback.
 *	IBCM_STATE_SIDR_REQ_SENT, no retries remaining
 *		The state is marked IBCM_STATE_DELETE. A waiter (if
 *		ud_return_data is set) is given IBT_CM_SREP_TIMEOUT. If a
 *		client handler is registered the state structure is handed to
 *		ibcm_add_ud_tlist(), otherwise it is deleted here.
 *	any other state
 *		Nothing is done.
 *
 * INPUTS:
 *	arg	- ibcm_ud_state_data_t with all the info
 *
 * RETURN VALUE: NONE
 */
void
ibcm_sidr_timeout_cb(void *arg)
{
	ibcm_ud_state_data_t	*ud_statep = arg;

	mutex_enter(&ud_statep->ud_state_mutex);

	IBTF_DPRINTF_L3(cmlog, "ibcm_sidr_timeout_cb: ud_statep 0x%p "
	    "state = 0x%x", ud_statep, ud_statep->ud_state);

	/* Stored SIDR REP has timed out: deallocate the CM state structure */
	if (ud_statep->ud_state == IBCM_STATE_SIDR_REP_SENT) {
		ud_statep->ud_state = IBCM_STATE_DELETE;
		mutex_exit(&ud_statep->ud_state_mutex);

		ibcm_delete_ud_state_data(ud_statep);
		return;
	}

	/* Timer fired in a state that needs no processing */
	if (ud_statep->ud_state != IBCM_STATE_SIDR_REQ_SENT) {
#ifdef DEBUG
		if (ibcm_test_mode > 0) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_sidr_timeout_cb: "
			    "Nop timeout for ud_statep 0x%p in ud_state %d",
			    ud_statep, ud_statep->ud_state);
		}
#endif
		mutex_exit(&ud_statep->ud_state_mutex);
		return;
	}

	/* From here on this is SIDR REQ sender side processing */

	if (ud_statep->ud_remaining_retry_cnt > 0) {
		ud_statep->ud_remaining_retry_cnt--;
		IBCM_UD_REF_CNT_INCR(ud_statep); /* for non-blocking post */
		IBTF_DPRINTF_L4(cmlog, "ibcm_sidr_timeout_cb: "
		    "ud_statep = %p, retries remaining = 0x%x",
		    ud_statep, ud_statep->ud_remaining_retry_cnt);
		mutex_exit(&ud_statep->ud_state_mutex);

		/* Post mad in non blocking mode */
		ibcm_post_ud_mad(ud_statep, ud_statep->ud_stored_msg,
		    ibcm_post_sidr_req_complete, ud_statep);
		return;
	}

	/*
	 * Retry counter expired, clean up. The outcome reported is a
	 * "status" of IBT_CM_SREP_TIMEOUT.
	 */
	ud_statep->ud_state = IBCM_STATE_DELETE;

	if (ud_statep->ud_return_data != NULL) {
		ud_statep->ud_return_data->ud_status = IBT_CM_SREP_TIMEOUT;
		ud_statep->ud_blocking_done = B_TRUE;
		cv_broadcast(&ud_statep->ud_block_client_cv);
	}

	mutex_exit(&ud_statep->ud_state_mutex);

	/* Invoke the client handler in a separate thread */
	if (ud_statep->ud_cm_handler != NULL) {
		/* UD state data is deleted in the timeout thread */
		ibcm_add_ud_tlist(ud_statep);
		return;
	}

	/* Delete UD state data now, finally done with it */
	ibcm_delete_ud_state_data(ud_statep);
}
/*
 * ibcm_resend_srep_mad:
 *	Called on a duplicate incoming SIDR REQ on server side.
 *	Posts the stored MAD from the ud state structure (non-blocking, with
 *	ibcm_post_sidr_rep_complete as the callback) after cancelling any
 *	running timer.
 *
 *	Nothing is done when a resend is already in progress
 *	(IBCM_SREP_POST_BUSY is set); otherwise the flag is set and an extra
 *	reference is taken for the non-blocking post.
 *	NOTE(review): the timer restart and the clearing of
 *	IBCM_SREP_POST_BUSY are presumably done by
 *	ibcm_post_sidr_rep_complete(); that is not visible here - confirm.
 *
 *	Must be called with ud_statep->ud_state_mutex held. The mutex is
 *	dropped while the timer is cancelled and the MAD is posted, and is
 *	held again on return.
 *
 * INPUTS:
 *	ud_statep	- ibcm_ud_state_data_t
 *
 * RETURN VALUE: NONE
 */
void
ibcm_resend_srep_mad(ibcm_ud_state_data_t *ud_statep)
{
	timeout_id_t	pending_timer;

	ASSERT(MUTEX_HELD(&ud_statep->ud_state_mutex));

	IBTF_DPRINTF_L3(cmlog, "ibcm_resend_srep_mad: ud_statep 0x%p",
	    ud_statep);

	/* A resend is already outstanding; do not queue another */
	if (ud_statep->ud_send_mad_flags & IBCM_SREP_POST_BUSY)
		return;

	ud_statep->ud_send_mad_flags |= IBCM_SREP_POST_BUSY;

	/* for nonblocking SIDR REP Post */
	IBCM_UD_REF_CNT_INCR(ud_statep);

	/*
	 * Take ownership of the currently running timer (if any) while the
	 * mutex is held; untimeout() itself is called without the mutex.
	 */
	pending_timer = ud_statep->ud_timerid;
	ud_statep->ud_timerid = 0;
	mutex_exit(&ud_statep->ud_state_mutex);

	if (pending_timer != 0)
		(void) untimeout(pending_timer);

	/* Always resend the response MAD to the original reply destination */
	ibcm_post_ud_mad(ud_statep, ud_statep->ud_stored_msg,
	    ibcm_post_sidr_rep_complete, ud_statep);

	/* The caller expects the mutex to be held on return */
	mutex_enter(&ud_statep->ud_state_mutex);
}
/*
 * ibcm_build_reply_mad_addr:
 *	Derives the address used for a reply MAD from the address of the
 *	incoming MAD.
 *
 *	The incoming address is copied wholesale; if a GRH is present its
 *	sender and receiver GIDs are exchanged. cm_qp_entry of the reply
 *	address is set to whatever ibcm_find_qp() returns for the incoming
 *	MAD's HCA, port number and pkey.
 *
 * INPUTS:
 *	inp_mad_addr:	Address information in the incoming MAD
 *	out_mad_addr:	Derived address for the reply MAD
 *
 * RETURN VALUE: NONE
 */
void
ibcm_build_reply_mad_addr(ibcm_mad_addr_t *inp_mad_addr,
    ibcm_mad_addr_t *out_mad_addr)
{
	IBTF_DPRINTF_L5(cmlog, "ibcm_build_reply_mad_addr:");

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*out_mad_addr))

	/* Start from an exact copy of the incoming address */
	bcopy(inp_mad_addr, out_mad_addr, sizeof (ibcm_mad_addr_t));

	if (inp_mad_addr->grh_exists == B_TRUE) {
		/* the reply goes back to whoever sent the incoming MAD */
		ib_gid_t	orig_sender = inp_mad_addr->grh_hdr.ig_sender_gid;

		out_mad_addr->grh_hdr.ig_sender_gid =
		    inp_mad_addr->grh_hdr.ig_recver_gid;
		out_mad_addr->grh_hdr.ig_recver_gid = orig_sender;
	}

	/*
	 * CM posts response MAD on a new/existing internal QP on the same port
	 * and pkey
	 */
	out_mad_addr->cm_qp_entry = ibcm_find_qp(
	    inp_mad_addr->cm_qp_entry->qp_port->port_hcap,
	    inp_mad_addr->port_num, inp_mad_addr->rcvd_addr.ia_p_key);

	/*
	 * NOTE(review): this closing annotation repeats NOW_INVISIBLE; it may
	 * have been meant to be NOW_VISIBLE - confirm before changing.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*out_mad_addr))
}
/*
 * ibcm_post_rc_mad
 *	Posts a CM MAD associated with a RC statep, using the reply address
 *	stored in the statep. The time of the post is recorded in
 *	statep->post_time.
 *
 *	If the post fails and a callback was supplied, the callback is
 *	invoked directly from here.
 *
 * INPUTS:
 *	statep	: RC statep associated with the post
 *	msgp	: CM MAD to be posted
 *	post_cb	: non-NULL callback address implies non-blocking post
 *	args	: Args to ibmf send callback
 *
 * RETURN VALUE: NONE
 */
void
ibcm_post_rc_mad(ibcm_state_data_t *statep, ibmf_msg_t *msgp,
    ibmf_msg_cb_t post_cb, void *args)
{
	/* timestamp the post under the state mutex */
	mutex_enter(&statep->state_mutex);
	statep->post_time = gethrtime();
	mutex_exit(&statep->state_mutex);

	if (ibcm_post_mad(msgp, &statep->stored_reply_addr, post_cb, args) ==
	    IBT_SUCCESS)
		return;

	/* Post failed: call ibmf callback directly */
	if (post_cb != NULL)
		(*post_cb)(NULL, msgp, args);
}
/*
 * ibcm_post_ud_mad
 *	Posts a CM MAD associated with a UD statep, using the reply address
 *	stored in the ud_statep.
 *
 *	If the post fails and a callback was supplied, the callback is
 *	invoked directly from here.
 *
 * INPUTS:
 *	ud_statep	: UD statep associated with the post
 *	msgp		: CM MAD to be posted
 *	ud_post_cb	: non-NULL callback address implies non-blocking post
 *	args		: Args to ibmf send callback
 *
 * RETURN VALUE: NONE
 */
void
ibcm_post_ud_mad(ibcm_ud_state_data_t *ud_statep, ibmf_msg_t *msgp,
    ibmf_msg_cb_t ud_post_cb, void *args)
{
	if (ibcm_post_mad(msgp, &ud_statep->ud_stored_reply_addr, ud_post_cb,
	    args) == IBT_SUCCESS)
		return;

	/* Post failed: call ibmf callback directly */
	if (ud_post_cb != NULL)
		(*ud_post_cb)(NULL, msgp, args);
}
/*
 * ibcm_post_mad:
 *	Posts a CM MAD through ibmf_msg_transport().
 *
 *	The addressing information in cm_mad_addr is copied into the message
 *	before it is posted. When post_cb is non-NULL it is handed to IBMF as
 *	the send callback and ibcm_flow_inc() is called before the post.
 *
 * INPUTS:
 *	msgp		: CM MAD to be posted
 *	cm_mad_addr	: Address information for the MAD to be posted
 *	post_cb		: non-NULL callback address implies non-blocking post
 *	args		: Args to ibmf send callback
 *
 * RETURN VALUE:
 *	IBT_SUCCESS if ibmf_msg_transport() succeeded, otherwise the value
 *	ibcm_ibmf_analyze_error() returns for the IBMF failure status.
 */
ibt_status_t
ibcm_post_mad(ibmf_msg_t *msgp, ibcm_mad_addr_t *cm_mad_addr,
    ibmf_msg_cb_t post_cb, void *args)
{
	int	ibmf_rc;

	IBTF_DPRINTF_L5(cmlog, "ibcm_post_mad: "
	    "ibmf_msg_t = %p, cm_madd_adr = %p", msgp, cm_mad_addr);

	IBTF_DPRINTF_L4(cmlog, "ibcm_post_mad: dlid = %x, d_qno= %x",
	    cm_mad_addr->rcvd_addr.ia_remote_lid,
	    cm_mad_addr->rcvd_addr.ia_remote_qno);
	IBTF_DPRINTF_L4(cmlog, "ibcm_post_mad: p_key = %x, q_key = %x, "
	    "sl = %x, grh_exists = %x",
	    cm_mad_addr->rcvd_addr.ia_p_key, cm_mad_addr->rcvd_addr.ia_q_key,
	    cm_mad_addr->rcvd_addr.ia_service_level, cm_mad_addr->grh_exists);

	/* Local addressing is always copied; GRH only when one exists */
	msgp->im_local_addr = cm_mad_addr->rcvd_addr;
	if (cm_mad_addr->grh_exists == B_TRUE)
		msgp->im_global_addr = cm_mad_addr->grh_hdr;

	/* non-blocking posts are counted before they are handed to IBMF */
	if (post_cb != NULL)
		ibcm_flow_inc();

	ibmf_rc = ibmf_msg_transport(cm_mad_addr->ibmf_hdl,
	    cm_mad_addr->cm_qp_entry->qp_cm, msgp, NULL, post_cb, args, 0);
	if (ibmf_rc == IBMF_SUCCESS)
		return (IBT_SUCCESS);

	IBTF_DPRINTF_L2(cmlog, "ibcm_post_mad: ibmf_msg_transport "
	    "failed: status %d, cb = %p", ibmf_rc, post_cb);

	/* Analyze the reason for failure */
	return (ibcm_ibmf_analyze_error(ibmf_rc));
}
/*
 * ibcm_process_get_classport_info:
 *	Answers a Get(ClassPortInfo) MAD: allocates a GET_RESPONSE message,
 *	copies the transaction id of the request and the contents of
 *	ibcm_clpinfo into it, posts it without a callback and frees it.
 *
 * INPUTS:
 *	hcap		- HCA entry pointer
 *	input_madp	- Input MAD pointer
 *	cm_mad_addr	- Address information for the MAD to be posted
 *
 * RETURN VALUE: NONE
 */
static void
ibcm_process_get_classport_info(ibcm_hca_info_t *hcap, uint8_t *input_madp,
    ibcm_mad_addr_t *cm_mad_addr)
{
	ibmf_msg_t	*resp_msg;

	IBTF_DPRINTF_L5(cmlog, "ibcm_process_get_classport_info: (%p, %p, %p)",
	    hcap, input_madp, cm_mad_addr);

	if (ibcm_alloc_out_msg(cm_mad_addr->ibmf_hdl, &resp_msg,
	    MAD_METHOD_GET_RESPONSE) != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_classport_info: "
		    "ibcm_alloc_out_msg failed");
		return;
	}

	/* the response carries the transaction id of the input get mad */
	IBCM_OUT_HDRP(resp_msg)->TransactionID =
	    ((ib_mad_hdr_t *)(input_madp))->TransactionID;
	IBCM_OUT_HDRP(resp_msg)->AttributeID =
	    h2b16(MAD_ATTR_ID_CLASSPORTINFO);

	/* payload is CM's ClassPortInfo */
	bcopy(&ibcm_clpinfo, IBCM_OUT_MSGP(resp_msg), sizeof (ibcm_clpinfo));

	/* post without a callback, then release the message */
	(void) ibcm_post_mad(resp_msg, cm_mad_addr, NULL, NULL);
	(void) ibcm_free_out_msg(cm_mad_addr->ibmf_hdl, &resp_msg);

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_get_classport_info: done");
}
/*
 * ibcm_decode_classport_info:
 *	Logs selected fields of a received ClassPortInfo to the debug
 *	buffer. Nothing else is done with the MAD.
 *
 * INPUTS:
 *	hcap		- HCA entry pointer
 *	input_madp	- Input MAD pointer
 *	cm_mad_addr	- Address information of the received MAD
 *
 * RETURN VALUE: NONE
 */
static void
ibcm_decode_classport_info(ibcm_hca_info_t *hcap, uint8_t *input_madp,
    ibcm_mad_addr_t *cm_mad_addr)
{
	ibcm_classportinfo_msg_t	*clpinfop;

	/* ClassPortInfo follows the MAD header */
	clpinfop = (ibcm_classportinfo_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]);

	IBTF_DPRINTF_L5(cmlog, "ibcm_decode_classport_info: (%p, %p, %p)",
	    hcap, input_madp, cm_mad_addr);

	/* Print various fields of received classportinfo in debug buf */
	IBTF_DPRINTF_L4(cmlog, "ibcm_decode_classport_info: "
	    "Base version %d Class version %d", clpinfop->BaseVersion,
	    clpinfop->ClassVersion);
	IBTF_DPRINTF_L4(cmlog, "ibcm_decode_classport_info: "
	    "Cap Mask %d Resp Time %d", clpinfop->CapabilityMask,
	    clpinfop->RespTimeValue_plus);
}
/*
 * ibcm_handler_conn_fail:
 *	Helper function used to call client handler for Conn fail event.
 *
 *	The path cache is purged first. If the connection has a client
 *	handler it is called with an IBT_CM_EVENT_FAILURE event built from
 *	the cf_* arguments. Afterwards the connection trace is dumped when
 *	ibcm_enable_trace is non-zero, and ibcm_open_done() is called with
 *	the state mutex held.
 *
 * INPUTS:
 *	statep:		The connection state pointer
 *	cf_code:	Stored in the event's failed.cf_code
 *	cf_msg:		Stored in the event's failed.cf_msg
 *	cf_reason:	Stored in the event's failed.cf_reason
 *	client_data:	Passed to the client handler after the (NULL) return
 *			args pointer
 *	client_data_len: Length that accompanies client_data
 *
 * RETURN VALUE: NONE (the client handler's return value is ignored)
 */
static void
ibcm_handler_conn_fail(ibcm_state_data_t *statep, uint8_t cf_code,
    uint8_t cf_msg, ibt_cm_reason_t cf_reason, uint8_t *client_data,
    ibt_priv_data_len_t client_data_len)
{
	ibt_cm_event_t	fail_event;

	ibcm_path_cache_purge();

	/* Invoke CM handler w/ event passed as arg */
	if (statep->cm_handler != NULL) {
		bzero(&fail_event, sizeof (fail_event));

		fail_event.cm_type = IBT_CM_EVENT_FAILURE;
		fail_event.cm_channel = statep->channel;
		fail_event.cm_session_id = NULL;
		fail_event.cm_priv_data = NULL;
		fail_event.cm_priv_data_len = 0;

		fail_event.cm_event.failed.cf_code = cf_code;
		fail_event.cm_event.failed.cf_msg = cf_msg;
		fail_event.cm_event.failed.cf_reason = cf_reason;

		/* the handler call is bracketed by trace entries */
		ibcm_insert_trace(statep, IBCM_TRACE_CALLED_CONN_FAIL_EVENT);
		(void) statep->cm_handler(statep->state_cm_private,
		    &fail_event, NULL, client_data, client_data_len);
		ibcm_insert_trace(statep, IBCM_TRACE_RET_CONN_FAIL_EVENT);
	}

	if (ibcm_enable_trace != 0)
		ibcm_dump_conn_trace(statep);

	mutex_enter(&statep->state_mutex);
	ibcm_open_done(statep);
	mutex_exit(&statep->state_mutex);
}
/*
* QP State transition functions here
*
* The brief description of these functions :
* Validate QP related attributes in the messages
* Call client/server callback handlers
* Change QP state
* Set QP attributes (modify QP)
* Fill up the response MADs
*/
/*
 * ibcm_set_primary_adds_vect:
 *	Helper function used to fill up ibt_adds_vect_t PRIMARY PATH
 *	(called from ibcm_cep_state_*() functions)
 *
 *	Which side of the REQ's "l"/"r" port fields becomes the destination
 *	depends on statep->mode: in passive mode the "l" port is the
 *	destination and the rate comes from the REQ; otherwise the "r" port
 *	is the destination and the rate is statep->local_srate.
 *
 * INPUTS:
 *	statep		: The connection state pointer
 *	adds_vectp	: The ibt_adds_vect_t ptr that is being filled up
 *	msgp		: CM REQ message that is the source of information
 *
 * RETURN VALUE: NONE
 */
static void
ibcm_set_primary_adds_vect(ibcm_state_data_t *statep,
    ibt_adds_vect_t *adds_vectp, ibcm_req_msg_t *msgp)
{
	/* host-order word holding flow label (top 20 bits) and rate (low 6) */
	uint32_t	flow_rate_word;

	flow_rate_word = b2h32(msgp->req_primary_flow_label_plus);

	/*
	 * GRH info: used only when bit 0x8 of req_primary_sl_plus is clear
	 * (presumably the REQ's subnet-local flag - confirm against the
	 * REQ message layout).
	 */
	if ((msgp->req_primary_sl_plus & 0x8) != 0) {
		adds_vectp->av_send_grh = B_FALSE;
	} else {
		adds_vectp->av_send_grh = B_TRUE;
		adds_vectp->av_flow = flow_rate_word >> 12;
		adds_vectp->av_tclass = msgp->req_primary_traffic_class;
		adds_vectp->av_hop = msgp->req_primary_hop_limit;
	}

	/* service level is the upper nibble of req_primary_sl_plus */
	adds_vectp->av_srvl = msgp->req_primary_sl_plus >> 4;
	adds_vectp->av_src_path = statep->prim_src_path_bits;

	/* dlid, dgid, sgid and srate depend on which side we are */
	if (statep->mode != IBCM_PASSIVE_MODE) {
		adds_vectp->av_dlid = b2h16(msgp->req_primary_r_port_lid);

		adds_vectp->av_dgid.gid_prefix =
		    b2h64(msgp->req_primary_r_port_gid.gid_prefix);
		adds_vectp->av_dgid.gid_guid =
		    b2h64(msgp->req_primary_r_port_gid.gid_guid);

		adds_vectp->av_sgid.gid_prefix =
		    b2h64(msgp->req_primary_l_port_gid.gid_prefix);
		adds_vectp->av_sgid.gid_guid =
		    b2h64(msgp->req_primary_l_port_gid.gid_guid);

		adds_vectp->av_srate = statep->local_srate;
	} else {
		adds_vectp->av_dlid = b2h16(msgp->req_primary_l_port_lid);

		adds_vectp->av_dgid.gid_prefix =
		    b2h64(msgp->req_primary_l_port_gid.gid_prefix);
		adds_vectp->av_dgid.gid_guid =
		    b2h64(msgp->req_primary_l_port_gid.gid_guid);

		adds_vectp->av_sgid.gid_prefix =
		    b2h64(msgp->req_primary_r_port_gid.gid_prefix);
		adds_vectp->av_sgid.gid_guid =
		    b2h64(msgp->req_primary_r_port_gid.gid_guid);

		adds_vectp->av_srate = flow_rate_word & 0x3f;
	}
}
/*
 * ibcm_set_alt_adds_vect:
 *	Helper function used to fill up ibt_adds_vect_t ALTERNATE PATH
 *	(called from ibcm_cep_state_*() functions)
 *
 *	Same scheme as the primary path, except that the alternate GIDs are
 *	copied out of the REQ with bcopy before they are byte swapped.
 *
 * INPUTS:
 *	statep		: The connection state pointer
 *	adds_vectp	: The ibt_adds_vect_t ptr that is being filled up
 *	msgp		: CM REQ message that is the source of information
 *
 * RETURN VALUE: NONE
 */
static void
ibcm_set_alt_adds_vect(ibcm_state_data_t *statep,
    ibt_adds_vect_t *adds_vectp, ibcm_req_msg_t *msgp)
{
	ib_gid_t	wire_dgid;
	ib_gid_t	wire_sgid;
	const void	*dgid_src;
	const void	*sgid_src;
	uint32_t	flow_rate_word;

	flow_rate_word = b2h32(msgp->req_alt_flow_label_plus);

	/* first setup the srvl, srate, dlid and dgid */
	adds_vectp->av_srvl = msgp->req_alt_sl_plus >> 4;
	adds_vectp->av_src_path = statep->alt_src_path_bits;

	/* pick destination/source according to the side we are on */
	if (statep->mode == IBCM_PASSIVE_MODE) {
		adds_vectp->av_dlid = b2h16(msgp->req_alt_l_port_lid);
		dgid_src = &msgp->req_alt_l_port_gid[0];
		sgid_src = &msgp->req_alt_r_port_gid[0];
		adds_vectp->av_srate = flow_rate_word & 0x3f;
	} else {
		adds_vectp->av_dlid = b2h16(msgp->req_alt_r_port_lid);
		dgid_src = &msgp->req_alt_r_port_gid[0];
		sgid_src = &msgp->req_alt_l_port_gid[0];
		adds_vectp->av_srate = statep->local_alt_srate;
	}

	/* bytewise copy into local GIDs, then convert to host order */
	bcopy(dgid_src, &wire_dgid, sizeof (ib_gid_t));
	bcopy(sgid_src, &wire_sgid, sizeof (ib_gid_t));

	adds_vectp->av_dgid.gid_prefix = b2h64(wire_dgid.gid_prefix);
	adds_vectp->av_dgid.gid_guid = b2h64(wire_dgid.gid_guid);
	adds_vectp->av_sgid.gid_prefix = b2h64(wire_sgid.gid_prefix);
	adds_vectp->av_sgid.gid_guid = b2h64(wire_sgid.gid_guid);

	/* GRH info is used only when bit 0x8 of req_alt_sl_plus is clear */
	if ((msgp->req_alt_sl_plus & 0x8) != 0) {
		adds_vectp->av_send_grh = B_FALSE;	/* no GRH */
	} else {
		adds_vectp->av_send_grh = B_TRUE;
		adds_vectp->av_flow = flow_rate_word >> 12;
		adds_vectp->av_tclass = msgp->req_alt_traffic_class;
		adds_vectp->av_hop = msgp->req_alt_hop_limit;
	}
}
/*
 * ibcm_set_primary_cep_path:
 *	Helper function used to fill up ibt_cep_path_t PRIMARY PATH
 *	(called from ibcm_cep_state_*() functions)
 *
 *	The REQ's partition key is translated to a pkey index on the primary
 *	port (stored in pathp->cep_pkey_ix). On success the pkey is also
 *	recorded in statep->pkey and the address vector is filled in.
 *
 * INPUTS:
 *	statep	: The connection state pointer
 *	pathp	: The ibt_cep_path_t ptr that is being filled up
 *	msgp	: CM REQ message that is the source of information
 *
 * RETURN VALUE:
 *	IBT_SUCCESS, or the failure status of ibt_pkey2index_byguid()
 */
static ibt_status_t
ibcm_set_primary_cep_path(ibcm_state_data_t *statep, ibt_cep_path_t *pathp,
    ibcm_req_msg_t *msgp)
{
	ibt_status_t	rc;

	/* validate the PKEY in REQ for prim port */
	rc = ibt_pkey2index_byguid(statep->local_hca_guid, statep->prim_port,
	    b2h16(msgp->req_part_key), &pathp->cep_pkey_ix);

	if (rc == IBT_SUCCESS) {
		statep->pkey = b2h16(msgp->req_part_key);
		ibcm_set_primary_adds_vect(statep, &pathp->cep_adds_vect,
		    msgp);
		return (IBT_SUCCESS);
	}

	IBTF_DPRINTF_L2(cmlog, "ibcm_set_primary_cep_path: "
	    "statep 0x%p pkey %x prim_port %d ", statep,
	    b2h16(msgp->req_part_key), statep->prim_port);
	IBTF_DPRINTF_L2(cmlog, "ibcm_set_primary_cep_path: "
	    "statep 0x%p Invalid PKEY on prim_port, status %d ",
	    statep, rc);

	return (rc);
}
/*
 * ibcm_set_alt_cep_path:
 *	Helper function used to fill up ibt_cep_path_t ALTERNATE PATH
 *	(called from ibcm_cep_state_*() functions)
 *
 *	When the REQ carries no alternate path (req_alt_l_port_lid is 0)
 *	pathp is left untouched. Otherwise the REQ's partition key is
 *	translated to a pkey index on the alternate port, and the port number
 *	and address vector are filled in.
 *
 * INPUTS:
 *	statep	: The connection state pointer
 *	pathp	: The ibt_cep_path_t ptr that is being filled up
 *	msgp	: CM REQ message that is the source of information
 *
 * RETURN VALUE:
 *	IBT_SUCCESS, or the failure status of ibt_pkey2index_byguid()
 */
static ibt_status_t
ibcm_set_alt_cep_path(ibcm_state_data_t *statep, ibt_cep_path_t *pathp,
    ibcm_req_msg_t *msgp)
{
	ibt_status_t	rc;

	/* no alternate path specified */
	if (b2h16(msgp->req_alt_l_port_lid) == 0)
		return (IBT_SUCCESS);

	/* validate the PKEY in REQ for alt port */
	rc = ibt_pkey2index_byguid(statep->local_hca_guid, statep->alt_port,
	    b2h16(msgp->req_part_key), &pathp->cep_pkey_ix);

	if (rc == IBT_SUCCESS) {
		pathp->cep_hca_port_num = statep->alt_port;
		ibcm_set_alt_adds_vect(statep, &pathp->cep_adds_vect, msgp);
		return (IBT_SUCCESS);
	}

	IBTF_DPRINTF_L2(cmlog, "ibcm_set_alt_cep_path: "
	    "statep 0x%p pkey %x alt_port %d ", statep,
	    b2h16(msgp->req_part_key), statep->alt_port);
	IBTF_DPRINTF_L2(cmlog, "ibcm_set_alt_cep_path: "
	    "statep 0x%p Invalid PKEY on alt_port, status %d ",
	    statep, rc);

	return (rc);
}
/*
 * ibcm_compare_prim_alt_paths:
 *	Helper function used to find if primary and alternate paths are
 *	identical
 *	(called from ibcm_cep_state_req)
 *
 *	Two paths are treated as identical when their destination LID,
 *	destination GID, source GID and source path bits are all equal.
 *
 * INPUTS:
 *	prim:	Address vector of the primary path
 *	alt:	Address vector of the alternate path
 *
 * RETURN VALUE:
 *	B_TRUE if the two paths are identical, B_FALSE otherwise
 */
static boolean_t
ibcm_compare_prim_alt_paths(ibt_adds_vect_t *prim, ibt_adds_vect_t *alt)
{
	/* destination LID */
	if (alt->av_dlid != prim->av_dlid)
		return (B_FALSE);

	/* destination GID */
	if (alt->av_dgid.gid_prefix != prim->av_dgid.gid_prefix ||
	    alt->av_dgid.gid_guid != prim->av_dgid.gid_guid)
		return (B_FALSE);

	/* source GID */
	if (alt->av_sgid.gid_prefix != prim->av_sgid.gid_prefix ||
	    alt->av_sgid.gid_guid != prim->av_sgid.gid_guid)
		return (B_FALSE);

	/* source path bits */
	if (alt->av_src_path != prim->av_src_path)
		return (B_FALSE);

	return (B_TRUE);
}
/*
* ibcm_invoke_qp_modify:
* Helper function used to call ibt_modify_qp()
* called from ibcm_cep_state_req()/ibcm_cep_state_rep()
* It fills in an ibt_qp_info_t from the REQ and REP messages and asks
* for the INIT to RTR transition (qp_state is set to IBT_STATE_RTR).
*
* The transport service type is decoded from the REQ; the fields that
* are filled in depend on that type and on whether this side is the
* active or the passive side of the connection.
*
* INPUTS:
* statep: The connection state pointer
* req_msgp: The CM REQ message
* rep_msgp: The CM REP message. Besides being read, its
* rep_target_delay_plus field is updated when the REQ asks
* for an alternate path that the HCA cannot support.
*
* RETURN VALUE:
* IBT_SUCCESS - call succeeded
* Otherwise the failure status of ibcm_set_primary_cep_path(),
* ibcm_set_alt_cep_path() or ibt_modify_qp()
*/
static ibt_status_t
ibcm_invoke_qp_modify(ibcm_state_data_t *statep, ibcm_req_msg_t *req_msgp,
ibcm_rep_msg_t *rep_msgp)
{
ibt_status_t status;
ibt_qp_info_t qp_info;
ibt_cep_modify_flags_t cep_flags;
ibt_tran_srv_t trans;
cep_flags = IBT_CEP_SET_INIT_RTR | IBT_CEP_SET_PKEY_IX;
/*
* Transport service type: the 4th byte of req_remote_eecn_plus,
* shifted right by one and masked to two bits.
*/
trans = ((uint8_t *)&req_msgp->req_remote_eecn_plus)[3] >> 1 & 0x3;
ASSERT(statep->channel != NULL);
/*
* If alternate path is present in REQ message then
* OR in IBT_CEP_SET_ALT_PATH, if APM supported on hca
*/
if (b2h16(req_msgp->req_alt_l_port_lid) != 0) {
if (statep->hcap->hca_caps & IBT_HCA_AUTO_PATH_MIG)
cep_flags |= IBT_CEP_SET_ALT_PATH;
/* default value of rep_failover is ACCEPT */
else {
/* record in the REP that failover is not supported */
rep_msgp->rep_target_delay_plus |=
IBT_CM_FAILOVER_REJ_NOTSUPP << 1;
IBTF_DPRINTF_L3(cmlog, "ibcm_invoke_qp_modify"
" Alt Path specified in REQ, but not supported");
}
}
/* If transport type is RD, OR in IBT_CEP_SET_QKEY */
if (trans == IBT_RD_SRV) {
cep_flags |= IBT_CEP_SET_QKEY;
}
/* Start filling up ibt_qp_info_t. */
bzero(&qp_info, sizeof (qp_info));
qp_info.qp_trans = trans;
qp_info.qp_state = IBT_STATE_RTR;
qp_info.qp_flags = IBT_CEP_NO_FLAGS;
switch (trans) {
case IBT_RC_SRV:
if (statep->mode == IBCM_ACTIVE_MODE) {
/* Setting PSN on RQ */
IBCM_QPINFO_RC(qp_info).rc_rq_psn =
b2h32(req_msgp->req_starting_psn_plus) >> 8;
/* destination QPN is the one the peer put in the REP */
IBCM_QPINFO_RC(qp_info).rc_dst_qpn =
b2h32(rep_msgp->rep_local_qpn_plus) >> 8;
/* RDMA resources taken from negotiated REP values */
IBCM_QPINFO_RC(qp_info).rc_rdma_ra_in =
rep_msgp->rep_initiator_depth;
} else { /* Passive side CM */
/*
* Setting PSN on RQ.
* NOTE(review): this comment used to read "SQ and RQ", but only
* rc_rq_psn is assigned here - confirm whether an SQ PSN
* assignment is expected at this point.
*/
IBCM_QPINFO_RC(qp_info).rc_rq_psn =
b2h32(rep_msgp->rep_starting_psn_plus) >> 8;
/* destination QPN is the one the peer put in the REQ */
IBCM_QPINFO_RC(qp_info).rc_dst_qpn =
b2h32(req_msgp->req_local_qpn_plus) >> 8;
/* RDMA resources taken from negotiated REP values */
IBCM_QPINFO_RC(qp_info).rc_rdma_ra_in =
rep_msgp->rep_resp_resources;
}
/* XXX, Oh!, ibtl doesn't have interface for setting this */
IBCM_QPINFO_RC(qp_info).rc_min_rnr_nak =
ibcm_default_rnr_nak_time;
/* path MTU: upper nibble of req_mtu_plus */
IBCM_QPINFO_RC(qp_info).rc_path_mtu =
req_msgp->req_mtu_plus >> 4;
/* retry count: low 3 bits of the 4th byte of req_starting_psn_plus */
IBCM_QPINFO_RC(qp_info).rc_retry_cnt =
((uint8_t *)&req_msgp->req_starting_psn_plus)[3] & 0x7;
/* RNR retry count: low 3 bits of req_mtu_plus */
IBCM_QPINFO_RC(qp_info).rc_rnr_retry_cnt =
req_msgp->req_mtu_plus & 0x7;
/* a path setup failure is returned without modifying the QP */
if ((status = ibcm_set_primary_cep_path(statep,
&IBCM_QPINFO_RC(qp_info).rc_path, req_msgp)) !=
IBT_SUCCESS)
return (status);
if ((status = ibcm_set_alt_cep_path(statep,
&IBCM_QPINFO_RC(qp_info).rc_alt_path, req_msgp)) !=
IBT_SUCCESS)
return (status);
break;
case IBT_RD_SRV:
/* only the peer's Q_Key is filled in for RD */
if (statep->mode == IBCM_ACTIVE_MODE) { /* look at REP msg */
IBCM_QPINFO(qp_info).rd.rd_qkey =
b2h32(rep_msgp->rep_local_qkey);
} else {
IBCM_QPINFO(qp_info).rd.rd_qkey =
b2h32(req_msgp->req_local_qkey);
}
break;
case IBT_UC_SRV:
if (statep->mode == IBCM_ACTIVE_MODE) { /* look at REP msg */
IBCM_QPINFO_UC(qp_info).uc_rq_psn =
b2h32(req_msgp->req_starting_psn_plus) >> 8;
IBCM_QPINFO_UC(qp_info).uc_dst_qpn =
b2h32(rep_msgp->rep_local_qpn_plus) >> 8;
} else {
IBCM_QPINFO_UC(qp_info).uc_rq_psn =
b2h32(rep_msgp->rep_starting_psn_plus) >> 8;
IBCM_QPINFO_UC(qp_info).uc_dst_qpn =
b2h32(req_msgp->req_local_qpn_plus) >> 8;
}
/* path MTU: upper nibble of req_mtu_plus */
IBCM_QPINFO_UC(qp_info).uc_path_mtu =
req_msgp->req_mtu_plus >> 4;
/* a path setup failure is returned without modifying the QP */
if ((status = ibcm_set_primary_cep_path(statep,
&IBCM_QPINFO_UC(qp_info).uc_path, req_msgp)) !=
IBT_SUCCESS)
return (status);
if ((status = ibcm_set_alt_cep_path(statep,
&IBCM_QPINFO_UC(qp_info).uc_alt_path, req_msgp)) !=
IBT_SUCCESS)
return (status);
break;
default:
/* logged only; ibt_modify_qp() below is still called */
IBTF_DPRINTF_L2(cmlog, "ibcm_invoke_qp_modify: "
"unknown svc_type = %x", trans);
break;
}
/* Call modify_qp */
status = ibt_modify_qp(statep->channel, cep_flags, &qp_info, NULL);
IBTF_DPRINTF_L4(cmlog, "ibcm_invoke_qp_modify: statep 0x%p"
" ibt_modify_qp() Init to RTR returned = %d", statep, status);
/* record the outcome of the transition in the connection trace */
if (status == IBT_SUCCESS)
ibcm_insert_trace(statep, IBCM_TRACE_INIT_RTR);
else
ibcm_insert_trace(statep, IBCM_TRACE_INIT_RTR_FAIL);
#ifdef DEBUG
/* DEBUG only: print the request and the QP state as queried back */
print_modify_qp("Init to RTR", statep->channel, cep_flags, &qp_info);
if (statep->channel != NULL) {
ibt_qp_query_attr_t qp_attrs;
(void) ibt_query_qp(statep->channel, &qp_attrs);
IBTF_DPRINTF_L4(cmlog, "ibcm_invoke_qp_modify: "
"qp_info.qp_state = %x", qp_attrs.qp_info.qp_state);
}
#endif
return (status);
}
/*
 * ibcm_verify_req_gids_and_svcid
 *	Validation of LIDs, GIDs and SVC ID for an incoming REQ.
 *
 *	The primary (and, when a non-zero alternate GID is present, the
 *	alternate) remote-port GID/LID carried in the REQ are checked against
 *	the local HCA ports, the service entry for statep->svcid is looked up,
 *	REQs whose service id falls in the RDMA IP CM range have their private
 *	data sanity checked, and the best matching service bind entry is
 *	selected.
 *
 * INPUTS:
 *	statep		- state pointer
 *	cm_req_msgp	- REQ message pointer
 *
 * RETURN VALUE: IBCM_SUCCESS/IBCM_FAILURE
 *	IBCM_SUCCESS	- statep carries the local HCA guid, port numbers and
 *			  source path bits, the client handler and its private
 *			  argument; IBCM_SVC_INCR() has been applied to the
 *			  service entry.
 *	IBCM_FAILURE	- a REJ has already been posted through
 *			  ibcm_post_rej_mad(); IBCM_SVC_INCR() was not applied.
 */
ibcm_status_t
ibcm_verify_req_gids_and_svcid(ibcm_state_data_t *statep,
    ibcm_req_msg_t *cm_req_msgp)
{
	ib_gid_t		gid;
	ib_gid_t		agid;
	ib_lid_t		lid;
	ibt_status_t		status;
	ibtl_cm_hca_port_t	port;
	ibt_cm_reason_t		reject_reason = IBT_CM_SUCCESS;
	ibcm_svc_info_t		*svc_infop;
	ibcm_svc_bind_t		*svc_bindp;
	ibcm_svc_bind_t		*tmp_bindp;
	ib_pkey_t		pkey;
	uint8_t			port_num;
	ib_guid_t		hca_guid;
	ibcm_ip_pvtdata_t	*ip_data;

	/* Verify LID and GID of primary port */
	gid.gid_prefix = b2h64(cm_req_msgp->req_primary_r_port_gid.gid_prefix);
	gid.gid_guid = b2h64(cm_req_msgp->req_primary_r_port_gid.gid_guid);

	IBTF_DPRINTF_L4(cmlog, "ibcm_verify_req_gids: statep 0x%p"
	    " PRIM _r_gid (%llx, %llx)", statep, gid.gid_prefix,
	    gid.gid_guid);

	IBTF_DPRINTF_L4(cmlog, "ibcm_verify_req_gids: statep 0x%p "
	    "PRIM passive lid %x", statep,
	    b2h16(cm_req_msgp->req_primary_r_port_lid));

	/* Verify GID validity, if specified */
	if ((status = ibtl_cm_get_hca_port(gid, 0, &port)) == IBT_SUCCESS) {
		IBTF_DPRINTF_L4(cmlog, "ibcm_verify_req_gids: statep 0x%p "
		    "prim_port_num %d", statep, port.hp_port);

		IBTF_DPRINTF_L4(cmlog, "ibcm_verify_req_gids: statep 0x%p "
		    "passive hca_guid 0x%llX", statep, port.hp_hca_guid);

		port_num = port.hp_port;
		hca_guid = port.hp_hca_guid;
	}

	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids: statep 0x%p "
		    "ibtl_cm_get_hca_port() primary port failed = %d", statep,
		    status);
		reject_reason = IBT_CM_PRIM_GID;
		/* we will search for an acceptable GID to this port */
		port_num = statep->stored_reply_addr.port_num;
		hca_guid = statep->hcap->hca_guid;

	} else if (port.hp_base_lid !=
	    (b2h16(cm_req_msgp->req_primary_r_port_lid) &
	    (~((1 << port.hp_lmc) - 1)))) {
		/* REQ LID, with the LMC path bits masked off, != base LID */
		IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids: statep 0x%p "
		    "primary port lid invalid (%x, %x, %x)", statep,
		    port.hp_base_lid,
		    b2h16(cm_req_msgp->req_primary_r_port_lid), port.hp_lmc);
		reject_reason = IBT_CM_PRIM_LID;
	} else {

		statep->local_hca_guid = port.hp_hca_guid;
		statep->prim_port = port.hp_port;
		statep->prim_src_path_bits =
		    b2h16(cm_req_msgp->req_primary_r_port_lid) -
		    port.hp_base_lid;

		IBTF_DPRINTF_L4(cmlog, "ibcm_verify_req_gids: "
		    "statep 0x%p prim_port_path_bits %d ",
		    statep, statep->prim_src_path_bits);

		/* Verify LID and GID of alternate port. Post REJ if invalid */

		/* Need a bcopy, as alt port gid is unaligned in req message */
		bcopy(&cm_req_msgp->req_alt_r_port_gid[0], &agid,
		    sizeof (ib_gid_t));
		agid.gid_prefix = b2h64(agid.gid_prefix);
		agid.gid_guid = b2h64(agid.gid_guid);

		IBTF_DPRINTF_L4(cmlog, "ibcm_verify_req_gids: statep 0x%p"
		    " Alt port_gid is (%llX:%llX)", statep, agid.gid_prefix,
		    agid.gid_guid);

		/* An all-zero alternate GID means no alternate path in REQ */
		if ((agid.gid_prefix != 0) || (agid.gid_guid != 0)) {

			/* Verify GID validity, if specified */
			if ((status = ibtl_cm_get_hca_port(agid,
			    statep->local_hca_guid, &port)) != IBT_SUCCESS) {
				IBTF_DPRINTF_L2(cmlog,
				    "ibcm_verify_req_gids: ibtl_cm_get_hca_port"
				    " statep 0x%p alternate port failed = %d",
				    statep, status);
				reject_reason = IBT_CM_ALT_GID;

			} else if (port.hp_base_lid !=
			    (b2h16(cm_req_msgp->req_alt_r_port_lid) &
			    (~((1 << port.hp_lmc) - 1)))) {

				/*
				 * Log the LID in host order, the same form
				 * used by the comparison above and by the
				 * primary port message.
				 */
				IBTF_DPRINTF_L2(cmlog,
				    "ibcm_verify_req_gids: statep 0x%p "
				    "alternate port lid invalid (%x, %x, %x)",
				    statep, port.hp_base_lid,
				    b2h16(cm_req_msgp->req_alt_r_port_lid),
				    port.hp_lmc);
				reject_reason = IBT_CM_ALT_LID;
			} else { /* Alt LID and GID are valid */
				statep->alt_port = port.hp_port;
				statep->alt_src_path_bits =
				    b2h16(cm_req_msgp->req_alt_r_port_lid) -
				    port.hp_base_lid;

				IBTF_DPRINTF_L4(cmlog, "ibcm_verify_req_gids: "
				    "statep 0x%p alt_port_num %d "
				    "alt_rc_hca_guid 0x%llX", statep,
				    port.hp_port, port.hp_hca_guid);

				IBTF_DPRINTF_L4(cmlog, "ibcm_verify_req_gids: "
				    "statep 0x%p alt_port_path_bits %d ",
				    statep, statep->alt_src_path_bits);
			}
		}
	}

	/*
	 * Any GID/LID problem found above is only recorded in reject_reason;
	 * the service id is checked first, and the GID/LID REJ is posted at
	 * the end of this function once a bind entry has been chosen.
	 */
	mutex_enter(&ibcm_svc_info_lock);
	svc_infop = ibcm_find_svc_entry(statep->svcid);

	/*
	 * Note: When we return SUCCESS, the reader lock won't get dropped
	 * until after the cm_handler is called from ibcm_cep_state_req().
	 */

	IBTF_DPRINTF_L4(cmlog, "ibcm_verify_req_gids: "
	    "ibcm_find_svc_entry found svc_infop %p", svc_infop);

	/*
	 * Send REJ with reject reason "invalid service id" for the
	 * the following cases :-
	 * Service id is valid, but not available at gid/lid of REQ
	 * Service id is invalid
	 */

	if (svc_infop == NULL || svc_infop->svc_bind_list == NULL) {
		mutex_exit(&ibcm_svc_info_lock);

		IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids_and_svcid: "
		    "statep 0x%p svc_id %llX svc_infop NULL", statep,
		    statep->svcid);

		/* Send a REJ with invalid SID reason */
		ibcm_post_rej_mad(statep,
		    IBT_CM_INVALID_SID, IBT_CM_FAILURE_REQ, NULL, 0);
		return (IBCM_FAILURE);
	}

	if (svc_infop->svc_rc_handler == NULL) {
		mutex_exit(&ibcm_svc_info_lock);

		/* Send a REJ with invalid service type reason */
		ibcm_post_rej_mad(statep,
		    IBT_CM_INVALID_SRV_TYPE, IBT_CM_FAILURE_REQ, NULL, 0);
		return (IBCM_FAILURE);
	}

	/*
	 * Check if ServiceID is in RDMA IP CM SID range, if yes, we parse
	 * the REQ's Private Data and verify for it's goodness.
	 */
	if (((statep->svcid & IB_SID_IPADDR_PREFIX_MASK) == 0) &&
	    (statep->svcid & IB_SID_IPADDR_PREFIX)) {
		ibt_ari_ip_t	ari_ip;
		boolean_t	rdma_rej_mad = B_FALSE;

		/*
		 * NOTE(review): if req_private_data is an in-line array of
		 * the REQ message this test can never be true - confirm
		 * against the ibcm_req_msg_t definition.
		 */
		if (cm_req_msgp->req_private_data == NULL) {
			mutex_exit(&ibcm_svc_info_lock);

			IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids_and_svcid:"
			    " RDMA CM IP REQ Priv Data is NULL");

			/* Send a REJ with CONSUMER REJ */
			ibcm_post_rej_mad(statep, IBT_CM_CONSUMER,
			    IBT_CM_FAILURE_REQ, NULL, 0);
			return (IBCM_FAILURE);
		}
		ip_data = (ibcm_ip_pvtdata_t *)cm_req_msgp->req_private_data;

		bzero(&ari_ip, sizeof (ibt_ari_ip_t));

		/* RDMA IP CM Layer Rejects this */
		if (ip_data->ip_MajV != IBT_CM_IP_MAJ_VER) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids_and_svcid:"
			    "IP MajorVer mis-match %d", ip_data->ip_MajV);
			ari_ip.ip_reason = IBT_ARI_IP_MAJOR_VERSION;
			ari_ip.ip_suggested_version = IBT_CM_IP_MAJ_VER;
			ari_ip.ip_suggested = B_TRUE;
			rdma_rej_mad = B_TRUE;
		} else if (ip_data->ip_MinV != IBT_CM_IP_MIN_VER) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids_and_svcid:"
			    "IP MinorVer mis-match %d", ip_data->ip_MinV);
			ari_ip.ip_reason = IBT_ARI_IP_MINOR_VERSION;
			ari_ip.ip_suggested_version = IBT_CM_IP_MIN_VER;
			ari_ip.ip_suggested = B_TRUE;
			rdma_rej_mad = B_TRUE;
		} else if ((ip_data->ip_ipv != IBT_CM_IP_IPV_V4) &&
		    (ip_data->ip_ipv != IBT_CM_IP_IPV_V6)) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids_and_svcid:"
			    " Invalid IPV specified %d", ip_data->ip_ipv);
			ari_ip.ip_reason = IBT_ARI_IP_IPV;
			ari_ip.ip_suggested_version = IBT_CM_IP_IPV_V4;
			ari_ip.ip_suggested = B_TRUE;
			rdma_rej_mad = B_TRUE;
		} else {
			/*
			 * Validate whether ip_addr specified are non-NULL.
			 *
			 * NOTE:
			 * RDMA ULP which is servicing this SID, should validate
			 * the correctness of srcip/dstip and accordingly post
			 * REJ related to ibt_ari_ip_reason_t of
			 * IBT_ARI_IP_SRC_ADDR, IBT_ARI_IP_DST_ADDR and
			 * IBT_ARI_IP_UNKNOWN_ADDR.
			 */
			if (ip_data->ip_ipv == IBT_CM_IP_IPV_V4) {
				if (ip_data->ip_srcv4 == 0) {
					IBTF_DPRINTF_L2(cmlog,
					    "ibcm_verify_req_gids_and_svcid: "
					    "Invalid NULL V4 SrcIp specified");
					rdma_rej_mad = B_TRUE;
					ari_ip.ip_reason = IBT_ARI_IP_SRC_ADDR;
					ari_ip.ip_suggested = B_TRUE;
					ari_ip.ip_suggested_version =
					    IBT_CM_IP_IPV_V4;
				} else if (ip_data->ip_dstv4 == 0) {
					IBTF_DPRINTF_L2(cmlog,
					    "ibcm_verify_req_gids_and_svcid: "
					    "Invalid NULL V4 DstIp specified");
					rdma_rej_mad = B_TRUE;
					ari_ip.ip_reason = IBT_ARI_IP_DST_ADDR;
					ari_ip.ip_suggested = B_TRUE;
					ari_ip.ip_suggested_version =
					    IBT_CM_IP_IPV_V4;
				}
			} else if (ip_data->ip_ipv == IBT_CM_IP_IPV_V6) {
				if (IN6_IS_ADDR_UNSPECIFIED(
				    &ip_data->ip_srcv6)) {
					IBTF_DPRINTF_L2(cmlog,
					    "ibcm_verify_req_gids_and_svcid: "
					    "Invalid NULL V6 SrcIp specified");
					rdma_rej_mad = B_TRUE;
					ari_ip.ip_reason = IBT_ARI_IP_SRC_ADDR;
					ari_ip.ip_suggested = B_TRUE;
					ari_ip.ip_suggested_version =
					    IBT_CM_IP_IPV_V6;
				} else if (IN6_IS_ADDR_UNSPECIFIED(
				    &ip_data->ip_dstv6)) {
					IBTF_DPRINTF_L2(cmlog,
					    "ibcm_verify_req_gids_and_svcid: "
					    "Invalid NULL V6 DstIp specified");
					rdma_rej_mad = B_TRUE;
					ari_ip.ip_reason = IBT_ARI_IP_DST_ADDR;
					ari_ip.ip_suggested = B_TRUE;
					ari_ip.ip_suggested_version =
					    IBT_CM_IP_IPV_V6;
				}
			}
			/* TBD: IBT_ARI_IP_UNKNOWN_ADDR */
		}
		if (rdma_rej_mad == B_TRUE) {
			ibt_ari_con_t	cons_rej;

			mutex_exit(&ibcm_svc_info_lock);

			/* ARI layout: one marker byte followed by ari_ip */
			cons_rej.rej_ari_len = 1 + sizeof (ibt_ari_ip_t);
			cons_rej.rej_ari[0] = 0; /* Rejected by CM Layer */
			bcopy(&ari_ip, &cons_rej.rej_ari[1],
			    sizeof (ibt_ari_ip_t));
			/* Send a REJ with CONSUMER REJ */
			ibcm_post_rej_mad(statep, IBT_CM_CONSUMER,
			    IBT_CM_FAILURE_REQ, &cons_rej,
			    sizeof (ibt_ari_con_t));
			return (IBCM_FAILURE);
		}
	}

	/*
	 * find the best "bind" entry that enables this port
	 *
	 * Preference order: same HCA/port with matching GID and P_Key (stops
	 * the walk), then same HCA/port with matching GID (the last such
	 * entry wins), then the first entry that matches only HCA/port.
	 */

	pkey = b2h16(cm_req_msgp->req_part_key);
	svc_bindp = NULL;
	tmp_bindp = svc_infop->svc_bind_list;
	while (tmp_bindp) {
		if (tmp_bindp->sbind_hcaguid == hca_guid &&
		    tmp_bindp->sbind_port == port_num) {
			if (gid.gid_guid ==
			    tmp_bindp->sbind_gid.gid_guid &&
			    gid.gid_prefix ==
			    tmp_bindp->sbind_gid.gid_prefix) {
				/* gid match => really good match */
				svc_bindp = tmp_bindp;
				if (pkey == tmp_bindp->sbind_pkey)
					/* absolute best match */
					break;
			} else if (svc_bindp == NULL) {
				/* port match => a good match */
				svc_bindp = tmp_bindp;
			}
		}
		tmp_bindp = tmp_bindp->sbind_link;
	}
	if (svc_bindp == NULL) { /* port not enabled for this SID */
		mutex_exit(&ibcm_svc_info_lock);
		IBTF_DPRINTF_L2(cmlog,
		    "ibcm_verify_req_gids_and_svcid: statep 0x%p "
		    "no binding found", statep);
		ibcm_post_rej_mad(statep,
		    IBT_CM_INVALID_SID, IBT_CM_FAILURE_REQ, NULL, 0);
		return (IBCM_FAILURE);
	}

	/*
	 * copy the GID in case we need it in REJ below
	 *
	 * NOTE(review): sbind_gid is compared against the host-order REQ GID
	 * in the loop above, so b2h64() is presumably acting here as the
	 * host-to-wire swap for the REJ ARI; h2b64() would state that intent
	 * - confirm.
	 */
	gid.gid_prefix = b2h64(svc_bindp->sbind_gid.gid_prefix);
	gid.gid_guid = b2h64(svc_bindp->sbind_gid.gid_guid);

	statep->state_cm_private = svc_bindp->sbind_cm_private;
	statep->state_svc_infop = svc_infop;
	statep->cm_handler = svc_infop->svc_rc_handler;

	/* Only mark the service entry in use when the REQ is acceptable */
	if (reject_reason == IBT_CM_SUCCESS)
		IBCM_SVC_INCR(svc_infop);
	mutex_exit(&ibcm_svc_info_lock);

	/*
	 * If the service id is valid, but gid in REQ is invalid,
	 * then send a REJ with invalid gid
	 * For Invalid primary gid, the ARI field is filled with
	 * with gid from svcinfo
	 * For invalid prim/alt gid reject, CM uses one of the gids
	 * registered in ARI.
	 * For invalid prim/alt lid reject, CM uses the base lid in ARI
	 */
	if (reject_reason != IBT_CM_SUCCESS) {

		switch (reject_reason) {

		case IBT_CM_PRIM_GID :
		case IBT_CM_ALT_GID :
			ibcm_post_rej_mad(statep,
			    reject_reason, IBT_CM_FAILURE_REQ,
			    &gid, sizeof (ib_gid_t));
			break;

		case IBT_CM_PRIM_LID :
		case IBT_CM_ALT_LID :

			/* port still describes the port whose LID mismatched */
			lid = h2b16(port.hp_base_lid);
			ibcm_post_rej_mad(statep,
			    reject_reason, IBT_CM_FAILURE_REQ,
			    &lid, sizeof (ib_lid_t));
			break;
		}

		return (IBCM_FAILURE);
	}

	/* Service, primary/alt gid and lid are all valid */

	return (IBCM_SUCCESS);
}
/*
 * ibcm_cep_state_req:
 *	QP state transition function called for an incoming REQ on passive side
 *	LIDs and GIDs should be maintained and validated by the client handler
 *
 *	Builds an IBT_CM_EVENT_REQ_RCV event from the REQ message, invokes the
 *	client's CM handler and then either defers the reply or hands the
 *	handler's answer to ibcm_process_cep_req_cm_hdlr().
 *
 * INPUTS:
 *	statep		- state pointer
 *	cm_req_msgp	- REQ message pointer
 *
 * OUTPUTS:
 *	reject_reason	- Rejection reason See Section 12.6.7.2 rev1.0a IB Spec
 *	arej_len	- Additional Rejection reason info length
 *
 * RETURN VALUE:
 *	IBCM_SEND_REJ	- alternate path in REQ is the same as the primary
 *	IBCM_DEFER	- client handler returned IBT_CM_DEFER
 *	otherwise	- whatever ibcm_process_cep_req_cm_hdlr() returns
 */
ibcm_status_t
ibcm_cep_state_req(ibcm_state_data_t *statep, ibcm_req_msg_t *cm_req_msgp,
    ibt_cm_reason_t *reject_reason, uint8_t *arej_len)
{
	void			*priv_data = NULL;
	ibt_cm_event_t		event;
	ibt_cm_status_t		cb_status;
	ibcm_status_t		status;
	ibt_cm_return_args_t	ret_args;
	ibcm_clnt_reply_info_t	clnt_info;

	IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_req: statep 0x%p", statep);

	/* client handler should be valid */
	ASSERT(statep->cm_handler != NULL);

	bzero(&event, sizeof (event));

	/*
	 * Fill in ibt_cm_event_t
	 *
	 * Several REQ fields share a 32-bit wire word with another field
	 * (the "_plus" members); the small fields are picked out of the last
	 * byte of that word with shifts and masks.
	 */
	event.cm_type = IBT_CM_EVENT_REQ_RCV;
	event.cm_session_id = statep;
	IBCM_EVT_REQ(event).req_service_id = b2h64(cm_req_msgp->req_svc_id);
	IBCM_EVT_REQ(event).req_transport =
	    ((uint8_t *)&cm_req_msgp->req_remote_eecn_plus)[3] >> 1 & 0x3;
	IBCM_EVT_REQ(event).req_timeout = ibt_ib2usec(
	    (((uint8_t *)&cm_req_msgp->req_remote_eecn_plus)[3] >> 3) & 0x1F);
	IBCM_EVT_REQ(event).req_retry_cnt =
	    ((uint8_t *)&cm_req_msgp->req_starting_psn_plus)[3] & 0x7;
	IBCM_EVT_REQ(event).req_rnr_retry_cnt = cm_req_msgp->req_mtu_plus & 0x7;
	IBCM_EVT_REQ(event).req_pkey = b2h16(cm_req_msgp->req_part_key);
	IBCM_EVT_REQ(event).req_rdma_ra_in =
	    ((uint8_t *)&cm_req_msgp->req_local_qpn_plus)[3];
	IBCM_EVT_REQ(event).req_rdma_ra_out =
	    ((uint8_t *)&cm_req_msgp->req_local_eec_no_plus)[3];

	/*
	 * Check for HCA limits for RDMA Resources
	 * Values above the local HCA limit are clamped, not rejected.
	 */
	if (IBCM_EVT_REQ(event).req_rdma_ra_in >
	    statep->hcap->hca_max_rdma_in_qp) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_cep_state_req: statep 0x%p, REQ "
		    "req_rdma_ra_in %d is greater than HCA Limit %d, resetting "
		    "it to HCA limit", statep,
		    IBCM_EVT_REQ(event).req_rdma_ra_in,
		    statep->hcap->hca_max_rdma_in_qp);
		IBCM_EVT_REQ(event).req_rdma_ra_in =
		    statep->hcap->hca_max_rdma_in_qp;
	}

	if (IBCM_EVT_REQ(event).req_rdma_ra_out >
	    statep->hcap->hca_max_rdma_out_qp) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_cep_state_req: statep 0x%p, REQ "
		    "req_rdma_ra_out %d is greater than HCA Limit %d, resetting "
		    "it to HCA limit", statep,
		    IBCM_EVT_REQ(event).req_rdma_ra_out,
		    statep->hcap->hca_max_rdma_out_qp);
		IBCM_EVT_REQ(event).req_rdma_ra_out =
		    statep->hcap->hca_max_rdma_out_qp;
	}

	/* Account for CM and other software delays */
	if (IBCM_EVT_REQ(event).req_timeout > ibcm_sw_delay) {
		IBCM_EVT_REQ(event).req_timeout -= ibcm_sw_delay;
		IBTF_DPRINTF_L5(cmlog, "ibcm_cep_state_req: statep 0x%p "
		    "Avail resp time %d (usec)", statep,
		    IBCM_EVT_REQ(event).req_timeout);
	} else {
		IBTF_DPRINTF_L2(cmlog, "ibcm_cep_state_req: statep 0x%p "
		    "REQ rem_resp_time < local sw delay 0x%x", statep,
		    IBCM_EVT_REQ(event).req_timeout);

		IBCM_EVT_REQ(event).req_timeout = 0;
	}

	IBCM_EVT_REQ(event).req_prim_hca_port = statep->prim_port;
	IBCM_EVT_REQ(event).req_alt_hca_port = statep->alt_port;
	IBCM_EVT_REQ(event).req_hca_guid = statep->local_hca_guid;
	IBCM_EVT_REQ(event).req_remote_qpn = statep->remote_qpn;

	if (((uint8_t *)&cm_req_msgp->req_remote_eecn_plus)[3] &
	    IBT_CM_FLOW_CONTROL)
		IBCM_EVT_REQ(event).req_flags |= IBT_CM_FLOW_CONTROL;

	if ((cm_req_msgp->req_max_cm_retries_plus >> 3) & 0x1)
		IBCM_EVT_REQ(event).req_flags |= IBT_CM_SRQ_EXISTS;

	/* Initialize req.req_prim_addr */
	ibcm_set_primary_adds_vect(statep, &IBCM_EVT_REQ(event).req_prim_addr,
	    cm_req_msgp);

	/* Initialize req.req_alternate_path if they exist */
	if (b2h16(cm_req_msgp->req_alt_l_port_lid) != 0) {
		ibcm_set_alt_adds_vect(statep,
		    &IBCM_EVT_REQ(event).req_alt_addr, cm_req_msgp);

		/* Verify, alt path is not same as primary */
		if (ibcm_compare_prim_alt_paths(
		    &event.cm_event.req.req_prim_addr,
		    &event.cm_event.req.req_alt_addr) == B_TRUE) {
			/* XXX New REJ code needed */
			*reject_reason = IBT_CM_NO_RESC;
			IBTF_DPRINTF_L2(cmlog, "ibcm_cep_state_req: statep 0x%p"
			    " Alt and prim paths are same", statep);

			/*
			 * The handler will not be called, so give back the
			 * service entry reference now (presumably the one
			 * taken by ibcm_verify_req_gids_and_svcid()).
			 */
			mutex_enter(&ibcm_svc_info_lock);
			IBCM_SVC_DECR(statep->state_svc_infop);
			mutex_exit(&ibcm_svc_info_lock);
			return (IBCM_SEND_REJ);
		}
	}

#ifdef	NO_EEC_SUPPORT_YET
	IBCM_EVT_REQ(event).req_rdc_exists = cm_req_msgp->req_mtu_plus >> 3 & 1;
	IBCM_EVT_REQ(event).req_remote_eecn =
	    b2h32(cm_req_msgp->req_remote_eecn_plus) >> 8;
	IBCM_EVT_REQ(event).req_local_eecn =
	    b2h32(cm_req_msgp->req_local_eec_no_plus) >> 8;
	IBCM_EVT_REQ(event).req_remote_qkey =
	    b2h32(cm_req_msgp->req_local_qkey);
#endif

	/* cm_req_msgp->req_private_data to event.cm_event.cm_priv_data */
	event.cm_priv_data = cm_req_msgp->req_private_data;

	event.cm_priv_data_len = IBT_REQ_PRIV_DATA_SZ;

	/*
	 * Allocate priv_data of size IBT_MAX_PRIV_DATA_SZ
	 */
	priv_data = kmem_zalloc(IBT_MAX_PRIV_DATA_SZ, KM_SLEEP);

	bzero(&ret_args, sizeof (ret_args));

	/*
	 * Fill in the default values from REQ, that client can modify
	 * in/out are swapped: the REQ sender's "out" is this side's "in".
	 */
	ret_args.cm_ret.rep.cm_rdma_ra_in = IBCM_EVT_REQ(event).req_rdma_ra_out;
	ret_args.cm_ret.rep.cm_rdma_ra_out = IBCM_EVT_REQ(event).req_rdma_ra_in;
	ret_args.cm_ret.rep.cm_rnr_retry_cnt = cm_req_msgp->req_mtu_plus & 0x7;

	ibcm_insert_trace(statep, IBCM_TRACE_CALLED_REQ_RCVD_EVENT);

	/* Invoke the client handler */
	cb_status = statep->cm_handler(statep->state_cm_private, &event,
	    &ret_args, priv_data, IBT_REP_PRIV_DATA_SZ);

	ibcm_insert_trace(statep, IBCM_TRACE_RET_REQ_RCVD_EVENT);

	/* The handler has returned; release the service entry reference */
	mutex_enter(&ibcm_svc_info_lock);
	IBCM_SVC_DECR(statep->state_svc_infop);
	mutex_exit(&ibcm_svc_info_lock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_req: Client handler returned %d"
	    " statep 0x%p", cb_status, statep);

	if (cb_status == IBT_CM_DEFER) {

		/* Keep a copy of the REQ for use when the client proceeds */
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(statep->defer_cm_msg))

		if (statep->defer_cm_msg == NULL)
			statep->defer_cm_msg =
			    kmem_zalloc(IBCM_MSG_SIZE, KM_SLEEP);
		bcopy(cm_req_msgp, statep->defer_cm_msg, IBCM_MSG_SIZE);

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(statep->defer_cm_msg))

		/*
		 * unblock any blocked cm proceed api calls. Do not access
		 * statep after cv_signal
		 */
		mutex_enter(&statep->state_mutex);
		statep->clnt_proceed = IBCM_UNBLOCK;
		cv_broadcast(&statep->block_client_cv);
		mutex_exit(&statep->state_mutex);

		kmem_free(priv_data, IBT_MAX_PRIV_DATA_SZ);
		return (IBCM_DEFER);
	}

	/* fail any blocked cm proceed api call - client bug */
	mutex_enter(&statep->state_mutex);
	statep->clnt_proceed = IBCM_FAIL;
	cv_broadcast(&statep->block_client_cv);
	mutex_exit(&statep->state_mutex);

	/* Package the handler's reply for the common processing routine */
	clnt_info.reply_event = (ibt_cm_proceed_reply_t *)&ret_args.cm_ret;
	clnt_info.priv_data = priv_data;
	clnt_info.priv_data_len = ret_args.cm_ret_len;

	status =
	    ibcm_process_cep_req_cm_hdlr(statep, cb_status,
	    &clnt_info, reject_reason, arej_len, cm_req_msgp);
	kmem_free(priv_data, IBT_MAX_PRIV_DATA_SZ);
	return (status);
}
/*
 * ibcm_process_cep_req_cm_hdlr:
 *	Processes the response from client handler for an incoming REQ.
 *
 *	For IBT_CM_ACCEPT the channel supplied by the client is validated,
 *	moved to the REQ's primary port if necessary, the REP message in
 *	statep->stored_msg is filled in and the QP is taken from INIT to RTR.
 *	For every other handler status the REJ message in statep->stored_msg
 *	is filled in from the client's reply.
 *
 * INPUTS:
 *	statep		- state pointer
 *	cb_status	- status returned by the client's CM handler
 *	clnt_info	- client's reply arguments and private data
 *	cm_req_msg	- REQ message pointer
 *
 * OUTPUTS:
 *	reject_reason	- set to IBT_CM_SUCCESS on accept, else the REJ reason
 *	arej_len	- length of the additional reject information; only
 *			  written for IBT_CM_REDIRECT_PORT, IBT_CM_REDIRECT
 *			  and IBT_CM_REJECT
 *
 * RETURN VALUE: IBCM_SEND_REP/IBCM_SEND_REJ
 */
ibcm_status_t
ibcm_process_cep_req_cm_hdlr(ibcm_state_data_t *statep,
    ibt_cm_status_t cb_status, ibcm_clnt_reply_info_t *clnt_info,
    ibt_cm_reason_t *reject_reason, uint8_t *arej_len,
    ibcm_req_msg_t *cm_req_msg)
{
	ibt_status_t		status;
	ibt_qp_query_attr_t	qp_attrs;
	ibcm_state_data_t	*old_statep;
	ibt_channel_hdl_t	channel;
	ib_guid_t		local_ca_guid;
	ibcm_rej_msg_t		*rej_msgp;
#ifdef	NO_EEC_SUPPORT_YET
	ibt_eec_query_attr_t	eec_attrs;
#endif

	/* A handler that returns "default" is treated as a consumer reject */
	if (cb_status == IBT_CM_DEFAULT)
		cb_status = IBT_CM_REJECT;

	/* verify status */
	if (cb_status == IBT_CM_ACCEPT) {
		*reject_reason = IBT_CM_SUCCESS;
	} else if (cb_status == IBT_CM_REJECT) {
		*reject_reason = IBT_CM_CONSUMER;
	} else if (cb_status == IBT_CM_REDIRECT_PORT) {
		*reject_reason = IBT_CM_PORT_REDIRECT;
	} else if (cb_status == IBT_CM_REDIRECT) {
		*reject_reason = IBT_CM_REDIRECT_CM;
	} else if (cb_status == IBT_CM_NO_CHANNEL) {
		*reject_reason = IBT_CM_NO_CHAN;
	} else if (cb_status == IBT_CM_NO_RESOURCE) {
		*reject_reason = IBT_CM_NO_RESC;
	} else {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_req_cm_hdlr: statep %p"
		    " Client handler unexpected return %x", statep, cb_status);
		*reject_reason = IBT_CM_CONSUMER;
	}

	/* client handler gave CM ok */
	if (cb_status == IBT_CM_ACCEPT) {
		ibcm_rep_msg_t	*rep_msgp = (ibcm_rep_msg_t *)
		    IBCM_OUT_MSGP(statep->stored_msg);

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rep_msgp))

		/*
		 * Check first if ret_args make sense. If not, bailout
		 * here rather than going along and panicing later.
		 */
		channel = clnt_info->reply_event->rep.cm_channel;
		if (IBCM_INVALID_CHANNEL(channel)) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_req_cm_hdlr: "
			    "statep 0x%p server's QP handle is NULL", statep);
			*reject_reason = IBT_CM_NO_CHAN;
		}

		/*
		 * A channel that already has a CM state attached is in use
		 * by another connection and cannot be accepted on.
		 */
		IBCM_GET_CHAN_PRIVATE(channel, old_statep);

		if ((*reject_reason == IBT_CM_SUCCESS) &&
		    (old_statep != NULL)) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_req_cm_hdlr: "
			    "statep 0x%p Channel being re-used on passive side",
			    statep);
			*reject_reason = IBT_CM_NO_CHAN;
		}
		if (old_statep != NULL)
			IBCM_RELEASE_CHAN_PRIVATE(channel);

		if (*reject_reason != IBT_CM_SUCCESS) {
			ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
			    IBT_CM_FAILURE_REQ, *reject_reason, NULL, 0);
			return (IBCM_SEND_REJ);
		}

		statep->channel = channel;
		status = ibt_query_qp(channel, &qp_attrs);

		if (status != IBT_SUCCESS) {
			IBTF_DPRINTF_L3(cmlog, "ibcm_process_cep_req_cm_hdlr: "
			    "statep %p ibt_query_qp failed %d", statep, status);
			*reject_reason = IBT_CM_NO_RESC;
			ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
			    IBT_CM_FAILURE_REQ, IBT_CM_CI_FAILURE, NULL, 0);
			return (IBCM_SEND_REJ);
		}

		/* Only RC channels in the INIT state are accepted */
		if (qp_attrs.qp_info.qp_trans != IBT_RC_SRV) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_req_cm_hdlr: "
			    "statep %p qp is not RC channel on server", statep);
			*reject_reason = IBT_CM_INVALID_SRV_TYPE;
			ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
			    IBT_CM_FAILURE_REQ, IBT_CM_CHAN_INVALID_STATE,
			    NULL, 0);
			return (IBCM_SEND_REJ);
		}

		if (qp_attrs.qp_info.qp_state != IBT_STATE_INIT) {
			IBTF_DPRINTF_L3(cmlog, "ibcm_process_cep_req_cm_hdlr: "
			    "qp state != INIT on server");
			*reject_reason = IBT_CM_CHAN_INVALID_STATE;
			ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
			    IBT_CM_FAILURE_REQ, IBT_CM_CHAN_INVALID_STATE,
			    NULL, 0);
			return (IBCM_SEND_REJ);
		}

		/*
		 * Init to Init, if required
		 * The channel is bound to a different port than the one the
		 * REQ arrived for, so re-target it before going to RTR.
		 */
		if (qp_attrs.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
		    statep->prim_port) {

			ibt_qp_info_t		qp_info;
			ibt_cep_modify_flags_t	cep_flags;

			IBTF_DPRINTF_L5(cmlog, "ibcm_process_cep_req_cm_hdlr: "
			    "chan 0x%p chan port %d", channel,
			    qp_attrs.qp_info.qp_transport.rc.rc_path.
			    cep_hca_port_num);

			IBTF_DPRINTF_L5(cmlog, "ibcm_process_cep_req_cm_hdlr: "
			    "chan 0x%p d path port %d", channel,
			    statep->prim_port);

			bzero(&qp_info, sizeof (qp_info));
			qp_info.qp_trans = IBT_RC_SRV;
			qp_info.qp_state = IBT_STATE_INIT;
			qp_info.qp_transport.rc.rc_path.cep_hca_port_num =
			    statep->prim_port;

			cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;

			status = ibt_modify_qp(statep->channel, cep_flags,
			    &qp_info, NULL);

			if (status != IBT_SUCCESS) {
				IBTF_DPRINTF_L2(cmlog,
				    "ibcm_process_cep_req_cm_hdlr: "
				    "chan 0x%p ibt_modify_qp() = %d", channel,
				    status);
				*reject_reason = IBT_CM_NO_RESC;

				ibcm_insert_trace(statep,
				    IBCM_TRACE_INIT_INIT_FAIL);

				ibcm_handler_conn_fail(statep,
				    IBT_CM_FAILURE_REJ_SENT, IBT_CM_FAILURE_REQ,
				    IBT_CM_CI_FAILURE, NULL, 0);
				return (IBCM_SEND_REJ);
			} else {
				ibcm_insert_trace(statep,
				    IBCM_TRACE_INIT_INIT);

				IBTF_DPRINTF_L5(cmlog,
				    "ibcm_process_cep_req_cm_hdlr: "
				    "chan 0x%p ibt_modify_qp() = %d", channel,
				    status);
			}
		}

		/*
		 * fill in the REP msg based on ret_args from client
		 *
		 * The client may not ask for a larger initiator depth than
		 * the responder resources offered in the REQ.
		 */
		if (clnt_info->reply_event->rep.cm_rdma_ra_out >
		    ((uint8_t *)&cm_req_msg->req_local_qpn_plus)[3]) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_req_cm_hdlr: "
			    "statep 0x%p ERROR: InitiatorDepth(%d) is Greater "
			    "than ResponderResource(%d)", statep,
			    clnt_info->reply_event->rep.cm_rdma_ra_out,
			    ((uint8_t *)&cm_req_msg->req_local_qpn_plus)[3]);
			*reject_reason = IBT_CM_NOT_SUPPORTED;
			ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
			    IBT_CM_FAILURE_REQ, IBT_CM_NOT_SUPPORTED, NULL, 0);
			return (IBCM_SEND_REJ);
		}

		/* Check for HCA limits for RDMA Resources */
		if (clnt_info->reply_event->rep.cm_rdma_ra_in >
		    statep->hcap->hca_max_rdma_in_qp) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_req_cm_hdlr: "
			    "statep %p, ERROR: client specified rdma_ra_in %d "
			    "is greater than HCA Limit %d, rejecting MAD",
			    statep, clnt_info->reply_event->rep.cm_rdma_ra_in,
			    statep->hcap->hca_max_rdma_in_qp);
			*reject_reason = IBT_CM_NOT_SUPPORTED;
			ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
			    IBT_CM_FAILURE_REQ, IBT_CM_NOT_SUPPORTED, NULL, 0);
			return (IBCM_SEND_REJ);
		}

		if (clnt_info->reply_event->rep.cm_rdma_ra_out >
		    statep->hcap->hca_max_rdma_out_qp) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_req_cm_hdlr: "
			    "statep %p, ERROR: client specified rdma_ra_out %d "
			    "is greater than HCA Limit %d, rejecting MAD",
			    statep, clnt_info->reply_event->rep.cm_rdma_ra_out,
			    statep->hcap->hca_max_rdma_out_qp);
			*reject_reason = IBT_CM_NOT_SUPPORTED;
			ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
			    IBT_CM_FAILURE_REQ, IBT_CM_NOT_SUPPORTED, NULL, 0);
			return (IBCM_SEND_REJ);
		}

		rep_msgp->rep_resp_resources =
		    clnt_info->reply_event->rep.cm_rdma_ra_in;
		rep_msgp->rep_initiator_depth =
		    clnt_info->reply_event->rep.cm_rdma_ra_out;

		/* IBT_CM_FLOW_CONTROL is always set by default. */
		rep_msgp->rep_target_delay_plus |= IBT_CM_FLOW_CONTROL;

		/* RNR retry count occupies the top three bits of the byte */
		rep_msgp->rep_rnr_retry_cnt_plus =
		    (clnt_info->reply_event->rep.cm_rnr_retry_cnt & 0x7) << 5;

		/*
		 * Check out whether SRQ is associated with this channel.
		 * If yes, then set the appropriate bit.
		 */
		if (qp_attrs.qp_srq != NULL) {
			rep_msgp->rep_rnr_retry_cnt_plus |= (1 << 4);
		}

		local_ca_guid = h2b64(statep->local_hca_guid);
		bcopy(&local_ca_guid, rep_msgp->rep_local_ca_guid,
		    sizeof (ib_guid_t));

		/* Transition QP from Init to RTR state */
		if (ibcm_invoke_qp_modify(statep, cm_req_msg, rep_msgp) !=
		    IBT_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_req_cm_hdlr: "
			    "statep 0x%p ibcm_invoke_qp_modify failed because "
			    "of invalid data", statep);
			*reject_reason = IBT_CM_NO_RESC;
			ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
			    IBT_CM_FAILURE_REQ, IBT_CM_CI_FAILURE, NULL, 0);
			return (IBCM_SEND_REJ);
		}

		/*
		 * Link statep and channel, once CM determines it is
		 * post REP definitely.
		 */
		IBCM_SET_CHAN_PRIVATE(statep->channel, statep);

		/*
		 * Fill up the REP fields from ret_args
		 * failover status,	from ret_args
		 *
		 * Fill up local QPN and EECN from ret_args->channel
		 */

		/* fill in REP msg bytes Qkey, Starting PSN, 12-15, and 16-19 */
		IBTF_DPRINTF_L4(cmlog, "ibcm_process_cep_req_cm_hdlr: "
		    "qp_info.qp_state = %x", qp_attrs.qp_info.qp_state);

		/* QPN and PSN sit in the upper 24 bits of their wire words */
		rep_msgp->rep_local_qpn_plus = h2b32(qp_attrs.qp_qpn << 8);

		statep->local_qpn = qp_attrs.qp_qpn;

		/*
		 * qp_trans was verified to be IBT_RC_SRV above, so only the
		 * RC case is reachable here.
		 */
		switch (qp_attrs.qp_info.qp_trans) {
		case IBT_RD_SRV:
			rep_msgp->rep_local_qkey = h2b32(
			    qp_attrs.qp_info.qp_transport.rd.rd_qkey);
			break;
		case IBT_RC_SRV:
			rep_msgp->rep_starting_psn_plus =
			    h2b32(IBCM_QP_RC(qp_attrs).rc_sq_psn << 8);
			break;
		case IBT_UC_SRV:
			rep_msgp->rep_starting_psn_plus =
			    h2b32(IBCM_QP_UC(qp_attrs).uc_sq_psn << 8);
			break;
		default:
			break;
		}

#ifdef	NO_EEC_SUPPORT_YET
		/*
		 * NOTE(review): no variable named ret_args is declared in
		 * this function, so this block would presumably fail to
		 * compile if NO_EEC_SUPPORT_YET were ever defined.
		 */
		if (ret_args.cm_channel.ch_eec != NULL) {
			status = ibt_query_eec(ret_args.cm_channel.ch_eec,
			    &eec_attrs);
			if (status == IBT_SUCCESS) {
				rep_msgp->rep_local_eecn_plus =
				    h2b32(((uint32_t)eec_attrs.eec_eecn << 8));
			}
		}
#endif

		/*
		 * figure out Target ACK delay
		 *
		 * status is the result of the last ibt_query_qp() or
		 * ibt_modify_qp() call above (or ibt_query_eec() when EEC
		 * support is compiled in).
		 */
		rep_msgp->rep_target_delay_plus |= (status == IBT_SUCCESS) ?
		    statep->hcap->hca_ack_delay << 3 : 0;

		IBTF_DPRINTF_L4(cmlog, "ibcm_process_cep_req_cm_hdlr:statep %p "
		    "REP priv len %x", statep, clnt_info->priv_data_len);
		/* Copy PrivateData from priv_data */
		if (clnt_info->priv_data_len != 0) {
			bcopy(clnt_info->priv_data, rep_msgp->rep_private_data,
			    min(IBT_REP_PRIV_DATA_SZ,
			    clnt_info->priv_data_len));
		}

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rep_msgp))

		return (IBCM_SEND_REP);
	}

	/* REJ message */
	rej_msgp = (ibcm_rej_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_cep_req_cm_hdlr: statep %p REJ "
	    "priv len %x", statep, clnt_info->priv_data_len);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rej_msgp))

	/* if priv_data_len != 0 use priv_data to copy back to rej_priv_data */
	if (clnt_info->priv_data_len != 0) {
		bcopy(clnt_info->priv_data, rej_msgp->rej_private_data,
		    min(IBT_REJ_PRIV_DATA_SZ, clnt_info->priv_data_len));
	}

	/* Fill in the additional reject information (ARI) per reject type */
	if (cb_status == IBT_CM_REDIRECT_PORT) {
		ib_gid_t	tgid;

		/* ARI is the GID to redirect to, in wire byte order */
		tgid.gid_guid =
		    h2b64(clnt_info->reply_event->rej.ari_gid.gid_guid);
		tgid.gid_prefix =
		    h2b64(clnt_info->reply_event->rej.ari_gid.gid_prefix);

		*arej_len = sizeof (ib_gid_t);
		bcopy(&tgid, &rej_msgp->rej_addl_rej_info, sizeof (ib_gid_t));

		IBTF_DPRINTF_L3(cmlog, "ibcm_process_cep_req_cm_hdlr: ari_gid= "
		    "%llX:%llX", tgid.gid_prefix, tgid.gid_guid);

	} else if (cb_status == IBT_CM_REDIRECT) {
		ibcm_classportinfo_msg_t	tclp;

		/* ARI is a ClassPortInfo built from the client's redirect */
		ibcm_init_clp_to_mad(&tclp,
		    &clnt_info->reply_event->rej.ari_redirect);
		bcopy(&tclp, rej_msgp->rej_addl_rej_info, sizeof (tclp));

		*arej_len = sizeof (ibcm_classportinfo_msg_t);

	} else if (cb_status == IBT_CM_REJECT) {

		/* Fill up the REJ fields, from ret_args */
		*arej_len = min(
		    clnt_info->reply_event->rej.ari_consumer.rej_ari_len,
		    IBT_CM_ADDL_REJ_LEN);
		bcopy(clnt_info->reply_event->rej.ari_consumer.rej_ari,
		    &rej_msgp->rej_addl_rej_info, *arej_len);

		/*
		 * RDMA IP REQ was passed up to the ULP, the ULP decided to do
		 * a "normal" consumer REJ, by the returning IBT_CM_REJECT in
		 * the cm handler.
		 * CM has to do some extra stuff too, it has to
		 * a) return REJ code 28 (consumer) and b) put 0x1 in the first
		 * byte of the ARI data, to indicate that this is a RDMA aware
		 * ULP that is doing a consumer reject.  The ULP should have
		 * put its consumer specific data into ibt_arej_info_t(9s) at
		 * byte 1 of the rej_ari[] array.
		 */
		if (((statep->svcid & IB_SID_IPADDR_PREFIX_MASK) == 0) &&
		    (statep->svcid & IB_SID_IPADDR_PREFIX)) {
			rej_msgp->rej_addl_rej_info[0] = 1;
		}
	}

	/* Message-REJected type lives in the top two bits of this byte */
	rej_msgp->rej_msg_type_plus = IBT_CM_FAILURE_REQ << 6;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rej_msgp))

	return (IBCM_SEND_REJ);
}
/*
 * ibcm_cep_state_rep:
 *	QP state transition function called for an incoming REP on active side
 *
 *	Builds an IBT_CM_EVENT_REP_RCV event from the REP (and from the
 *	stored outgoing REQ), hands it to the client handler if one is
 *	registered, and then lets ibcm_process_cep_rep_cm_hdlr() act on the
 *	handler's answer.  With no handler the REP is treated as accepted.
 *
 * INPUTS:
 *	statep		- state pointer
 *	cm_rep_msgp	- REP message pointer
 *	reject_reason	- Rejection reason See Section 12.6.7.2 rev1.0a IB Spec
 *	arej_len	- passed through to ibcm_process_cep_rep_cm_hdlr()
 *
 * RETURN VALUE:
 *	IBCM_DEFER	- the client handler returned IBT_CM_DEFER; a copy of
 *			  the REP is kept in statep->defer_cm_msg
 *	otherwise	- the value of ibcm_process_cep_rep_cm_hdlr()
 */
ibcm_status_t
ibcm_cep_state_rep(ibcm_state_data_t *statep, ibcm_rep_msg_t *cm_rep_msgp,
    ibt_cm_reason_t *reject_reason, uint8_t *arej_len)
{
	void			*priv_data = NULL;
	ibcm_status_t		rval = IBCM_SEND_RTU;
	ibt_cm_event_t		event;
	ibt_cm_status_t		cb_status = IBT_CM_ACCEPT;
	ibt_cm_return_args_t	ret_args;
	ibcm_clnt_reply_info_t	clnt_info;
	uint8_t			req_init_depth;

	IBTF_DPRINTF_L3(cmlog, "ibcm_cep_state_rep: statep 0x%p", statep);

	/*
	 * ret_args is read below even when no client handler is registered,
	 * so it must be cleared unconditionally.  Otherwise
	 * clnt_info.priv_data_len would be uninitialized stack data in that
	 * case, and ibcm_process_cep_rep_cm_hdlr() copies from priv_data
	 * (NULL here) whenever that length is non-zero.
	 */
	bzero(&ret_args, sizeof (ret_args));

	/* Check first if client handler is valid */
	if (statep->cm_handler != NULL) {
		/* initialize fields in ibt_cm_event_t */
		bzero(&event, sizeof (event));
		event.cm_type = IBT_CM_EVENT_REP_RCV;
		event.cm_channel = statep->channel;
		event.cm_session_id = statep;

		IBCM_EVT_REP(event).rep_rdma_ra_in =
		    cm_rep_msgp->rep_initiator_depth;

		/* byte 3 of req_local_eec_no_plus in the stored REQ */
		req_init_depth =
		    ((uint8_t *)&(((ibcm_req_msg_t *)IBCM_OUT_MSGP(
		    statep->stored_msg))->req_local_eec_no_plus))[3];
		IBCM_EVT_REP(event).rep_rdma_ra_out =
		    min(cm_rep_msgp->rep_resp_resources, req_init_depth);

		IBTF_DPRINTF_L3(cmlog, "ibcm_cep_state_rep: statep 0x%p, "
		    "InitDepth %d, RespResr %d", statep,
		    cm_rep_msgp->rep_initiator_depth,
		    IBCM_EVT_REP(event).rep_rdma_ra_out);

		/*
		 * Service time starts from the upper 5 bits of byte 3 of
		 * req_starting_psn_plus in the stored REQ.
		 */
		IBCM_EVT_REP(event).rep_service_time = ibt_ib2usec(
		    ((uint8_t *)&(((ibcm_req_msg_t *)IBCM_OUT_MSGP(
		    statep->stored_msg))->req_starting_psn_plus))[3] >> 3);

		/*
		 * NOTE(review): by operator precedence this subtracts
		 * (2 * pkt_life_time - ibcm_sw_delay), i.e. ibcm_sw_delay is
		 * effectively added back.  Confirm that is the intent.
		 */
		IBCM_EVT_REP(event).rep_service_time -=
		    2 * statep->pkt_life_time - ibcm_sw_delay;

		/* failover status is bits 2:1 of rep_target_delay_plus */
		IBCM_EVT_REP(event).rep_failover_status =
		    cm_rep_msgp->rep_target_delay_plus >> 1 & 3;

		if (cm_rep_msgp->rep_target_delay_plus & 0x1)
			IBCM_EVT_REP(event).rep_flags |= IBT_CM_FLOW_CONTROL;

		if ((cm_rep_msgp->rep_rnr_retry_cnt_plus >> 4) & 0x1)
			IBCM_EVT_REP(event).rep_flags |= IBT_CM_SRQ_EXISTS;

		IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_rep: statep 0x%p "
		    "rep_service_time %d", statep,
		    IBCM_EVT_REP(event).rep_service_time);

		event.cm_priv_data = &(cm_rep_msgp->rep_private_data[0]);
		event.cm_priv_data_len = IBT_REP_PRIV_DATA_SZ;

		/*
		 * Allocate priv_data of size IBT_MAX_PRIV_DATA_SZ
		 */
		priv_data = kmem_zalloc(IBT_MAX_PRIV_DATA_SZ, KM_SLEEP);

		ibcm_insert_trace(statep, IBCM_TRACE_CALLED_REP_RCVD_EVENT);

		/* invoke the CM handler */
		cb_status = statep->cm_handler(statep->state_cm_private, &event,
		    &ret_args, priv_data, IBT_RTU_PRIV_DATA_SZ);

		ibcm_insert_trace(statep, IBCM_TRACE_RET_REP_RCVD_EVENT);

		IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_rep: statep 0x%p "
		    "Client handler returned %x", statep, cb_status);

		if (cb_status == IBT_CM_DEFER) {
			/* keep a copy of the REP for the deferred reply */
			if (statep->defer_cm_msg == NULL)
				statep->defer_cm_msg =
				    kmem_zalloc(IBCM_MSG_SIZE, KM_SLEEP);
			bcopy(cm_rep_msgp, statep->defer_cm_msg, IBCM_MSG_SIZE);

			/* unblock any blocked cm proceed api calls */
			mutex_enter(&statep->state_mutex);
			statep->clnt_proceed = IBCM_UNBLOCK;
			cv_broadcast(&statep->block_client_cv);
			mutex_exit(&statep->state_mutex);

			kmem_free(priv_data, IBT_MAX_PRIV_DATA_SZ);
			return (IBCM_DEFER);
		}
	}

	/* fail any blocked cm proceed api calls - client bug */
	mutex_enter(&statep->state_mutex);
	statep->clnt_proceed = IBCM_FAIL;
	cv_broadcast(&statep->block_client_cv);
	mutex_exit(&statep->state_mutex);

	clnt_info.reply_event = (ibt_cm_proceed_reply_t *)&ret_args.cm_ret;
	clnt_info.priv_data = priv_data;
	clnt_info.priv_data_len = ret_args.cm_ret_len;

	rval =
	    ibcm_process_cep_rep_cm_hdlr(statep, cb_status, &clnt_info,
	    reject_reason, arej_len, cm_rep_msgp);

	/* priv_data is only allocated when a handler was invoked */
	if (priv_data != NULL)
		kmem_free(priv_data, IBT_MAX_PRIV_DATA_SZ);
	return (rval);
}
/*
 * ibcm_process_cep_rep_cm_hdlr:
 *	Processes the response from client handler for an incoming REP.
 *
 *	IBT_CM_ACCEPT (IBT_CM_DEFAULT is treated the same) drives the QP
 *	through ibcm_invoke_qp_modify() and ibcm_invoke_rtu_qp_modify() and
 *	copies any client private data into the stored outgoing message as
 *	an RTU.  Every other status fills the stored outgoing message in as
 *	a REJ and reports the failure to the client.
 *
 * INPUTS:
 *	statep		- connection state pointer
 *	cb_status	- status returned by the client handler
 *	clnt_info	- client reply event, private data and its length
 *	reject_reason	- out: IBT_CM_SUCCESS, or the reason used for the REJ
 *	arej_len	- out: additional reject info length; written on the
 *			  REJ path only, and only if clnt_info->reply_event
 *			  is not NULL
 *	cm_rep_msgp	- the incoming REP message
 *
 * RETURN VALUE:
 *	IBCM_SEND_RTU or IBCM_SEND_REJ
 */
ibcm_status_t
ibcm_process_cep_rep_cm_hdlr(ibcm_state_data_t *statep,
    ibt_cm_status_t cb_status, ibcm_clnt_reply_info_t *clnt_info,
    ibt_cm_reason_t *reject_reason, uint8_t *arej_len,
    ibcm_rep_msg_t *cm_rep_msgp)
{
	ibcm_rej_msg_t	*rej_msgp;
	ib_time_t	cep_timeout;

	if (cb_status == IBT_CM_DEFAULT)
		cb_status = IBT_CM_ACCEPT;

	/* Map the client's answer onto a CM reject reason */
	switch (cb_status) {
	case IBT_CM_ACCEPT:
		*reject_reason = IBT_CM_SUCCESS;
		break;
	case IBT_CM_REJECT:
		*reject_reason = IBT_CM_CONSUMER;
		break;
	case IBT_CM_REDIRECT_PORT:
		*reject_reason = IBT_CM_PORT_REDIRECT;
		break;
	case IBT_CM_REDIRECT:
		*reject_reason = IBT_CM_REDIRECT_CM;
		break;
	case IBT_CM_NO_RESOURCE:
		*reject_reason = IBT_CM_NO_RESC;
		break;
	default:
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_rep_cm_hdlr: statep "
		    "0x%p, Client handler returned unexpected value %d",
		    statep, cb_status);
		*reject_reason = IBT_CM_CONSUMER;
		break;
	}

	if (cb_status != IBT_CM_ACCEPT) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rej_msgp))

		/* Fill up the REJ fields, from ret_args */
		rej_msgp = (ibcm_rej_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);
		rej_msgp->rej_msg_type_plus = IBT_CM_FAILURE_REP << 6;

		/* client private data, if any, goes into the REJ */
		if (clnt_info->priv_data_len != 0) {
			bcopy(clnt_info->priv_data, rej_msgp->rej_private_data,
			    min(IBT_REJ_PRIV_DATA_SZ,
			    clnt_info->priv_data_len));
		}

		if (clnt_info->reply_event != NULL) {
			*arej_len = min(
			    clnt_info->reply_event->rej.ari_consumer.rej_ari_len,
			    IBT_CM_ADDL_REJ_LEN);
		}

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(clnt_info->reply_event->rej))

		/* a non-zero length asserts clnt_info->reply_event != 0 */
		if (*arej_len != 0) {
			bcopy(clnt_info->reply_event->rej.ari_consumer.rej_ari,
			    &rej_msgp->rej_addl_rej_info, *arej_len);
		}

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(clnt_info->reply_event->rej))

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rej_msgp))

		/* Disassociate statep and QP */
		IBCM_SET_CHAN_PRIVATE(statep->channel, NULL);

		/* callback client, to enable client to do resource cleanup */
		ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
		    IBT_CM_FAILURE_REP, *reject_reason, NULL, 0);

		return (IBCM_SEND_REJ);
	}

	/* We come here if status is ACCEPT or CM handler is NULL */
	cep_timeout = ibt_usec2ib(statep->pkt_life_time * 2 +
	    ibt_ib2usec(cm_rep_msgp->rep_target_delay_plus >> 3));

	IBTF_DPRINTF_L5(cmlog, "ibcm_process_cep_rep_cm_hdlr: statep %p"
	    " active cep_timeout(usec) 0x%x ", statep, cep_timeout);

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_cep_rep_cm_hdlr: statep %p"
	    " passive hca_ack_delay(ib_time) = 0x%x, ", statep,
	    cm_rep_msgp->rep_target_delay_plus >> 3);

	IBTF_DPRINTF_L5(cmlog, "ibcm_process_cep_rep_cm_hdlr: statep %p"
	    " rnr_retry_cnt = 0x%x", statep,
	    cm_rep_msgp->rep_rnr_retry_cnt_plus >> 5);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
	/* upper 24 bits of rep_starting_psn_plus, in host order */
	statep->starting_psn =
	    b2h32(cm_rep_msgp->rep_starting_psn_plus) >> 8;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))

	/* Call IBTL CM's qp modify function from Init to RTR */
	if (ibcm_invoke_qp_modify(statep,
	    (ibcm_req_msg_t *)IBCM_OUT_MSGP(statep->stored_msg),
	    cm_rep_msgp) != IBT_SUCCESS) {

		IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_rep_cm_hdlr: "
		    "statep %p, ibcm_invoke_qp_modify to RTR failed",
		    statep);
		*reject_reason = IBT_CM_NO_RESC;

	} else if (ibcm_invoke_rtu_qp_modify(statep, cep_timeout,
	    cm_rep_msgp) != IBT_SUCCESS) {
		/*
		 * Modify qp from RTR to RTS failed.
		 * RDMA initiator depth on active is same as negotiated
		 * passive REP's responder resources
		 */
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_rep_cm_hdlr: "
		    "statep %p ibcm_invoke_rtu_qp_modify to RTS failed",
		    statep);
		(void) ibcm_cep_to_error_state(statep);
		*reject_reason = IBT_CM_NO_RESC;
	}

	if (*reject_reason == IBT_CM_NO_RESC) {
		/* Disassociate statep and QP */
		IBCM_SET_CHAN_PRIVATE(statep->channel, NULL);

		ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
		    IBT_CM_FAILURE_REP, IBT_CM_CI_FAILURE, NULL, 0);
		return (IBCM_SEND_REJ);	/* send REJ */
	}

	/* client private data, if any, goes out in the RTU */
	if (clnt_info->priv_data_len != 0) {
		ibcm_rtu_msg_t	*rtu_msgp;

		rtu_msgp = (ibcm_rtu_msg_t *)
		    IBCM_OUT_MSGP(statep->stored_msg);
		bcopy(clnt_info->priv_data, rtu_msgp->rtu_private_data,
		    min(IBT_RTU_PRIV_DATA_SZ,
		    clnt_info->priv_data_len));
	}

	*reject_reason = IBT_CM_SUCCESS;
	return (IBCM_SEND_RTU);
}
/*
 * ibcm_invoke_rtu_qp_modify:
 *	Helper function that issues the RTR to RTS modify of the QP.
 *	Callers visible in this part of the file are
 *	ibcm_process_cep_rep_cm_hdlr() and ibcm_cep_state_rtu().
 *
 * INPUTS:
 *	statep	- connection state pointer
 *	timeout	- value stored as the path cep_timeout (RC and UC)
 *	rep_msg	- REP message supplying the RNR retry count, the RDMA
 *		  depth and the failover status for an RC channel
 *
 * RETURN VALUE:
 *	The status returned by ibt_modify_qp()
 */
static ibt_status_t
ibcm_invoke_rtu_qp_modify(ibcm_state_data_t *statep, ib_time_t timeout,
    ibcm_rep_msg_t *rep_msg)
{
	ibt_cep_modify_flags_t	modify_flags = IBT_CEP_SET_RTR_RTS;
	ibt_qp_info_t		qpi;
	ibt_status_t		rc;

	/* Start filling up ibt_qp_info_t. */
	bzero(&qpi, sizeof (qpi));
	qpi.qp_current_state = IBT_STATE_RTR;
	qpi.qp_trans = ibtl_cm_get_chan_type(statep->channel);

	switch (qpi.qp_trans) {
	case IBT_RC_SRV:
		IBCM_QPINFO_RC_PATH(qpi).cep_timeout = timeout;
		IBCM_QPINFO_RC(qpi).rc_retry_cnt = statep->cep_retry_cnt;
		/* RNR retry count is the top 3 bits of the REP field */
		IBCM_QPINFO_RC(qpi).rc_rnr_retry_cnt =
		    rep_msg->rep_rnr_retry_cnt_plus >> 5;
		IBCM_QPINFO_RC(qpi).rc_sq_psn = statep->starting_psn;

		/*
		 * The REP field used for the outbound RDMA depth depends on
		 * which side of the connection this is.
		 */
		IBCM_QPINFO_RC(qpi).rc_rdma_ra_out =
		    (statep->mode == IBCM_ACTIVE_MODE) ?
		    rep_msg->rep_resp_resources :
		    rep_msg->rep_initiator_depth;

		if (statep->alt_port &&
		    (((rep_msg->rep_target_delay_plus >> 1) & 0x3) ==
		    IBT_CM_FAILOVER_ACCEPT)) {
			/* failover was accepted */
			IBCM_QPINFO_RC(qpi).rc_mig_state =
			    IBT_STATE_REARMED;
			modify_flags |= IBT_CEP_SET_MIG;
		}
		break;

	/* XXX RD? */
	case IBT_UC_SRV:
		IBCM_QPINFO_UC_PATH(qpi).cep_timeout = timeout;
		break;

	default:
		/* logged only; the modify below is still attempted */
		IBTF_DPRINTF_L2(cmlog, "ibcm_invoke_rtu_qp_modify: "
		    "unknow svc_type = %x", qpi.qp_trans);
		break;
	}

	/* Call modify_qp */
	rc = ibt_modify_qp(statep->channel, modify_flags, &qpi, NULL);

	IBTF_DPRINTF_L4(cmlog, "ibcm_invoke_rtu_qp_modify: statep 0x%p "
	    "modify qp status = %d", statep, rc);

	ibcm_insert_trace(statep, (rc == IBT_SUCCESS) ?
	    IBCM_TRACE_RTR_RTS : IBCM_TRACE_RTR_RTS_FAIL);

#ifdef	DEBUG
	print_modify_qp("RTR to RTS", statep->channel, modify_flags, &qpi);

	if (statep->channel != NULL) {
		ibt_qp_query_attr_t	qp_attrs;

		(void) ibt_query_qp(statep->channel, &qp_attrs);
		IBTF_DPRINTF_L4(cmlog, "ibcm_invoke_rtu_qp_modify: "
		    "qp_info.qp_state = %x", qp_attrs.qp_info.qp_state);
	}
#endif
	return (rc);
}
/*
 * ibcm_cep_state_rtu:
 *	QP state transition function called for an incoming RTU
 *	on passive side.
 *
 *	Moves the QP to RTS using the REP kept in statep->stored_msg.  On
 *	failure the QP is put in the error state, a REJ is posted and the
 *	client is told about the failure.  On success the connection is
 *	marked established and the client receives IBT_CM_EVENT_CONN_EST.
 *	Either way the waiters on block_mad_cv are woken at the end.
 *
 * INPUTS:
 *	statep		- connection state pointer
 *	cm_rtu_msgp	- RTU message pointer; may be NULL, in which case no
 *			  private data is passed to the client
 */
void
ibcm_cep_state_rtu(ibcm_state_data_t *statep, ibcm_rtu_msg_t *cm_rtu_msgp)
{
	ibcm_rep_msg_t	*stored_rep;
	ibt_cm_event_t	est_event;
	ibt_status_t	rc;

	stored_rep = (ibcm_rep_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);

	IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_rtu: statep 0x%p", statep);

	ASSERT(statep->channel != NULL);

	/* RDMA initiator depth taken from negotiated REP values */
	rc = ibcm_invoke_rtu_qp_modify(statep,
	    ibt_usec2ib(statep->remote_ack_delay), stored_rep);

	if (rc != IBT_SUCCESS) {

		(void) ibcm_cep_to_error_state(statep);

		/*
		 * Disassociate statep and QP, as there is a
		 * QP associated with this statep.
		 */
		IBCM_SET_CHAN_PRIVATE(statep->channel, NULL);

		ibcm_post_rej_mad(statep, IBT_CM_NO_RESC,
		    IBT_CM_FAILURE_UNKNOWN, NULL, 0);

		/*
		 * Invoke CM handler, so client/server can do
		 * resource cleanup. No private data can be returned here
		 */
		ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
		    IBT_CM_FAILURE_UNKNOWN, IBT_CM_NO_RESC, NULL, 0);

		/* unblock any pending DREQ threads */
		mutex_enter(&statep->state_mutex);
		statep->cep_in_rts = IBCM_FAIL;
		cv_broadcast(&statep->block_mad_cv);
		mutex_exit(&statep->state_mutex);
		return;
	}

	/* QP is in RTS: the connection is now established */
	mutex_enter(&statep->state_mutex);
	statep->state = IBCM_STATE_ESTABLISHED;
	ibtl_cm_chan_is_open(statep->channel);
	mutex_exit(&statep->state_mutex);

	/* invoke the CM handler */
	ASSERT(statep->cm_handler != NULL);

	bzero(&est_event, sizeof (est_event));
	est_event.cm_type = IBT_CM_EVENT_CONN_EST;
	est_event.cm_channel = statep->channel;
	est_event.cm_session_id = NULL;

	if (cm_rtu_msgp != NULL) {
		est_event.cm_priv_data_len = IBT_RTU_PRIV_DATA_SZ;
		est_event.cm_priv_data = &(cm_rtu_msgp->rtu_private_data[0]);
	}

	ibcm_insert_trace(statep, IBCM_TRACE_CALLED_CONN_EST_EVENT);

	(void) statep->cm_handler(statep->state_cm_private, &est_event, NULL,
	    NULL, 0);

	ibcm_insert_trace(statep, IBCM_TRACE_RET_CONN_EST_EVENT);

	/* bit 2 of ibcm_enable_trace selects the full trace dump */
	if ((ibcm_enable_trace & 4) == 0) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_cep_state_rtu CONN_EST Channel %p",
		    statep->channel);
	} else {
		ibcm_dump_conn_trace(statep);
	}

	/* unblock any pending DREQ threads */
	mutex_enter(&statep->state_mutex);
	statep->cep_in_rts = IBCM_UNBLOCK;
	cv_broadcast(&statep->block_mad_cv);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_cep_send_rtu:
 *	QP state transition function called for an outgoing RTU
 *	on active side.
 *
 *	Delivers IBT_CM_EVENT_CONN_EST (without private data) to the client
 *	handler when one is registered, then sets cep_in_rts to
 *	IBCM_UNBLOCK and wakes the waiters on block_mad_cv.
 *
 * INPUTS:
 *	statep	- connection state pointer
 *
 * RETURN VALUE: NONE
 */
void
ibcm_cep_send_rtu(ibcm_state_data_t *statep)
{
	if (statep->cm_handler == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_cep_send_rtu: cm_handler NULL");
	} else {
		ibt_cm_event_t	est_event;

		/* invoke the CM handler */
		bzero(&est_event, sizeof (est_event));
		est_event.cm_channel = statep->channel;
		est_event.cm_type = IBT_CM_EVENT_CONN_EST;
		est_event.cm_session_id = NULL;
		est_event.cm_priv_data_len = 0;
		est_event.cm_priv_data = NULL;

		ibcm_insert_trace(statep, IBCM_TRACE_CALLED_CONN_EST_EVENT);

		(void) statep->cm_handler(statep->state_cm_private, &est_event,
		    NULL, NULL, 0);

		ibcm_insert_trace(statep, IBCM_TRACE_RET_CONN_EST_EVENT);
	}

	/* bit 2 of ibcm_enable_trace selects the full trace dump */
	if ((ibcm_enable_trace & 4) == 0) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_cep_send_rtu CONN_EST Channel %p",
		    statep->channel);
	} else {
		ibcm_dump_conn_trace(statep);
	}

	/* unblock any pending DREQ threads */
	mutex_enter(&statep->state_mutex);
	statep->cep_in_rts = IBCM_UNBLOCK;
	cv_broadcast(&statep->block_mad_cv);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_cep_to_error_state:
 *	CEP state transition function. Changes state to IBT_STATE_ERROR
 *
 *	Nothing is attempted (and IBT_SUCCESS is returned) when the statep
 *	has no channel.
 *
 * INPUTS:
 *	statep	- connection state pointer
 *
 * RETURN VALUE:
 *	IBT_SUCCESS	- if able to change state otherwise failure
 */
ibt_status_t
ibcm_cep_to_error_state(ibcm_state_data_t *statep)
{
	ibt_status_t	rc = IBT_SUCCESS;

	if (statep->channel != NULL) {
		ibt_qp_info_t	err_info;

		bzero(&err_info, sizeof (err_info));
		err_info.qp_state = IBT_STATE_ERROR;
		/* For now, set it to RC type */
		err_info.qp_trans = IBT_RC_SRV;

		/* Call modify_qp to move to ERROR state */
		rc = ibt_modify_qp(statep->channel, IBT_CEP_SET_STATE,
		    &err_info, NULL);

		IBTF_DPRINTF_L4(cmlog, "ibcm_cep_to_error_state: "
		    "statep %p ibt_modify_qp() = %d", statep, rc);

		ibcm_insert_trace(statep, (rc == IBT_SUCCESS) ?
		    IBCM_TRACE_ERROR : IBCM_TRACE_ERROR_FAIL);
	}

#ifdef	NO_EEC_SUPPORT_YET
	if (statep->channel.ch_eec != NULL) {
		ibt_eec_info_t	eec_info;

		bzero(&eec_info, sizeof (ibt_eec_info_t));
		eec_info.eec_state = what;

		/* Call modify_eec */
		rc = ibtl_cm_modify_eec(statep->channel.ch_eec, &eec_info,
		    IBT_CEP_SET_NOTHING);

		IBTF_DPRINTF_L4(cmlog, "ibcm_cep_to_error_state: "
		    "ibtl_cm_modify_eec() returned = %x", rc);
	}
#endif

	return (rc);
}
/*
 * ibcm_cep_state_rej:
 *	QP state transition function called for an incoming REJ
 *	on active/passive side
 *
 *	Purges the path cache, moves the QP to the error state when the REJ
 *	arrived in REP_SENT or MRA_REP_RCVD, detaches the statep from the
 *	channel, reports IBT_CM_EVENT_FAILURE to the client handler (if
 *	any), copies the additional reject info into open_return_data (if
 *	set) and finally calls ibcm_open_done() under the state mutex.
 *
 * INPUTS:
 *	statep		- connection state pointer
 *	rej_msgp	- REJ message pointer
 *	rej_state	- State where REJ processing began
 *
 * RETURN VALUE: NONE
 */
void
ibcm_cep_state_rej(ibcm_state_data_t *statep, ibcm_rej_msg_t *rej_msgp,
    ibcm_conn_state_t rej_state)
{
	ibt_cm_event_t	fail_event;
	ibt_status_t	rc;

	IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_rej: statep 0x%p", statep);

	ibcm_path_cache_purge();

	switch (rej_state) {
	case IBCM_STATE_REP_SENT:
	case IBCM_STATE_MRA_REP_RCVD:
		rc = ibcm_cep_to_error_state(statep);
		IBTF_DPRINTF_L5(cmlog, "ibcm_cep_state_rej: statep 0x%p "
		    "ibcm_cep_to_error_state returned %d", statep,
		    rc);
		break;
	default:
		break;
	}

	/* Disassociate state structure and CM */
	IBCM_SET_CHAN_PRIVATE(statep->channel, NULL);

	/*
	 * The event is cleared even without a handler: its additional
	 * reject info is copied out to open_return_data further down.
	 */
	bzero(&fail_event, sizeof (fail_event));

	/* invoke the CM handler */
	if (statep->cm_handler != NULL) {
		fail_event.cm_type = IBT_CM_EVENT_FAILURE;
		fail_event.cm_channel = statep->channel;
		fail_event.cm_session_id = NULL;

		/* the client sees the REJ private data in place */
		fail_event.cm_priv_data = &(rej_msgp->rej_private_data[0]);
		fail_event.cm_priv_data_len = IBT_REJ_PRIV_DATA_SZ;

		fail_event.cm_event.failed.cf_code = IBT_CM_FAILURE_REJ_RCV;
		/* top two bits of rej_msg_type_plus */
		fail_event.cm_event.failed.cf_msg =
		    rej_msgp->rej_msg_type_plus >> 6;
		fail_event.cm_event.failed.cf_reason =
		    b2h16(rej_msgp->rej_rejection_reason);

		IBTF_DPRINTF_L3(cmlog, "ibcm_cep_state_rej: rej_reason = %d",
		    fail_event.cm_event.failed.cf_reason);

		ibcm_copy_addl_rej(statep, rej_msgp,
		    &fail_event.cm_event.failed);

		(void) statep->cm_handler(statep->state_cm_private,
		    &fail_event, NULL, NULL, 0);
	}

	if (statep->open_return_data != NULL) {
		bcopy(&fail_event.cm_event.failed.cf_additional,
		    &statep->open_return_data->rc_arej_info,
		    sizeof (ibt_arej_info_t));
	}

	if (ibcm_enable_trace != 0)
		ibcm_dump_conn_trace(statep);

	mutex_enter(&statep->state_mutex);
	ibcm_open_done(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_copy_addl_rej:
 *	Used to initialize client args with addl rej information from REJ MAD
 *
 *	failed->cf_arej_info_valid is set to B_TRUE only when the REJ
 *	carries at least as many bytes of additional info as the rejection
 *	reason needs; failed->cf_additional is filled in (host byte order)
 *	in that case.  On the passive side only IBT_CM_CONSUMER rejects are
 *	decoded.
 *
 * INPUTS:
 *	statep		- connection state pointer (only its mode is used)
 *	rej_msgp	- REJ message pointer
 *	failed		- failure event data to be filled in
 */
static void
ibcm_copy_addl_rej(ibcm_state_data_t *statep, ibcm_rej_msg_t *rej_msgp,
    ibt_cm_conn_failed_t *failed)
{
	ibt_arej_info_t			*ari = &failed->cf_additional;
	ibcm_classportinfo_msg_t	clp_image;
	uint16_t			reason;
	uint8_t				info_len;

	reason = b2h16(rej_msgp->rej_rejection_reason);
	/* the length is held in the upper 7 bits of the field */
	info_len = rej_msgp->rej_reject_info_len_plus >> 1;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ari))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(failed->cf_arej_info_valid))

	failed->cf_arej_info_valid = B_FALSE;

	IBTF_DPRINTF_L3(cmlog, "ibcm_copy_addl_rej: rej_reason = %d "
	    "ari_len = %d", reason, info_len);

	if ((statep->mode == IBCM_PASSIVE_MODE) &&
	    (reason != IBT_CM_CONSUMER))
		return;

	switch (reason) {
	case IBT_CM_PRIM_GID:
	case IBT_CM_ALT_GID:
	case IBT_CM_PORT_REDIRECT:
		if (info_len >= sizeof (ib_gid_t)) {
			failed->cf_arej_info_valid = B_TRUE;
			bcopy(rej_msgp->rej_addl_rej_info, &ari->ari_gid,
			    sizeof (ib_gid_t));
			ari->ari_gid.gid_guid = b2h64(ari->ari_gid.gid_guid);
			ari->ari_gid.gid_prefix =
			    b2h64(ari->ari_gid.gid_prefix);

			IBTF_DPRINTF_L4(cmlog,
			    "ibcm_copy_addl_rej: ari_gid= %llX:%llX",
			    ari->ari_gid.gid_prefix, ari->ari_gid.gid_guid);
		}
		break;

	case IBT_CM_PRIM_LID:
	case IBT_CM_ALT_LID:
		if (info_len >= sizeof (ib_lid_t)) {
			failed->cf_arej_info_valid = B_TRUE;
			bcopy(rej_msgp->rej_addl_rej_info, &ari->ari_lid,
			    sizeof (ib_lid_t));
			ari->ari_lid = b2h16(ari->ari_lid);

			IBTF_DPRINTF_L4(cmlog,
			    "ibcm_copy_addl_rej: ari_lid= 0x%lX",
			    ari->ari_lid);
		}
		break;

	case IBT_CM_INVALID_PRIM_SL:
	case IBT_CM_INVALID_ALT_SL:
		if (info_len >= 1) {
			failed->cf_arej_info_valid = B_TRUE;
			/* take the first 4 bits */
			ari->ari_sl = rej_msgp->rej_addl_rej_info[0] >> 4;
		}
		break;

	case IBT_CM_INVALID_PRIM_TC:
	case IBT_CM_INVALID_ALT_TC:
		if (info_len >= 1) {
			failed->cf_arej_info_valid = B_TRUE;
			/* take the first byte */
			ari->ari_tclass = rej_msgp->rej_addl_rej_info[0];
		}
		break;

	case IBT_CM_INVALID_PRIM_HOP:
	case IBT_CM_INVALID_ALT_HOP:
		if (info_len >= 1) {
			failed->cf_arej_info_valid = B_TRUE;
			/* take the first byte */
			ari->ari_hop = rej_msgp->rej_addl_rej_info[0];
		}
		break;

	case IBT_CM_INVALID_PRIM_RATE:
	case IBT_CM_INVALID_ALT_RATE:
		if (info_len >= 1) {
			failed->cf_arej_info_valid = B_TRUE;
			/* take the first 6 bits */
			ari->ari_rate = rej_msgp->rej_addl_rej_info[0] >> 2;
		}
		break;

	case IBT_CM_REDIRECT_CM:
		if (info_len >= sizeof (ibcm_classportinfo_msg_t)) {
			failed->cf_arej_info_valid = B_TRUE;
			/* decode from a local copy of the classportinfo */
			bcopy(rej_msgp->rej_addl_rej_info, &clp_image,
			    sizeof (clp_image));
			ibcm_init_clp_from_mad(&clp_image, &ari->ari_redirect);
		}
		break;

	case IBT_CM_INVALID_MTU:
		if (info_len >= 1) {
			failed->cf_arej_info_valid = B_TRUE;
			/* take the first 4 bits */
			ari->ari_mtu = rej_msgp->rej_addl_rej_info[0] >> 4;
		}
		break;

	case IBT_CM_CONSUMER:
		if (info_len != 0) {
			failed->cf_arej_info_valid = B_TRUE;
			/* never copy more than the client buffer holds */
			if (info_len > IBT_CM_ADDL_REJ_LEN)
				info_len = IBT_CM_ADDL_REJ_LEN;
			bcopy(&rej_msgp->rej_addl_rej_info,
			    ari->ari_consumer.rej_ari, info_len);
			ari->ari_consumer.rej_ari_len = info_len;
		}
		break;

	case IBT_CM_INVALID_PRIM_FLOW:
	case IBT_CM_INVALID_ALT_FLOW:
		/* 3 bytes needed for 20 bits */
		if (info_len >= 3) {
			failed->cf_arej_info_valid = B_TRUE;
			/* take the first 20 bits */
			ari->ari_flow =
			    b2h32(*(uint32_t *)&rej_msgp->rej_addl_rej_info)
			    >> 12;
		}
		break;

	default:
		break;
	}

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(failed->cf_arej_info_valid))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ari))
}
/*
 * ibcm_init_clp_to_mad:
 *	Used to copy classportinfo to MAD from client initialized args
 *
 *	The MAD image starts out as a copy of the global ibcm_clpinfo and
 *	its Redirect* fields are then overwritten from rinfo, converted with
 *	the h2b16/h2b32/h2b64 macros.
 *
 * INPUTS:
 *	clp	- classportinfo message to be filled in
 *	rinfo	- client supplied redirect information
 */
static void
ibcm_init_clp_to_mad(ibcm_classportinfo_msg_t *clp, ibt_redirect_info_t *rinfo)
{
	uint32_t	tc_sl_flow;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*clp))

	bcopy(&ibcm_clpinfo, clp, sizeof (ibcm_clpinfo));

	clp->RedirectGID_hi = h2b64(rinfo->rdi_gid.gid_prefix);
	clp->RedirectGID_lo = h2b64(rinfo->rdi_gid.gid_guid);

	/*
	 * TClass goes in bits 31-24, SL in bits 23-20 and the flow label in
	 * bits 19-0.  Each operand is converted to uint32_t before it is
	 * shifted so that a narrow field is not promoted to a signed int
	 * and then shifted into the sign bit.
	 */
	tc_sl_flow = ((uint32_t)rinfo->rdi_tclass << 24) |
	    ((uint32_t)rinfo->rdi_sl << 20) |
	    ((uint32_t)rinfo->rdi_flow & 0xfffff);
	clp->RedirectTC_plus = h2b32(tc_sl_flow);

	clp->RedirectLID = h2b16(rinfo->rdi_dlid);
	/* only the low 24 bits of the QPN are sent */
	clp->RedirectQP_plus = h2b32(rinfo->rdi_qpn & 0xffffff);
	clp->RedirectQ_Key = h2b32(rinfo->rdi_qkey);
	clp->RedirectP_Key = h2b16(rinfo->rdi_pkey);

	IBTF_DPRINTF_L4(cmlog, "ibcm_init_clp_to_mad: RedirectGID= %llX:%llX,"
	    " RedirectLID= 0x%lX", clp->RedirectGID_hi, clp->RedirectGID_lo,
	    clp->RedirectLID);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*clp))
}
/*
 * ibcm_init_clp_from_mad:
 *	Used to initialize classportinfo to be returned to clients, from MAD
 *
 *	This is the inverse of ibcm_init_clp_to_mad(): every Redirect* field
 *	is converted with the b2h16/b2h32/b2h64 macros and unpacked with the
 *	same bit layout that function uses to pack it.
 *
 * INPUTS:
 *	clp	- classportinfo message as found in the MAD
 *	rinfo	- redirect information to be filled in for the client
 */
static void
ibcm_init_clp_from_mad(ibcm_classportinfo_msg_t *clp,
    ibt_redirect_info_t *rinfo)
{
	uint32_t	temp32;

	rinfo->rdi_gid.gid_prefix = b2h64(clp->RedirectGID_hi);
	rinfo->rdi_gid.gid_guid = b2h64(clp->RedirectGID_lo);

	/* TClass: bits 31-24, SL: bits 23-20, flow label: bits 19-0 */
	temp32 = b2h32(clp->RedirectTC_plus);
	rinfo->rdi_tclass = temp32 >> 24;
	rinfo->rdi_sl = (temp32 >> 20) & 0xf;
	/* the flow label is 20 bits wide, matching the 0xfffff used to pack */
	rinfo->rdi_flow = temp32 & 0xfffff;

	rinfo->rdi_dlid = b2h16(clp->RedirectLID);

	/*
	 * Convert to host order first and mask afterwards.  Masking the
	 * raw MAD value would strip the wrong byte whenever b2h32() actually
	 * swaps bytes.
	 */
	rinfo->rdi_qpn = b2h32(clp->RedirectQP_plus) & 0xffffff;
	rinfo->rdi_qkey = b2h32(clp->RedirectQ_Key);
	rinfo->rdi_pkey = b2h16(clp->RedirectP_Key);

	IBTF_DPRINTF_L4(cmlog, "ibcm_init_clp_from_mad: RedirectGID= %llX:%llX,"
	    " RedirectLID= 0x%lX", rinfo->rdi_gid.gid_prefix,
	    rinfo->rdi_gid.gid_guid, rinfo->rdi_dlid);
}
/*
 * ibcm_cep_state_rej_est:
 *	QP state transition function called for an incoming REJ
 *	on active side in established state
 *
 *	Moves the QP to the error state, detaches the statep from the
 *	channel, marks the channel as closing and reports
 *	IBT_CM_EVENT_CONN_CLOSED (IBT_CM_CLOSED_REJ_RCVD) to the client
 *	handler when one is registered.
 *
 * INPUTS:
 *	statep	- connection state pointer
 *
 * RETURN VALUE: NONE
 */
void
ibcm_cep_state_rej_est(ibcm_state_data_t *statep)
{
	ibt_cm_event_t	close_event;
	ibt_status_t	rc;

	IBTF_DPRINTF_L3(cmlog, "ibcm_cep_state_rej_est:");

	rc = ibcm_cep_to_error_state(statep);

	IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_rej_est: statep 0x%p "
	    "ibcm_cep_to_error_state returned %d", statep, rc);

	/* Disassociate state structure and CM */
	IBCM_SET_CHAN_PRIVATE(statep->channel, NULL);

	ibtl_cm_chan_is_closing(statep->channel);

	/* nothing more to do without a client handler */
	if (statep->cm_handler == NULL)
		return;

	/* invoke the CM handler */
	bzero(&close_event, sizeof (close_event));
	close_event.cm_type = IBT_CM_EVENT_CONN_CLOSED;
	close_event.cm_channel = statep->channel;
	close_event.cm_session_id = NULL;
	close_event.cm_priv_data = NULL;
	close_event.cm_priv_data_len = 0;
	close_event.cm_event.closed = IBT_CM_CLOSED_REJ_RCVD;

	/*
	 * NOTE(review): this logs cm_event.failed.cf_reason although the
	 * member filled in above is cm_event.closed.
	 */
	IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_rej_est: "
	    "rej_reason = %d", close_event.cm_event.failed.cf_reason);

	ibcm_insert_trace(statep, IBCM_TRACE_CALLED_CONN_CLOSE_EVENT);

	(void) statep->cm_handler(statep->state_cm_private, &close_event,
	    NULL, NULL, 0);

	ibcm_insert_trace(statep, IBCM_TRACE_RET_CONN_CLOSE_EVENT);
}
/*
 * ibcm_sidr_req_ud_handler:
 *	Invoke Client's UD handler For SIDR_REQ msg
 *
 *	The client writes its reply private data straight into the private
 *	data area of the stored outgoing SIDR_REP.  When the client accepts,
 *	the channel it returned is queried for its QPN and Q_Key, and it
 *	must be a UD channel.
 *
 * INPUTS:
 *	ud_statep	- ud_state pointer
 *	sidr_reqp	- SIDR_REQ message pointer
 *	cm_mad_addr	- address information of the MAD (port number)
 *	sidr_status	- out: status to be placed in the SIDR_REP; not
 *			  written when IBCM_DEFER is returned
 *
 * RETURN VALUE:
 *	IBCM_DEFER		- the client handler returned IBT_CM_DEFER
 *	IBCM_SEND_SIDR_REP	- in every other case
 */
static ibcm_status_t
ibcm_sidr_req_ud_handler(ibcm_ud_state_data_t *ud_statep,
    ibcm_sidr_req_msg_t *sidr_reqp, ibcm_mad_addr_t *cm_mad_addr,
    ibt_sidr_status_t *sidr_status)
{
	void				*priv_data = NULL;
	ibt_cm_ud_event_t		ud_event;
	ibcm_sidr_rep_msg_t		*sidr_repp;
	ibt_cm_ud_return_args_t		ud_ret_args;
	ibt_cm_status_t			cb_status;
	ibt_qp_query_attr_t		qp_attr;
	ibt_status_t			retval;
	ibcm_ud_clnt_reply_info_t	ud_clnt_info;

	/* Check first if UD client handler is valid */
	ASSERT(ud_statep->ud_cm_handler != NULL);

	/*
	 * Start from zeroed structures so that no uninitialized stack data
	 * is handed to the client handler (fields of ud_event that are not
	 * set below) or to ibcm_process_sidr_req_cm_hdlr() (ud_qkey and
	 * ud_qpn are only filled in on accept).
	 */
	bzero(&ud_event, sizeof (ud_event));
	bzero(&ud_clnt_info, sizeof (ud_clnt_info));

	/* Fill in ibt_cm_ud_event_t */
	ud_event.cm_type = IBT_CM_UD_EVENT_SIDR_REQ;
	ud_event.cm_session_id = ud_statep;
	ud_event.cm_event.sidr_req.sreq_service_id = ud_statep->ud_svc_id;
	ud_event.cm_event.sidr_req.sreq_hca_guid = ud_statep->ud_hcap->hca_guid;
	ud_event.cm_event.sidr_req.sreq_pkey = b2h16(sidr_reqp->sidr_req_pkey);
	ud_event.cm_event.sidr_req.sreq_hca_port = cm_mad_addr->port_num;

	ud_event.cm_priv_data =
	    &(sidr_reqp->sidr_req_private_data[0]);
	ud_event.cm_priv_data_len = IBT_SIDR_REQ_PRIV_DATA_SZ;

	/* the client's reply private data lands directly in the SIDR_REP */
	sidr_repp =
	    (ibcm_sidr_rep_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg);

	priv_data = &(sidr_repp->sidr_rep_private_data[0]);

	bzero(&ud_ret_args, sizeof (ud_ret_args));

	/* Invoke the client handler */
	cb_status = ud_statep->ud_cm_handler(ud_statep->ud_state_cm_private,
	    &ud_event, &ud_ret_args, priv_data, IBT_SIDR_REP_PRIV_DATA_SZ);

	if (cb_status == IBT_CM_DEFER) {

		/* unblock any blocked cm ud proceed api calls */
		mutex_enter(&ud_statep->ud_state_mutex);
		ud_statep->ud_clnt_proceed = IBCM_UNBLOCK;
		cv_broadcast(&ud_statep->ud_block_client_cv);
		mutex_exit(&ud_statep->ud_state_mutex);

		return (IBCM_DEFER);
	}

	/* fail any blocked ud cm proceed api calls - client bug */
	mutex_enter(&ud_statep->ud_state_mutex);
	ud_statep->ud_clnt_proceed = IBCM_FAIL;
	cv_broadcast(&ud_statep->ud_block_client_cv);
	mutex_exit(&ud_statep->ud_state_mutex);

	/* do the query qp as soon as possible, after return from cm handler */
	if (cb_status == IBT_CM_ACCEPT) {
		retval = ibt_query_qp(ud_ret_args.ud_channel, &qp_attr);
		if (retval != IBT_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_sidr_req_ud_handler: "
			    "Failed to retrieve QPN from the channel: %d",
			    retval);
			*sidr_status = IBT_CM_SREP_NO_CHAN;
			return (IBCM_SEND_SIDR_REP);
		} else if (qp_attr.qp_info.qp_trans != IBT_UD_SRV) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_sidr_req_ud_handler: "
			    "Server/Passive returned non-UD %d transport type "
			    "QP", qp_attr.qp_info.qp_trans);
			*sidr_status = IBT_CM_SREP_NO_CHAN;
			return (IBCM_SEND_SIDR_REP);
		}

		ud_clnt_info.ud_qkey = qp_attr.qp_info.qp_transport.ud.ud_qkey;
		ud_clnt_info.ud_qpn = qp_attr.qp_qpn;
	}

	ud_clnt_info.priv_data = priv_data;
	ud_clnt_info.priv_data_len = ud_ret_args.ud_ret_len;
	ud_clnt_info.redirect_infop = &ud_ret_args.ud_redirect;

	ibcm_process_sidr_req_cm_hdlr(ud_statep, cb_status, &ud_clnt_info,
	    sidr_status, sidr_repp);

	return (IBCM_SEND_SIDR_REP);
}
/*
 * ibcm_process_sidr_req_cm_hdlr:
 *	Turns the client's answer to a SIDR_REQ into a SIDR_REP status and
 *	fills in the matching part of the SIDR_REP: the Q_Key and QPN on
 *	accept, the classportinfo on redirect.  IBT_CM_DEFAULT is treated
 *	as IBT_CM_REJECT.
 *
 * INPUTS:
 *	ud_statep	- ud_state pointer (not used here)
 *	cb_status	- status returned by the client handler
 *	ud_clnt_info	- client reply: Q_Key, QPN and redirect information
 *	sidr_status	- out: status for the SIDR_REP
 *	sidr_repp	- SIDR_REP message being built
 */
/*ARGSUSED*/
void
ibcm_process_sidr_req_cm_hdlr(ibcm_ud_state_data_t *ud_statep,
    ibt_cm_status_t cb_status, ibcm_ud_clnt_reply_info_t *ud_clnt_info,
    ibt_sidr_status_t *sidr_status, ibcm_sidr_rep_msg_t *sidr_repp)
{
	/* the remapped value is also what gets logged below */
	if (cb_status == IBT_CM_DEFAULT)
		cb_status = IBT_CM_REJECT;

	switch (cb_status) {
	case IBT_CM_ACCEPT:
		*sidr_status = IBT_CM_SREP_CHAN_VALID;
		break;
	case IBT_CM_NO_CHANNEL:
		*sidr_status = IBT_CM_SREP_NO_CHAN;
		break;
	case IBT_CM_REDIRECT:
		*sidr_status = IBT_CM_SREP_REDIRECT;
		break;
	case IBT_CM_REJECT:
	case IBT_CM_NO_RESOURCE:
	default:
		*sidr_status = IBT_CM_SREP_REJ;
		break;
	}

	if (*sidr_status == IBT_CM_SREP_CHAN_VALID) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sidr_repp))
		sidr_repp->sidr_rep_qkey =
		    h2b32(ud_clnt_info->ud_qkey);
		/* the QPN occupies the upper 24 bits of the field */
		sidr_repp->sidr_rep_qpn_plus =
		    h2b32(ud_clnt_info->ud_qpn << 8);
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sidr_repp))
		return;
	}

	IBTF_DPRINTF_L2(cmlog, "ibcm_process_sidr_req_cm_hdlr: "
	    "ud_handler return a failure: %d", cb_status);

	if (*sidr_status == IBT_CM_SREP_REDIRECT) {
		/*
		 * typecasting to ibcm_classportinfo_msg_t is ok, as addl info
		 * begins at offset 24 in sidr rep
		 */
		ibcm_init_clp_to_mad(
		    (ibcm_classportinfo_msg_t *)
		    &sidr_repp->sidr_rep_class_port_info,
		    ud_clnt_info->redirect_infop);
	}
}
/*
 * ibcm_sidr_rep_ud_handler:
 *	Invoke Client's UD handler For SIDR_REP msg
 *
 *	The status, remote QPN and remote Q_Key are taken from the SIDR_REP.
 *	For a redirect reply the classportinfo is decoded as well, and also
 *	copied to ud_return_data when that is set.  The handler is called
 *	for information only; its return value is ignored.
 *
 * INPUTS:
 *	ud_statep	- ud_state pointer
 *	sidr_rep_msgp	- SIDR_REP message pointer
 */
static void
ibcm_sidr_rep_ud_handler(ibcm_ud_state_data_t *ud_statep,
    ibcm_sidr_rep_msg_t *sidr_rep_msgp)
{
	ibt_cm_ud_event_t	ud_event;

	IBTF_DPRINTF_L5(cmlog, "ibcm_sidr_rep_ud_handler: ud_statep 0x%p",
	    ud_statep);

	/* Check first if UD client handler is valid */
	if (ud_statep->ud_cm_handler == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_sidr_rep_ud_handler: "
		    "cm_handler NULL");
		return;
	}

	/*
	 * Zero the event first: srep_redirect is only filled in for a
	 * redirect reply and must not reach the client as uninitialized
	 * stack data otherwise.
	 */
	bzero(&ud_event, sizeof (ud_event));

	/* Fill in ibt_cm_ud_event_t */
	ud_event.cm_type = IBT_CM_UD_EVENT_SIDR_REP;
	ud_event.cm_session_id = NULL;
	ud_event.cm_event.sidr_rep.srep_status =
	    sidr_rep_msgp->sidr_rep_rep_status;
	/* the QPN occupies the upper 24 bits of the field */
	ud_event.cm_event.sidr_rep.srep_remote_qpn =
	    b2h32(sidr_rep_msgp->sidr_rep_qpn_plus) >> 8;
	/* MAD to host conversion, hence b2h32 like the QPN above */
	ud_event.cm_event.sidr_rep.srep_remote_qkey =
	    b2h32(sidr_rep_msgp->sidr_rep_qkey);

	if (ud_event.cm_event.sidr_rep.srep_status == IBT_CM_SREP_REDIRECT) {
		/*
		 * typecasting to ibcm_classportinfo_msg_t is ok, as addl info
		 * begins at offset 24 in sidr rep
		 */
		ibcm_init_clp_from_mad(
		    (ibcm_classportinfo_msg_t *)
		    sidr_rep_msgp->sidr_rep_class_port_info,
		    &ud_event.cm_event.sidr_rep.srep_redirect);

		if (ud_statep->ud_return_data != NULL)
			bcopy(&ud_event.cm_event.sidr_rep.srep_redirect,
			    &ud_statep->ud_return_data->ud_redirect,
			    sizeof (ibt_redirect_info_t));
	}

	ud_event.cm_priv_data = &(sidr_rep_msgp->sidr_rep_private_data[0]);
	ud_event.cm_priv_data_len = IBT_SIDR_REP_PRIV_DATA_SZ;

	/* Invoke the client handler - inform only, so ignore retval */
	(void) ud_statep->ud_cm_handler(ud_statep->ud_state_cm_private,
	    &ud_event, NULL, NULL, 0);
}
/*
 * ibcm_process_lap_msg:
 *	This call processes an incoming LAP message
 *
 *	The LAP is acted upon only for an established, passive-mode
 *	connection whose alternate path state is idle; in any other state
 *	it is dropped.  A LAP carrying the TransactionID of the stored APR
 *	is answered by re-posting that APR.  Otherwise the client is
 *	consulted through ibcm_cep_state_lap() and, unless it defers, the
 *	APR is posted.
 *
 * INPUTS:
 *	hcap		- HCA entry pointer
 *	input_madp	- incoming CM LAP MAD
 *	cm_mad_addr	- Address information for the MAD
 *
 * RETURN VALUE: NONE
 */
/* ARGSUSED */
void
ibcm_process_lap_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp,
    ibcm_mad_addr_t *cm_mad_addr)
{
	ibcm_state_data_t	*statep = NULL;
	ibcm_lap_msg_t		*lap_msg;
	ibcm_apr_msg_t		*apr_msg;
	ibcm_status_t		lookup_rc;
	ibcm_status_t		clnt_response;

	lap_msg = (ibcm_lap_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]);

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_lap_msg:");

	rw_enter(&hcap->hca_state_rwlock, RW_READER);
	lookup_rc = ibcm_lookup_msg(IBCM_INCOMING_LAP,
	    b2h32(lap_msg->lap_remote_comm_id), 0, 0, hcap, &statep);
	rw_exit(&hcap->hca_state_rwlock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_lap_msg: lookup status %x"
	    " com id %x", lookup_rc,
	    b2h32(lap_msg->lap_remote_comm_id));

	if (lookup_rc != IBCM_LOOKUP_EXISTS) {
		/* Post a REJ message ? - but spec doesn't state so */
		return;
	}

	/* There is an existing state structure entry with active comid */

	ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_LAP);

	mutex_enter(&statep->state_mutex);

	/* drop the LAP MAD in any other state */
	if ((statep->state != IBCM_STATE_ESTABLISHED) ||
	    (statep->ap_state != IBCM_AP_STATE_IDLE) ||
	    (statep->mode != IBCM_PASSIVE_MODE))
		goto drop;

	/*
	 * Same TransactionID as the stored APR: re-post it.  The callee is
	 * entered and left with the state mutex held.
	 */
	if ((statep->lapr_msg != NULL) &&
	    (IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID ==
	    ((ib_mad_hdr_t *)(input_madp))->TransactionID)) {
		ibcm_post_stored_apr_mad(statep, input_madp);
		goto drop;
	}

	statep->ap_state = IBCM_AP_STATE_LAP_RCVD;
	statep->clnt_proceed = IBCM_BLOCK;
	mutex_exit(&statep->state_mutex);

	/* the APR message buffer is allocated on first use */
	if ((statep->lapr_msg == NULL) &&
	    (ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl,
	    &statep->lapr_msg, MAD_METHOD_SEND) != IBT_SUCCESS)) {

		mutex_enter(&statep->state_mutex);
		statep->clnt_proceed = IBCM_FAIL;
		cv_broadcast(&statep->block_client_cv);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;
	}

	apr_msg = (ibcm_apr_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg);
	IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID =
	    ((ib_mad_hdr_t *)(input_madp))->TransactionID;

	clnt_response = ibcm_cep_state_lap(statep, lap_msg, apr_msg);

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_lap_msg:"
	    " statep 0x%p  apr status %d", statep,
	    apr_msg->apr_ap_status);

	if (clnt_response == IBCM_DEFER) {
		IBTF_DPRINTF_L4(cmlog, "ibcm_process_lap_msg: "
		    "client returned DEFER response");
		return;
	}

	/* fail any blocked cm proceed api calls - client bug */
	mutex_enter(&statep->state_mutex);
	statep->clnt_proceed = IBCM_FAIL;
	cv_broadcast(&statep->block_client_cv);
	mutex_exit(&statep->state_mutex);

	ibcm_post_apr_mad(statep);
	return;

drop:
	/* state mutex is held here */
	IBCM_REF_CNT_DECR(statep);	/* decrement the ref count */
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_post_stored_apr_mad:
 *	Builds and posts an APR MAD from the stored APR MAD
 *
 *	The stored APR is copied to a local buffer while the state mutex is
 *	still held; the mutex is then dropped for the allocation and the
 *	post, and taken again before returning.  If no outgoing message can
 *	be allocated, nothing is posted.
 *
 * INPUTS:
 *	statep		- pointer to ibcm_state_data_t
 *	input_madp	- pointer to incoming lap mad
 *
 * RETURN VALUE:
 *	NONE
 *
 * This function is called holding the state mutex, and returns
 * holding the state mutex
 */
static void
ibcm_post_stored_apr_mad(ibcm_state_data_t *statep, uint8_t *input_madp)
{
	uint8_t		saved_apr[IBCM_MSG_SIZE];
	ibmf_msg_t	*out_msg;

	/* Need to make a copy, else an incoming new LAP may modify lapr_msg */
	bcopy(IBCM_OUT_MSGP(statep->lapr_msg), saved_apr, IBCM_MSG_SIZE);

	mutex_exit(&statep->state_mutex);

	if (ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl,
	    &out_msg, MAD_METHOD_SEND) == IBT_SUCCESS) {

		bcopy(saved_apr, IBCM_OUT_MSGP(out_msg), IBCM_MSG_SIZE);

		IBCM_OUT_HDRP(out_msg)->AttributeID =
		    h2b16(IBCM_INCOMING_APR + IBCM_ATTR_BASE_ID);

		/* the reply carries the TransactionID of the incoming LAP */
		IBCM_OUT_HDRP(out_msg)->TransactionID =
		    ((ib_mad_hdr_t *)(input_madp))->TransactionID;

		ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_APR);

		/* ibcm_free_out_msg done in ibcm_post_stored_apr_complete */
		ibcm_post_rc_mad(statep, out_msg,
		    ibcm_post_stored_apr_complete, out_msg);
	} else {
		IBTF_DPRINTF_L2(cmlog, "ibcm_post_stored_apr_mad: "
		    "ibcm_alloc_out_msg failed");
	}

	mutex_enter(&statep->state_mutex);
}
/*
 * ibcm_cep_state_lap:
 *	Processes an incoming LAP message for the cep: validates it,
 *	delivers an IBT_CM_EVENT_LAP_RCV event to the client's cm handler
 *	and turns the handler's answer into the contents of the APR.
 *
 * INPUTS:
 *	statep	- pointer to ibcm_state_data_t
 *	lap_msg	- lap msg received
 *	apr_msg	- apr msg to be sent (filled in here)
 *
 * RETURN VALUE:
 *	IBCM_SEND_APR	- apr_msg has been filled in
 *	IBCM_DEFER	- the cm handler returned IBT_CM_DEFER; the LAP has
 *			  been saved in statep->defer_cm_msg
 */
ibcm_status_t
ibcm_cep_state_lap(ibcm_state_data_t *statep, ibcm_lap_msg_t *lap_msg,
    ibcm_apr_msg_t *apr_msg)
{
	ibt_cm_event_t		event;
	ibt_cm_return_args_t	ret_args;
	ibt_cm_status_t		cb_status;
	ibcm_clnt_reply_info_t	clnt_info;
	uint32_t		lap_rqpn;

	IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_lap: statep 0x%p", statep);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*apr_msg))

	/* Without APM support in the HCA the request cannot be honoured */
	if ((statep->hcap->hca_caps & IBT_HCA_AUTO_PATH_MIG) == 0) {
		apr_msg->apr_ap_status = IBT_CM_AP_NOT_SUPPORTED;
		return (IBCM_SEND_APR);
	}

	/* the QPN occupies the upper 24 bits of the host-order word */
	lap_rqpn = b2h32(lap_msg->lap_remote_qpn_eecn_plus) >> 8;
	if (statep->local_qpn != lap_rqpn) {
		apr_msg->apr_ap_status = IBT_CM_AP_REJECT;
		IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_lap: local_qpn %x does "
		    "not match remote's remote_qpn %x", statep->local_qpn,
		    lap_rqpn);
		return (IBCM_SEND_APR);
	}

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*apr_msg))

	/* Build the LAP_RCV event handed to the client */
	bzero(&event, sizeof (event));

	event.cm_type = IBT_CM_EVENT_LAP_RCV;
	event.cm_channel = statep->channel;
	event.cm_session_id = statep;
	event.cm_priv_data = lap_msg->lap_private_data;
	event.cm_priv_data_len = IBT_LAP_PRIV_DATA_SZ;

	/* timeout exponent: top 5 bits of the 4th byte of the qpn word */
	event.cm_event.lap.lap_timeout = ibt_ib2usec(
	    ((uint8_t *)&lap_msg->lap_remote_qpn_eecn_plus)[3] >> 3);

	ibcm_fill_adds_from_lap(&event.cm_event.lap.lap_alternate_path,
	    lap_msg, IBCM_PASSIVE_MODE);

	/* the handler writes its private data straight into the APR */
	cb_status = statep->cm_handler(statep->state_cm_private, &event,
	    &ret_args, apr_msg->apr_private_data, IBT_APR_PRIV_DATA_SZ);

	IBTF_DPRINTF_L3(cmlog, "ibcm_cep_state_lap: cb_status = %d", cb_status);

	if (cb_status != IBT_CM_DEFER) {
		/* immediate answer: no separately supplied private data */
		clnt_info.reply_event =
		    (ibt_cm_proceed_reply_t *)&ret_args.cm_ret;
		clnt_info.priv_data = NULL;
		clnt_info.priv_data_len = 0;

		ibcm_process_cep_lap_cm_hdlr(statep, cb_status, &clnt_info,
		    lap_msg, apr_msg);
		return (IBCM_SEND_APR);
	}

	/* Deferred: keep a copy of the LAP for later processing */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(statep->defer_cm_msg))

	if (statep->defer_cm_msg == NULL)
		statep->defer_cm_msg =
		    kmem_zalloc(IBCM_MSG_SIZE, KM_SLEEP);
	bcopy(lap_msg, statep->defer_cm_msg, IBCM_MSG_SIZE);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(statep->defer_cm_msg))

	/* unblock any blocked cm proceed api calls */
	mutex_enter(&statep->state_mutex);
	statep->clnt_proceed = IBCM_UNBLOCK;
	cv_broadcast(&statep->block_client_cv);
	mutex_exit(&statep->state_mutex);

	return (IBCM_DEFER);
}
/*
 * ibcm_fill_adds_from_lap:
 *	Fills the address vector (part of event structure passed to
 *	client, or of the QP attributes) from the LAP message
 *
 * INPUTS:
 *	adds	- Address vector to be filled-in
 *	lap_msg	- LAP message used to fill the address vector
 *	mode	- IBCM_PASSIVE_MODE: the LAP's "local" port is our
 *		  destination and its "remote" port is our source.
 *		  Any other value: the LAP's "local" port is our source
 *		  and its "remote" port is our destination.
 *
 * RETURN VALUE: NONE
 */
static void
ibcm_fill_adds_from_lap(ibt_adds_vect_t *adds, ibcm_lap_msg_t *lap_msg,
    ibcm_mode_t mode)
{
	ib_gid_t	*l_gid_dst;	/* receives the LAP "local" GID */
	ib_gid_t	*r_gid_dst;	/* receives the LAP "remote" GID */

	/* SL lives in the upper nibble, the rate in the low 6 bits */
	adds->av_srvl = lap_msg->lap_alt_sl_plus >> 4;
	adds->av_srate = lap_msg->lap_alt_srate_plus & 0x3f;

	if (mode == IBCM_PASSIVE_MODE) {
		l_gid_dst = &adds->av_dgid;
		r_gid_dst = &adds->av_sgid;
		adds->av_dlid = b2h16(lap_msg->lap_alt_l_port_lid);
	} else {
		l_gid_dst = &adds->av_sgid;
		r_gid_dst = &adds->av_dgid;
		adds->av_dlid = b2h16(lap_msg->lap_alt_r_port_lid);
	}

	l_gid_dst->gid_prefix = b2h64(lap_msg->lap_alt_l_port_gid.gid_prefix);
	l_gid_dst->gid_guid = b2h64(lap_msg->lap_alt_l_port_gid.gid_guid);
	r_gid_dst->gid_prefix = b2h64(lap_msg->lap_alt_r_port_gid.gid_prefix);
	r_gid_dst->gid_guid = b2h64(lap_msg->lap_alt_r_port_gid.gid_guid);

	IBTF_DPRINTF_L4(cmlog, "ibcm_fill_adds_from_lap: SGID=(%llX:%llX)",
	    adds->av_sgid.gid_prefix, adds->av_sgid.gid_guid);

	IBTF_DPRINTF_L4(cmlog, "ibcm_fill_adds_from_lap: DGID=(%llX:%llX)",
	    adds->av_dgid.gid_prefix, adds->av_dgid.gid_guid);

	/* bit 0x8 set means no GRH; otherwise copy off the GRH info */
	if ((lap_msg->lap_alt_sl_plus & 0x8) != 0) {
		adds->av_send_grh = B_FALSE;
	} else {
		uint32_t flow_tclass = b2h32(lap_msg->lap_alt_flow_label_plus);

		adds->av_send_grh = B_TRUE;
		/* flow label: upper 20 bits; traffic class: lowest byte */
		adds->av_flow = flow_tclass >> 12;
		adds->av_tclass = flow_tclass & 0xff;
		adds->av_hop = lap_msg->lap_alt_hop_limit;
	}
}
/*
 * ibcm_process_cep_lap_cm_hdlr:
 *	Processes the cm handler response for an incoming LAP.
 *
 *	Maps the handler status onto an APR status, copies any private
 *	data supplied through clnt_info into the APR and, when the handler
 *	accepted, loads the alternate path described by the LAP into the
 *	QP. Any failure while loading the path turns the APR status into
 *	IBT_CM_AP_REJECT.
 *
 * INPUTS:
 *	statep		- pointer to ibcm_state_data_t
 *	cb_status	- status returned by the client's cm handler
 *	clnt_info	- client reply info (redirect data, private data)
 *	lap_msg		- lap msg received
 *	apr_msg		- apr msg to be sent (filled in here)
 *
 * RETURN VALUE: NONE
 */
void
ibcm_process_cep_lap_cm_hdlr(ibcm_state_data_t *statep,
    ibt_cm_status_t cb_status, ibcm_clnt_reply_info_t *clnt_info,
    ibcm_lap_msg_t *lap_msg, ibcm_apr_msg_t *apr_msg)
{
	ibtl_cm_hca_port_t	port;
	ibt_qp_query_attr_t	qp_attrs;
	ibt_cep_modify_flags_t	cep_flags;
	ibt_status_t		status;
	ibt_adds_vect_t		*adds;

	/* "default" from the handler is treated as a reject */
	if (cb_status == IBT_CM_DEFAULT)
		cb_status = IBT_CM_REJECT;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*apr_msg))

	/* translate the handler status into an APR status */
	apr_msg->apr_addl_info_len = 0;
	switch (cb_status) {
	case IBT_CM_ACCEPT:
		apr_msg->apr_ap_status = IBT_CM_AP_LOADED;
		break;

	case IBT_CM_REJECT:
	case IBT_CM_NO_RESOURCE:
		apr_msg->apr_ap_status = IBT_CM_AP_REJECT;
		break;

	case IBT_CM_REDIRECT:
		apr_msg->apr_ap_status = IBT_CM_AP_REDIRECT;
		/* the redirect info travels as additional info in the APR */
		apr_msg->apr_addl_info_len = sizeof (ibcm_classportinfo_msg_t);
		ibcm_init_clp_to_mad(
		    (ibcm_classportinfo_msg_t *)apr_msg->apr_addl_info,
		    &clnt_info->reply_event->apr);
		break;

	default:
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_lap_cm_hdlr: statep %p"
		    " Client handler unexpected return %x", statep, cb_status);
		cb_status = IBT_CM_REJECT;
		apr_msg->apr_ap_status = IBT_CM_AP_REJECT;
		break;
	}

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_cep_lap_cm_hdlr: statep 0x%p "
	    " client handler returned %d, apr status %d", statep, cb_status,
	    apr_msg->apr_ap_status);

	/* private data handed in via clnt_info goes into the APR */
	if ((clnt_info->priv_data != NULL) && (clnt_info->priv_data_len > 0))
		bcopy(clnt_info->priv_data, apr_msg->apr_private_data,
		    min(clnt_info->priv_data_len, IBT_APR_PRIV_DATA_SZ));

	/* only an accept requires touching the QP */
	if (cb_status != IBT_CM_ACCEPT)
		return;

	/* the alternate path can only be loaded in RTS or SQD */
	if (ibt_query_qp(statep->channel, &qp_attrs) != IBT_SUCCESS ||
	    (qp_attrs.qp_info.qp_state != IBT_STATE_RTS &&
	    qp_attrs.qp_info.qp_state != IBT_STATE_SQD)) {
		apr_msg->apr_ap_status = IBT_CM_AP_REJECT;
		return;
	}

	/* Assemble the arguments for ibt_modify_qp */
	cep_flags = IBT_CEP_SET_ALT_PATH | IBT_CEP_SET_STATE;

	/* RTS=>RTS or SQD=>SQD: current state equals the target state */
	qp_attrs.qp_info.qp_current_state = qp_attrs.qp_info.qp_state;

	adds = &IBCM_QP_RC(qp_attrs).rc_alt_path.cep_adds_vect;
	ibcm_fill_adds_from_lap(adds, lap_msg, IBCM_PASSIVE_MODE);

	status = ibtl_cm_get_hca_port(adds->av_sgid, statep->local_hca_guid,
	    &port);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_lap_cm_hdlr:"
		    " ibtl_cm_get_hca_port failed status %d", status);
		apr_msg->apr_ap_status = IBT_CM_AP_REJECT;
		return;
	}

	IBCM_QP_RC(qp_attrs).rc_alt_path.cep_hca_port_num = port.hp_port;

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_cep_lap_cm_hdlr: statep 0x%p "
	    "gid = (%llx, %llx), port_num = %d", statep,
	    adds->av_dgid.gid_prefix, adds->av_dgid.gid_guid, port.hp_port);

	/* The pkey is same as the primary path */
	status = ibt_pkey2index_byguid(statep->local_hca_guid,
	    port.hp_port, statep->pkey,
	    &IBCM_QP_RC(qp_attrs).rc_alt_path.cep_pkey_ix);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_lap_cm_hdlr: statep %p"
		    " ibt_pkey2index_byguid failed %d", statep, status);
		apr_msg->apr_ap_status = IBT_CM_AP_REJECT;
		return;
	}

	/* ack timeout: upper 5 bits of the LAP field */
	IBCM_QP_RC(qp_attrs).rc_alt_path.cep_timeout =
	    lap_msg->lap_alt_local_acktime_plus >> 3;

	qp_attrs.qp_info.qp_trans = IBT_RC_SRV;

	/* a QP that has already migrated must be rearmed */
	if (IBCM_QP_RC(qp_attrs).rc_mig_state == IBT_STATE_MIGRATED) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_cep_lap_cm_hdlr: statep %p"
		    ": rearming APM", statep);
		cep_flags |= IBT_CEP_SET_MIG;
		IBCM_QP_RC(qp_attrs).rc_mig_state = IBT_STATE_REARMED;
	}

	status = ibt_modify_qp(statep->channel, cep_flags, &qp_attrs.qp_info,
	    NULL);

	if (status == IBT_SUCCESS) {
		ibcm_insert_trace(statep, IBCM_TRACE_SET_ALT);
	} else {
		ibcm_insert_trace(statep, IBCM_TRACE_SET_ALT_FAIL);
	}

#ifdef	DEBUG
	/* re-read the QP so the debug dump shows what is actually set */
	(void) ibt_query_qp(statep->channel, &qp_attrs);
	print_modify_qp("PASSIVE LAP QUERY", statep->channel,
	    cep_flags, &qp_attrs.qp_info);
#endif

	if (status != IBT_SUCCESS) {
		apr_msg->apr_ap_status = IBT_CM_AP_REJECT;
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_lap_cm_hdlr:"
		    " ibt_modify_qp() returned = %d", status);
		return;
	}

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*apr_msg))
}
/*
 * ibcm_post_apr_mad:
 *	Completes the APR held in statep->lapr_msg (local and remote
 *	communication ids, APR attribute id) and posts it through
 *	ibcm_post_rc_mad with ibcm_post_apr_complete as the completion
 *	routine.
 *
 * INPUTS:
 *	statep	- state pointer
 *
 * RETURN VALUE: NONE
 */
void
ibcm_post_apr_mad(ibcm_state_data_t *statep)
{
	ibcm_apr_msg_t	*apr;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*apr))

	apr = (ibcm_apr_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg);

	/* ids are carried in big-endian (wire) order */
	apr->apr_local_comm_id = h2b32(statep->local_comid);
	apr->apr_remote_comm_id = h2b32(statep->remote_comid);

	IBCM_OUT_HDRP(statep->lapr_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_APR + IBCM_ATTR_BASE_ID);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*apr))

	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_APR);

	ibcm_post_rc_mad(statep, statep->lapr_msg, ibcm_post_apr_complete,
	    statep);
}
/*
 * ibcm_process_apr_msg:
 *	This call processes an incoming APR message
 *
 *	The APR is matched to a state structure by its remote comm id and
 *	accepted only if a LAP is stored for the connection, the
 *	TransactionID matches that of the stored LAP, and the connection is
 *	ESTABLISHED with ap_state LAP_SENT or MRA_LAP_RCVD. In every other
 *	case the MAD is dropped.
 *
 * INPUTS:
 *	hcap		- HCA entry pointer
 *	input_madp	- incoming CM APR MAD
 *	cm_mad_addr	- Address information for the MAD (not used here)
 *
 * RETURN VALUE: NONE
 */
/*ARGSUSED*/
void
ibcm_process_apr_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp,
    ibcm_mad_addr_t *cm_mad_addr)
{
	ibcm_status_t		state_lookup_status;
	ibcm_apr_msg_t		*apr_msg = (ibcm_apr_msg_t *)
	    (&input_madp[IBCM_MAD_HDR_SIZE]);
	ibcm_state_data_t	*statep = NULL;

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_apr_msg:");

	rw_enter(&hcap->hca_state_rwlock, RW_READER);
	state_lookup_status = ibcm_lookup_msg(IBCM_INCOMING_APR,
	    b2h32(apr_msg->apr_remote_comm_id), 0, 0, hcap, &statep);
	rw_exit(&hcap->hca_state_rwlock);

	if (state_lookup_status != IBCM_LOOKUP_EXISTS) {
		return;
	}

	/*
	 * lapr_msg is allocated lazily, so it is NULL on a connection that
	 * never exchanged a LAP. An unsolicited APR must be dropped here
	 * rather than dereferencing a NULL message below.
	 */
	if (statep->lapr_msg == NULL) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_apr_msg: statep 0x%p"
		    ": rcv'd APR MAD with comid 0x%x but no LAP is stored",
		    statep, b2h32(apr_msg->apr_remote_comm_id));
		mutex_enter(&statep->state_mutex);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;
	}

	/* if transaction id is not as expected, drop the APR mad */
	if (IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID !=
	    ((ib_mad_hdr_t *)(input_madp))->TransactionID) {
		/*
		 * Log while the reference is still held: once the ref count
		 * is dropped statep must not be touched any more.
		 */
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_apr_msg: statep 0x%p"
		    ": rcv'd APR MAD with comid 0x%x",
		    statep, b2h32(apr_msg->apr_remote_comm_id));
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_apr_msg: "
		    "tid expected 0x%llX tid found 0x%llX",
		    b2h64(IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID),
		    b2h64(((ib_mad_hdr_t *)(input_madp))->TransactionID));
		mutex_enter(&statep->state_mutex);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;
	}

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_apr_msg: statep 0x%p "
	    "lookup status %x", statep, state_lookup_status);

	mutex_enter(&statep->state_mutex);

	/* an APR is only expected while a LAP is outstanding */
	if (!((statep->state == IBCM_STATE_ESTABLISHED) &&
	    ((statep->ap_state == IBCM_AP_STATE_LAP_SENT) ||
	    (statep->ap_state == IBCM_AP_STATE_MRA_LAP_RCVD)))) {
		IBCM_REF_CNT_DECR(statep); /* decrement the ref count */
		mutex_exit(&statep->state_mutex);
		return;
	}

	statep->ap_state = IBCM_AP_STATE_APR_RCVD;

	/* cancel the LAP timer; untimeout is called without the mutex */
	if (statep->timerid != 0) {
		timeout_id_t timer_val;

		timer_val = statep->timerid;
		statep->timerid = 0;
		mutex_exit(&statep->state_mutex);
		(void) untimeout(timer_val);
	} else {
		mutex_exit(&statep->state_mutex);
	}

	ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_APR);

	/* the stored outgoing message is the LAP this APR answers */
	ibcm_cep_state_apr(statep,
	    (ibcm_lap_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg), apr_msg);

	mutex_enter(&statep->state_mutex);
	statep->ap_state = IBCM_AP_STATE_IDLE;

	/* unblock any DREQ threads and close channels */
	cv_broadcast(&statep->block_mad_cv);

	statep->ap_done = B_TRUE;

	/* wake up blocking ibt_set_alt_path */
	cv_broadcast(&statep->block_client_cv);

	IBCM_REF_CNT_DECR(statep); /* decrement the ref count */
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_set_apr_arej:
 *	Decodes the additional information carried by an APR into the
 *	client-visible ibt_arej_info_t, according to the APR status.
 *
 * INPUTS:
 *	ap_status	- APR status taken from the message
 *	apr_msgp	- APR message received
 *	ari		- additional reject info to fill in
 *	ari_valid	- set to B_TRUE only when the status carries
 *			  additional info and the message holds enough
 *			  bytes of it; B_FALSE otherwise
 *
 * RETURN VALUE: NONE
 */
static void
ibcm_set_apr_arej(int ap_status, ibcm_apr_msg_t *apr_msgp,
    ibt_arej_info_t *ari, boolean_t *ari_valid)
{
	uint8_t				ari_len = apr_msgp->apr_addl_info_len;
	ibcm_classportinfo_msg_t	tclp;

	*ari_valid = B_FALSE;

	IBTF_DPRINTF_L3(cmlog, "ibcm_set_apr_arej: apr_status = %d "
	    "ari_len = %d", ap_status, ari_len);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ari))

	switch (ap_status) {
	case IBT_CM_AP_REDIRECT:
		if (ari_len >= sizeof (ibcm_classportinfo_msg_t)) {
			*ari_valid = B_TRUE;
			bcopy(apr_msgp->apr_addl_info, &tclp, sizeof (tclp));
			ibcm_init_clp_from_mad(&tclp, &ari->ari_redirect);
		}
		break;

	case IBT_CM_AP_RLID_REJECTED:
		if (ari_len >= sizeof (ib_lid_t)) {
			*ari_valid = B_TRUE;
			bcopy(apr_msgp->apr_addl_info, &ari->ari_lid,
			    sizeof (ib_lid_t));
			ari->ari_lid = b2h16(ari->ari_lid);
		}
		break;

	case IBT_CM_AP_RGID_REJECTED:
		if (ari_len >= sizeof (ib_gid_t)) {
			*ari_valid = B_TRUE;
			bcopy(apr_msgp->apr_addl_info, &ari->ari_gid,
			    sizeof (ib_gid_t));
			ari->ari_gid.gid_guid = b2h64(ari->ari_gid.gid_guid);
			ari->ari_gid.gid_prefix =
			    b2h64(ari->ari_gid.gid_prefix);

			IBTF_DPRINTF_L4(cmlog,
			    "ibcm_set_apr_arej: ari_gid= %llX:%llX",
			    ari->ari_gid.gid_prefix, ari->ari_gid.gid_guid);
		}
		break;

	case IBT_CM_AP_FLOW_REJECTED:
		/* 3 bytes needed for 20 bits */
		if (ari_len >= 3) {
			*ari_valid = B_TRUE;
			/* the flow label is the first 20 bits */
			ari->ari_flow =
			    b2h32(*(uint32_t *)&apr_msgp->apr_addl_info) >> 12;
		}
		break;

	case IBT_CM_AP_TCLASS_REJECTED:
		if (ari_len >= 1) {
			*ari_valid = B_TRUE;
			/* the traffic class is the first byte */
			ari->ari_tclass = apr_msgp->apr_addl_info[0];
		}
		break;

	case IBT_CM_AP_HOP_REJECTED:
		if (ari_len >= 1) {
			*ari_valid = B_TRUE;
			/* the hop limit is the first byte */
			ari->ari_hop = apr_msgp->apr_addl_info[0];
		}
		break;

	case IBT_CM_AP_RATE_REJECTED:
		if (ari_len >= 1) {
			*ari_valid = B_TRUE;
			/* the rate is the first 6 bits */
			ari->ari_rate = apr_msgp->apr_addl_info[0] >> 2;
		}
		break;

	case IBT_CM_AP_SL_REJECTED:
		if (ari_len >= 1) {
			*ari_valid = B_TRUE;
			/* the SL is the first 4 bits */
			ari->ari_sl = apr_msgp->apr_addl_info[0] >> 4;
		}
		break;

	default:
		/* no additional info defined for the other statuses */
		break;
	}

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ari))
}
/*
 * ibcm_cep_state_apr:
 *	This call processes an incoming APR message
 *
 *	If the APR reports IBT_CM_AP_LOADED the alternate path from the
 *	LAP sent earlier is programmed into the QP. The outcome is then
 *	reported either through statep->ap_return_data (blocking
 *	ibt_set_alt_path) or through an IBT_CM_EVENT_APR_RCV event to the
 *	client's cm handler (non blocking). If programming the QP fails,
 *	the reported status is IBT_CM_AP_REJECT.
 *
 * INPUTS:
 *	statep	- pointer to ibcm_state_data_t
 *	lap_msg	- lap msg sent earlier
 *	apr_msg	- apr msg received
 *
 * RETURN VALUE: NONE
 */
void
ibcm_cep_state_apr(ibcm_state_data_t *statep, ibcm_lap_msg_t *lap_msg,
    ibcm_apr_msg_t *apr_msg)
{
	ibt_cm_event_t	event;
	ibcm_status_t	status = IBCM_SUCCESS;
	uint8_t		ap_status = apr_msg->apr_ap_status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_cep_state_apr: statep 0x%p, ap_status %d",
	    statep, ap_status);

	if (ap_status == IBT_CM_AP_LOADED)
		status = ibcm_set_qp_from_apr(statep, lap_msg);

	if (statep->ap_return_data != NULL) {	/* blocking call */

		/*
		 * Copy the private data. The length comes from the caller's
		 * return buffer, so clamp it to what the APR carries;
		 * otherwise the copy would read past apr_private_data.
		 */
		if ((statep->ap_return_data->ap_priv_data != NULL) &&
		    (statep->ap_return_data->ap_priv_data_len > 0))
			bcopy(apr_msg->apr_private_data,
			    statep->ap_return_data->ap_priv_data,
			    min(statep->ap_return_data->ap_priv_data_len,
			    IBT_APR_PRIV_DATA_SZ));

		/* initialize the ap status */
		if (status == IBCM_FAILURE) {
			statep->ap_return_data->ap_status = IBT_CM_AP_REJECT;
			statep->ap_return_data->ap_arej_info_valid = B_FALSE;
		} else {
			statep->ap_return_data->ap_status = ap_status;
			ibcm_set_apr_arej(ap_status, apr_msg,
			    &statep->ap_return_data->ap_arej_info,
			    &statep->ap_return_data->ap_arej_info_valid);
		}

		/* do a cv signal for a blocking ibt_set_alt_path */
		mutex_enter(&statep->state_mutex);
		statep->ap_done = B_TRUE;
		cv_broadcast(&statep->block_client_cv);
		mutex_exit(&statep->state_mutex);

	} else {	/* Non blocking call */
		/* Fill up the event */

		bzero(&event, sizeof (event));

		event.cm_type = IBT_CM_EVENT_APR_RCV;
		event.cm_channel = statep->channel;
		event.cm_session_id = NULL;
		event.cm_priv_data = apr_msg->apr_private_data;
		event.cm_priv_data_len = IBT_APR_PRIV_DATA_SZ;

		/* initialize the ap status */
		if (status == IBCM_FAILURE) {
			event.cm_event.apr.apr_status = IBT_CM_AP_REJECT;
			event.cm_event.apr.apr_arej_info_valid = B_FALSE;
		} else {
			event.cm_event.apr.apr_status = ap_status;
			ibcm_set_apr_arej(ap_status, apr_msg,
			    &event.cm_event.apr.apr_arej_info,
			    &event.cm_event.apr.apr_arej_info_valid);
		}

		/* deliver the event; no return args are collected */
		statep->cm_handler(statep->state_cm_private, &event,
		    NULL, apr_msg->apr_private_data, IBT_APR_PRIV_DATA_SZ);
	}

	mutex_enter(&statep->state_mutex);
	ibcm_open_done(statep);
	mutex_exit(&statep->state_mutex);
}
/*
 * ibcm_set_qp_from_apr:
 *	This call sets QP's alt path info based on the contents of the
 *	LAP message that was sent earlier (called once the APR reports
 *	that the path was loaded).
 *
 * INPUTS:
 *	statep	- pointer to ibcm_state_data_t
 *	lap_msg	- lap msg sent earlier
 *
 * RETURN VALUE: ibcm_status_t
 *	IBCM_SUCCESS	- alternate path programmed into the QP
 *	IBCM_FAILURE	- QP query failed, QP is not in RTS/SQD, the port
 *			  or pkey index could not be resolved, or
 *			  ibt_modify_qp failed
 */
static ibcm_status_t
ibcm_set_qp_from_apr(ibcm_state_data_t *statep, ibcm_lap_msg_t *lap_msg)
{
	ibtl_cm_hca_port_t	port;
	ibt_adds_vect_t		*adds;

	ibt_qp_query_attr_t	qp_attrs;
	ibt_cep_modify_flags_t	cep_flags;
	ibt_status_t		status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_set_qp_from_apr: statep 0x%p", statep);

	status = ibt_query_qp(statep->channel, &qp_attrs);
	if (status != IBT_SUCCESS) {
		/* qp_attrs is not valid here, so do not print from it */
		IBTF_DPRINTF_L2(cmlog, "ibcm_set_qp_from_apr: statep %p "
		    "ibt_query_qp failed, status = %d", statep, status);
		return (IBCM_FAILURE);
	}

	/* the alternate path can only be loaded in RTS or SQD */
	if (qp_attrs.qp_info.qp_state != IBT_STATE_RTS &&
	    qp_attrs.qp_info.qp_state != IBT_STATE_SQD) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_set_qp_from_apr: statep %p "
		    "unexpected qp_state = %d", statep,
		    qp_attrs.qp_info.qp_state);
		return (IBCM_FAILURE);
	}

	/* Fill up input args for ibt_modify_qp */
	cep_flags = IBT_CEP_SET_ALT_PATH | IBT_CEP_SET_STATE;

	/* do RTS=>RTS or SQD=>SQD.  The next line is needed for RTS=>RTS. */
	qp_attrs.qp_info.qp_current_state = qp_attrs.qp_info.qp_state;

	/* the alternate address vector comes from the LAP we sent */
	adds = &IBCM_QP_RC(qp_attrs).rc_alt_path.cep_adds_vect;

	ibcm_fill_adds_from_lap(adds, lap_msg, IBCM_ACTIVE_MODE);

	if ((status = ibtl_cm_get_hca_port(adds->av_sgid,
	    statep->local_hca_guid, &port)) != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_set_qp_from_apr: "
		    "ibtl_cm_get_hca_port failed status = %d", status);
		IBTF_DPRINTF_L5(cmlog, "ibcm_set_qp_from_apr:"
		    " ibtl_cm_get_hca_port sgid guid %llX",
		    adds->av_sgid.gid_guid);
		IBTF_DPRINTF_L5(cmlog, "ibcm_set_qp_from_apr:"
		    " ibtl_cm_get_hca_port sgid prefix %llX ",
		    adds->av_sgid.gid_prefix);
		return (IBCM_FAILURE);
	}

	IBCM_QP_RC(qp_attrs).rc_alt_path.cep_hca_port_num =
	    port.hp_port;

	IBTF_DPRINTF_L4(cmlog, "ibcm_set_qp_from_apr: "
	    "gid = %llx:%llx, port_num = %d",
	    IBCM_QP_RC(qp_attrs).rc_alt_path.cep_adds_vect.av_sgid.
	    gid_prefix,
	    IBCM_QP_RC(qp_attrs).rc_alt_path.cep_adds_vect.av_sgid.gid_guid,
	    port.hp_port);

	/* The pkey is same as the primary path */
	status = ibt_pkey2index_byguid(statep->local_hca_guid,
	    port.hp_port, statep->pkey,
	    &IBCM_QP_RC(qp_attrs).rc_alt_path.cep_pkey_ix);

	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_set_qp_from_apr: "
		    "ibt_pkey2index_byguid failed %d", status);
		return (IBCM_FAILURE);
	}

	qp_attrs.qp_info.qp_trans = IBT_RC_SRV;

	/* ack timeout: remote ack delay plus twice the alt packet lifetime */
	IBCM_QP_RC(qp_attrs).rc_alt_path.cep_timeout =
	    ibt_usec2ib(statep->remote_ack_delay +
	    2 * statep->rc_alt_pkt_lt);

	if (IBCM_QP_RC(qp_attrs).rc_mig_state == IBT_STATE_MIGRATED) {
		/* Need to rearm */
		IBTF_DPRINTF_L3(cmlog, "ibcm_set_qp_from_apr: statep 0x%p: "
		    "rearming APM", statep);
		cep_flags |= IBT_CEP_SET_MIG;
		IBCM_QP_RC(qp_attrs).rc_mig_state = IBT_STATE_REARMED;
	}

	status = ibt_modify_qp(statep->channel, cep_flags, &qp_attrs.qp_info,
	    NULL);

	if (status != IBT_SUCCESS)
		ibcm_insert_trace(statep, IBCM_TRACE_SET_ALT_FAIL);
	else
		ibcm_insert_trace(statep, IBCM_TRACE_SET_ALT);

#ifdef	DEBUG
	/* re-read the QP so the debug dump shows what is actually set */
	(void) ibt_query_qp(statep->channel, &qp_attrs);
	print_modify_qp("ACTIVE LAP QUERY", statep->channel,
	    cep_flags, &qp_attrs.qp_info);
#endif

	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_set_qp_from_apr:"
		    " ibt_modify_qp() failed, status = %d", status);
		return (IBCM_FAILURE);
	}

	return (IBCM_SUCCESS);
}
/*
 * ibcm_sync_lapr_idle:
 *
 *	This call either cancels a LAP/APR operation or waits
 *	until the operation is complete
 *
 *	- ap_state LAP_RCVD, APR_RCVD, MRA_LAP_SENT or TIMED_OUT: sleep on
 *	  block_mad_cv until ap_state becomes IDLE.
 *	- ap_state LAP_SENT or MRA_LAP_RCVD: cancel the LAP timer and report
 *	  IBT_CM_AP_ABORT, through ap_return_data for a blocking
 *	  ibt_set_alt_path or through an APR_RCV event otherwise.
 *	- any other ap_state: nothing to do.
 *
 * INPUTS:
 *	statep	Pointer to ibcm_state_data_t
 *
 * RETURN VALUE: NONE
 *
 * This function is called holding state mutex
 * This function returns, releasing the state mutex
 */
void
ibcm_sync_lapr_idle(ibcm_state_data_t *statep)
{
	/* sampled under the mutex the caller holds */
	timeout_id_t	timer_val = statep->timerid;
	ibt_cm_event_t	event;

	IBTF_DPRINTF_L3(cmlog, "ibcm_sync_lapr_idle:"
	    "statep %p state %d ap_state %d", statep, statep->state,
	    statep->ap_state);

	ASSERT(MUTEX_HELD(&statep->state_mutex));

	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&statep->state_mutex))

	switch (statep->ap_state) {
	/* Busy AP states on active/passive sides */
	case IBCM_AP_STATE_LAP_RCVD:
	case IBCM_AP_STATE_APR_RCVD:
	case IBCM_AP_STATE_MRA_LAP_SENT:
	case IBCM_AP_STATE_TIMED_OUT:
		/* wait till ap_state becomes IBCM_AP_STATE_IDLE */
		while (statep->ap_state != IBCM_AP_STATE_IDLE)
			cv_wait(&statep->block_mad_cv, &statep->state_mutex);

		mutex_exit(&statep->state_mutex);
		break;

	/* a LAP is outstanding: fail the client's ibt_set_alt_path */
	case IBCM_AP_STATE_LAP_SENT:
	case IBCM_AP_STATE_MRA_LAP_RCVD:
		/* blocking ibt_set_alt_path */
		if (statep->ap_return_data != NULL) {
			statep->ap_return_data->ap_status =
			    IBT_CM_AP_ABORT;
			statep->ap_state = IBCM_AP_STATE_IDLE;
			/*
			 * NOTE(review): the other wake-ups of
			 * block_client_cv for ibt_set_alt_path set
			 * ap_done = B_TRUE first; this path does not.
			 * Confirm against the waiter's predicate.
			 */
			cv_broadcast(&statep->block_client_cv);
			IBTF_DPRINTF_L3(cmlog, "ibcm_sync_lapr_idle:"
			    "blocked wait");
		}

		statep->timerid = 0;

		/* Cancel the timeout, outside of the mutex */
		mutex_exit(&statep->state_mutex);
		if (timer_val != 0)
			(void) untimeout(timer_val);

		/* Non blocking ibt_set_alt_path */
		if (statep->ap_return_data == NULL) {
			/* Fill up the event */
			bzero(&event, sizeof (event));

			event.cm_type = IBT_CM_EVENT_APR_RCV;
			event.cm_channel = statep->channel;
			event.cm_session_id = NULL;
			event.cm_priv_data = NULL;
			event.cm_priv_data_len = 0;
			event.cm_event.apr.apr_status = IBT_CM_AP_ABORT;

			/* Call the cm handler */
			statep->cm_handler(statep->state_cm_private, &event,
			    NULL, NULL, 0);
			IBTF_DPRINTF_L3(cmlog, "ibcm_sync_lapr_idle:"
			    "non-blocked wait");
		}
		break;

	default:
		/* no LAP/APR activity to synchronize with */
		mutex_exit(&statep->state_mutex);
		break;
	}

	ASSERT(!MUTEX_HELD(&statep->state_mutex));
}
#ifdef DEBUG

/*
 * print_modify_qp:
 *	Debug-only dump of the RC attributes passed to (or read back
 *	after) a modify qp: RDMA read depths, then the primary and the
 *	alternate path (port, path bits, dlid, pkey index, timeout, SL,
 *	flow label, traffic class, hop limit, rate, sgid index, GRH flag
 *	and both GIDs). Useful to manually verify that the modify qp
 *	parameters are as expected.
 *
 * INPUTS:
 *	prefix	- text identifying the caller in the first output line
 *	ibt_qp	- QP handle, printed as a pointer
 *	flags	- modify flags, printed in hex
 *	qp_attr	- QP attributes to dump
 *
 * RETURN VALUE: NONE
 */
static void
print_modify_qp(char *prefix, ibt_qp_hdl_t ibt_qp,
    ibt_cep_modify_flags_t flags, ibt_qp_info_t *qp_attr)
{
	/* address vectors of the primary and the alternate path */
	ibt_adds_vect_t	*pri_av =
	    &qp_attr->qp_transport.rc.rc_path.cep_adds_vect;
	ibt_adds_vect_t	*alt_av =
	    &qp_attr->qp_transport.rc.rc_alt_path.cep_adds_vect;

	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP %s %p", prefix, ibt_qp);
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP flags 0x%x", flags);

	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP "
	    "rc_rdma_ra_in %d rc_rdma_ra_out %d",
	    qp_attr->qp_transport.rc.rc_rdma_ra_in,
	    qp_attr->qp_transport.rc.rc_rdma_ra_out);

	/* primary path */
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP primary: "
	    "port %d path bits %d dlid %X",
	    qp_attr->qp_transport.rc.rc_path.cep_hca_port_num,
	    pri_av->av_src_path, pri_av->av_dlid);
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP primary: "
	    "pkey index %d cep_timeout %d",
	    qp_attr->qp_transport.rc.rc_path.cep_pkey_ix,
	    qp_attr->qp_transport.rc.rc_path.cep_timeout);
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP primary: "
	    "srvl %d flow label %d tclass %d",
	    pri_av->av_srvl, pri_av->av_flow, pri_av->av_tclass);
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP primary: "
	    "hop %d srate %d sgid_ix %d send_grh %d",
	    pri_av->av_hop, pri_av->av_srate, pri_av->av_sgid_ix,
	    pri_av->av_send_grh);
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP primary: "
	    "dgid prefix %llX dgid guid %llX",
	    pri_av->av_dgid.gid_prefix, pri_av->av_dgid.gid_guid);
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP primary: "
	    "sgid prefix %llX sgid guid %llX",
	    pri_av->av_sgid.gid_prefix, pri_av->av_sgid.gid_guid);

	/* alternate path */
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP alternate: "
	    "port %d path bits %d dlid %X",
	    qp_attr->qp_transport.rc.rc_alt_path.cep_hca_port_num,
	    alt_av->av_src_path, alt_av->av_dlid);
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP alternate: "
	    "pkey index %d cep_timeout %d",
	    qp_attr->qp_transport.rc.rc_alt_path.cep_pkey_ix,
	    qp_attr->qp_transport.rc.rc_alt_path.cep_timeout);
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP alternate: "
	    "srvl %d flow label %d tclass %d",
	    alt_av->av_srvl, alt_av->av_flow, alt_av->av_tclass);
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP alternate: "
	    "hop %d srate %d sgid_ix %d send_grh %d",
	    alt_av->av_hop, alt_av->av_srate, alt_av->av_sgid_ix,
	    alt_av->av_send_grh);
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP alternate: "
	    "dgid prefix %llX dgid guid %llX",
	    alt_av->av_dgid.gid_prefix, alt_av->av_dgid.gid_guid);
	IBTF_DPRINTF_L4(cmlog, "PRINT_MODIFY_QP alternate: "
	    "sgid prefix %llX sgid guid %llX",
	    alt_av->av_sgid.gid_prefix, alt_av->av_sgid.gid_guid);
}
#endif