sol_ib_cma.c revision c0dd49bdd68c0d758a67d56f07826f3b45cfc664
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* IB specific routines for RDMA CM functionality
*/
/* Standard driver includes */
extern char *sol_rdmacm_dbg_str;
/* Delay of 5 secs */
#define SOL_OFS_REQ_DELAY 5000000
/* Solaris CM Event Callbacks */
ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
ibt_cm_ud_return_args_t *, void *, ibt_priv_data_len_t);
/* Local functions */
struct rdma_conn_param *);
struct rdma_conn_param *);
ibcma_chan_t *);
ibcma_chan_t *);
genlist_t *);
static int ibcma_any_addr(ibt_ip_addr_t *);
static int ibcma_get_first_ib_ipaddr(struct rdma_cm_id *);
/* Utility Conversion Routines */
static void ibt_addsvect2sa_path(ibt_adds_vect_t *,
struct ib_sa_path_rec *, ib_lid_t);
ib_lid_t);
ibt_ip_addr_t *, in_port_t *);
in_port_t *);
#ifdef QP_DEBUG
static void dump_qp_info(ibt_qp_hdl_t);
#endif
static void dump_priv_data(void *, ibt_priv_data_len_t,
ibt_priv_data_len_t, char *);
/*
* RDMA CM API - Transport specific functions
*/
void
{
if (ibchanp->chan_mcast_cnt) {
"rdma_ib_destroy_id: pending mcast!!");
while (entry) {
if (status != IBT_SUCCESS)
"destroy_id: ibt_leave_mcg failed %d",
status);
}
}
}
if (ibchanp->chan_pathp) {
}
}
int
{
int ret;
/*
* If this is IF_ADDR_ANY, get info of IB port with IP @.
* Return Failure, if there are no IB ports with IP @.
*/
if (sol_cma_any_addr(addr)) {
return (ibcma_get_first_ib_ipaddr(idp));
}
if (ret == 0) {
"chan SID %llx , ps %x, port %x",
}
ret);
return (ret);
}
int
{
int ret;
/*
* Copy src_addr if the passed src @ is valid IP address and
* the local @ has not been set for this CMID.
*/
&port);
if (port) {
"local @ SID %llx, ps %x, port %x",
}
}
&port);
}
/*
* Return SUCCESS if remote address is a MCAST address
* and local address is not IF_ADDR_ANY. If local_addr
* is IF_ADDR_ANY and remote is MCAST, return FAILURE.
*/
"ib_resolve_addr - mcast dest @, local IP");
ret = 0;
} else {
}
return (0);
}
return (ret);
}
/*
* Linux OFED implementation is as below :
* 1. librdmacm sends INIT_QP_ATTR command to get QP attributes
* which the kernel CM expects QP attribute to be in. Kernel
* CM sets the QP attribute to be set and passes it back to
* user library.
* 2. librdmacm calls ibv_modify_qp() to modify the QP attribute.
* The QP attribute used is the same as that passed by
* kernel sol_ucma.
*
* For RC connections, Solaris ibcm manages the QP state after :
* CM Event Handler is called - Passive side
* ibt_open_rc_channel(9F) - Active Side
* The client will *not* have to do an explicit modify_qp(). To fit this,
* the INIT_QP_ATTR command *marks* the QP to fake its attributes and
* ignore ibv_modify_qp() for this QP. Solaris ibcm manages QP state.
*
* Before the above calls, the client will have to maintain the QP state.
* The sol_ucma driver will pass the appropriate QP attributes for the
* clients to pass to ibv_modify_qp().
*
* For UD, OFED model is adhered to till the QP is transitioned to RTS.
* Any transitions after the QP has transitioned to RTS are ignored.
*/
int
int *qp_attr_mask)
{
"Ignoring Init QP Attr");
return (0);
}
"init_qp_attr, devp NULL");
return (EINVAL);
}
return (0);
qpstate == IB_QPS_RTR) {
/*
* Fill in valid values for address vector & Remote QPN.
* Fill in MTU as MTU_256 & PSN as 0. This QP will be
* reset anyway.
*/
return (0);
} else if (IS_UDP_CMID(idp)) {
return (0);
} else
return (EINVAL);
}
int
{
int ret;
"invalid IP @");
return (EINVAL);
}
else
return (ret);
}
void
{
int num_hcas;
struct rdma_cm_id *ep_idp;
/*
* Address other than IF_ADDR_ANY bound to this channel. Listen on
* this IP address alone.
*/
if (root_ibchanp->chan_devp &&
KM_SLEEP);
return;
}
/*
* Get the list of IB devs with valid IP addresses
* Append to the list of listeners for root_idp
*/
&dev_genlist);
while (entry) {
"Create listen chan- ALL");
KM_SLEEP);
}
}
int
{
struct rdma_cm_id *root_idp;
else
NULL);
if (status != IBT_SUCCESS) {
"init_root_chan: ibt_register_service ret %x"
return (EINVAL);
}
"ibt_register_service: SID %x, port %x: done",
return (0);
}
int
{
if (status != IBT_SUCCESS) {
"fini_root_chan: ibt_deregister_service ret %x",
status);
return (EINVAL);
}
return (0);
}
void
{
}
int
{
struct rdma_cm_id *root_idp;
"fini_ep_chan(%p)", ep_chanp);
if (ep_ibchanp->chan_devp)
if (ep_ibchanp->chan_pathp)
if (!ep_listenp->listen_ib_sbind_hdl)
return (0);
if (status != IBT_SUCCESS) {
"fini_ep_chan(%p) : ibt_unbind_service() ret %d",
status);
return (-1);
}
return (0);
}
{
struct rdma_cm_id *root_idp;
if (root_ibchanp->chan_sid == 0) {
}
}
/*ARGSUSED*/
int
{
struct rdma_cm_id *root_idp;
if (status != IBT_SUCCESS) {
"ibt_bind_service failed with %x", status);
return (EINVAL);
}
return (0);
}
#define SOL_REP_PRIV_DATA_SZ 208
int
{
idp, conn_param);
"Active side, cm_proceed not needed");
return (0);
}
if (!conn_param) {
return (0);
}
/* Fill cm_reply */
if (priv_len) {
privp = (void *)kmem_zalloc(
#ifdef DEBUG
#endif
}
if (status != IBT_SUCCESS) {
"ibt_ofuvcm_proceed failed %x", status);
if (privp)
return (EINVAL);
}
if (privp)
} else {
if (priv_len) {
privp = (void *)kmem_zalloc(
#ifdef DEBUG
#endif
}
if (status != IBT_SUCCESS) {
"ibt_cm_ud_proceed failed %x", status);
if (privp)
return (EINVAL);
}
if (privp)
}
return (0);
}
int
{
"rdma_ib_reject(%p, %p, %x)", idp,
"chan_session_id NULL");
return (EINVAL);
}
if (private_data_len) {
KM_SLEEP);
#ifdef DEBUG
private_data_len, "ib_reject");
#endif
}
"calling ibt_cm_proceed");
if (status != IBT_SUCCESS) {
"ibt_cm_proceed failed %x", status);
if (privp)
return (EINVAL);
}
} else {
"calling ibt_cm_ud_proceed");
if (status != IBT_SUCCESS) {
"ibt_cm_ud_proceed failed %x", status);
if (privp)
return (EINVAL);
}
}
if (privp)
return (0);
}
int
{
if (status != IBT_SUCCESS) {
"ib_disconnect(%p) Reject for incoming REQ "
"failed, status %d", status);
return (EINVAL);
}
}
}
NULL, 0);
if (status != IBT_SUCCESS) {
"ib_disconnect(%p) Reject for incoming REQ "
"failed, status %d", status);
return (EINVAL);
}
}
}
/*
* Close RC channel for RC.
* No explicit Disconnect required for UD
*/
/*
* No callbacks for CMIDs for which CONNECT has been
* initiated but not completed.
*/
if (status != IBT_SUCCESS) {
"disconnect: close_rc_channel failed %x",
status);
return (EINVAL);
}
} else
return (0);
}
int
void *context)
{
return (EINVAL);
}
ibmcastp);
/* Check if input @ to rdma_join_mcast is multicast IP @ */
0xE0000000))) {
"Invalid IP addr specified");
return (EINVAL);
}
if (sol_cma_any_addr(addr)) {
} else {
}
if (status != IBT_SUCCESS) {
"ibt_join_mcg failed with status %d", status);
return (EINVAL);
}
NULL);
return (0);
}
void
{
sizeof (struct sockaddr)) == 0) {
break;
}
}
"leave_mcast: No matching @");
return;
}
if (status != IBT_SUCCESS)
"ibt_leave_mcg failed %d", status);
}
/* Local Functions */
#define SOL_REQ_PRIV_DATA_SZ 96
static int
struct rdma_conn_param *conn_paramp)
{
sizeof (ibt_ip_addr_t));
sizeof (ibt_ip_addr_t));
if (status != IBT_SUCCESS) {
"ibt_format_ip_private_data failed!!");
return (EINVAL);
}
if (conn_paramp->private_data_len) {
void *dest;
}
/*
* Set the RDMA related flags for this QP, if required.
*/
if (conn_paramp->initiator_depth) {
flags |= IBT_CEP_RDMA_RD;
}
if (conn_paramp->responder_resources) {
flags |= IBT_CEP_RDMA_WR;
}
"tcp_connect: Calling ibt_modify_rdma(%p, %x)",
if (status != IBT_SUCCESS) {
"ibt_open_rdma failed %x", status);
return (EINVAL);
}
}
if (status != IBT_SUCCESS) {
"tcp_connect: ibv_open_rc_channel failed %x",
status);
return (EINVAL);
}
return (0);
}
static int
struct rdma_conn_param *conn_paramp)
{
/* We always select the first path */
if (conn_paramp->private_data_len) {
(void *)(((char *)attr.ud_priv_data) +
}
sizeof (ibt_ip_addr_t));
sizeof (ibt_ip_addr_t));
if (status != IBT_SUCCESS) {
"ibt_format_ip_private_data() failed with status %d",
status);
return (EINVAL);
}
if (status != IBT_SUCCESS) {
"ibt_ud_get_dqpn failed with status %x", status);
return (EINVAL);
}
return (0);
}
static int
{
return (-1);
/* Get the port_info and the pkey */
if (status != IBT_SUCCESS) {
"query_hca_port failed rc %d", status);
return (-1);
} else {
int index;
pkey);
}
return (0);
}
static int
{
return (0);
}
IBT_SUCCESS) {
"ib_cma_get_devinfo:status %d, %p not IB IP @",
return (EINVAL);
}
"ib_cma_get_devinfo:init_devinfo failed");
return (EINVAL);
}
return (0);
}
extern void cma_resolve_addr_callback(sol_cma_chan_t *, int);
static void
{
int i;
return;
}
sizeof (ibt_path_info_t));
sizeof (ibt_ip_addr_t));
return;
}
}
return;
}
for (i = 0; i < ibchanp->chan_numpaths; i++)
}
static int
{
ibchanp);
sizeof (ibt_ip_addr_t));
if (status != IBT_SUCCESS) {
"cma_get_paths : ibt_aget_paths() failed %d", status);
return (EINVAL);
}
return (0);
}
/*
* Solaris Event Handlers
*/
/* UD Event Handler */
/*ARGSUSED*/
static ibt_cm_status_t
{
enum rdma_cm_event_type event;
int evt_status = -1;
void *cm_priv;
void *find_ret;
/*
* Reject further REQs if destroy of listen CMID
* has been called.
*/
if (chan_state == SOL_CMA_CHAN_DESTROY_PENDING ||
"listen CMID destroy called");
return (IBT_CM_REJECT);
}
if (cm_privlen < IBT_IP_HDR_PRIV_DATA_SZ) {
"Priv data len %x < %x", cm_privlen,
return (IBT_CM_REJECT);
}
if (ibt_status != IBT_SUCCESS) {
"ibt_get_ip_data failed, %x", ibt_status);
return (IBT_CM_REJECT);
}
"create_new_id failed!!");
return (IBT_CM_REJECT);
}
sizeof (ibt_ip_addr_t));
/*
* Increment number of Reqs for listening CMID,
* so that listening CMID is not deleted, till this
* connection expects no more events.
* chan_req_cnt is decremented when connection is
* notified to the consumer.
*
* Insert the CMID into the REQ_AVL_TREE. This is
* deleted when the connection is accepted or rejected.
*/
"Add to REQ AVL of %p IDP, idp %p, session_id %p",
if (find_ret) {
"DUPLICATE ENTRY in REQ AVL : root %p, "
"idp %p, session_id %p",
return (IBT_CM_REJECT);
}
evt_status = 0;
evt_status = 0;
} else {
"SIDR Response err with status %x",
goto ud_gen_event;
}
}
if (evt_status == 0 && cm_privlen) {
#ifdef DEBUG
#endif
}
/* Pass back the event to sol_cma consumer */
if (ud_paramp->private_data)
return (IBT_CM_DEFER);
else
return (IBT_CM_DEFAULT);
}
static ibt_cm_status_t
{
void *find_ret;
#ifdef DEBUG
void *dump_priv;
#endif
/*
* Reject further REQs if destroy of listen CMID
* has been called.
*/
if (chan_state == SOL_CMA_CHAN_DESTROY_PENDING ||
"listen CMID destroy called");
return (IBT_CM_REJECT);
}
*evt_status = 0;
? 1 : 0;
#ifdef DEBUG
#endif /* DEBUG */
if (cm_privlen < IBT_IP_HDR_PRIV_DATA_SZ) {
"Priv data len %x < %x", cm_privlen,
return (IBT_CM_REJECT);
}
if (ibt_status != IBT_SUCCESS) {
"ibt_get_ip_data failed, %x", ibt_status);
return (IBT_CM_REJECT);
}
sizeof (ibt_ip_addr_t));
if (ibt_status != IBT_SUCCESS) {
"ibt_ofuvcm_get_req_data failed, %x", ibt_status);
return (IBT_CM_REJECT);
}
if (paramp->private_data_len) {
KM_SLEEP);
}
"create_new_id failed!!");
if (paramp->private_data)
return (IBT_CM_REJECT);
}
/*
* Fill the route, device and port_num.
* TBD - Fill up packet_life_time
*/
reqp->req_hca_guid));
KM_SLEEP);
if (reqp->req_alt_hca_port) {
}
(void *)(&event_ibchanp->chan_rcreq_addr),
sizeof (ibt_adds_vect_t));
sizeof (ibt_ofuvcm_req_data_t));
/*
* Increment number of Reqs for listening CMID, so that
* listening CMID is not deleted, till this connection
* expects no more events. chan_req_cnt is decremented
* when connection is notified to the consumer.
*
* Insert the CMID into the REQ_AVL_TREE. This is
* deleted when the connection is accepted or rejected.
*/
"Add to REQ AVL of %p IDP, idp %p, session_id %p",
if (find_ret) {
"DUPLICATE ENTRY in REQ AVL : root %p, "
"idp %p, session_id %p",
return (IBT_CM_REJECT);
}
return (IBT_CM_DEFER);
}
static void
{
struct rdma_conn_param *paramp;
#ifdef DEBUG
#endif
if (paramp->private_data_len) {
KM_SLEEP);
}
}
static ibt_cm_status_t
{
/* Established event on active / client side */
*event_id_ptr = idp;
sizeof (struct rdma_conn_param));
if (paramp->private_data_len) {
(void *)paramp->private_data,
}
event_chanp = chanp;
goto est_common;
}
event_chanp = NULL;
"No matching CMID for qp_hdl %p in ACPT AVL of CMID %p",
return (IBT_CM_REJECT);
}
#ifdef QP_DEBUG
#endif
/*
* Pass back CONNECT_ESTABLISHED event to consumer.
*/
*evt_status = 0;
return (IBT_CM_DEFAULT);
}
static ibt_cm_status_t
{
*evt_status = 0;
if (!root_idp) {
*event_id_ptr = idp;
return (IBT_CM_DEFAULT);
}
/* On the passive side, search ACPT AVL Tree */
"ibcma_handle_closed: "
"No matching CMID for qp hdl %p in EST AVL of CMID %p",
return (IBT_CM_DEFAULT);
}
return (IBT_CM_DEFAULT);
}
static ibt_cm_status_t
{
"cf_code %x, cf_msg %x, cf_arej_info_valid %x, cf_reason %x",
*evt_status = 0;
case IBT_CM_FAILURE_REJ_SENT :
/* Reject sent. No event to userland. */
break;
case IBT_CM_FAILURE_REJ_RCV :
/*
* Reject received. If this is a consumer reject, copy the
* private data. Send RDMA_CM_EVENT_REJECTED to user land.
*/
(void *)paramp->private_data,
}
/*
* If this is a REJECT for an accepted CMID, pass the
* event to accepted CMID.
*/
if (root_idp) {
eventp->cm_channel);
"ibcma_handle_failed: No matching CMID "
"for qp_hdl %p in ACPT AVL of CMID %p",
break;
}
} else {
*event_id_ptr = idp;
}
break;
case IBT_CM_FAILURE_TIMEOUT :
/*
* Connection Timeout, Send RDMA_CM_EVENT_REJECTED event and
* status as IBT_CM_TIMEOUT.
*/
"ibcma_handle_failed: No matching CMID "
"for qp_hdl %p in REQ AVL of CMID %p",
break;
}
}
"ibcma_handle_failed: timeout "
"session_id NULL");
}
if (!root_idp) {
*event_id_ptr = idp;
}
break;
case IBT_CM_FAILURE_STALE :
/* Stale connection, ignore */
break;
}
return (IBT_CM_DEFAULT);
}
static ibt_cm_status_t
{
enum rdma_cm_event_type event;
int event_status;
case IBT_CM_EVENT_REQ_RCV :
"ibcma_rc_hdlr : REQ Event");
/*
* We need to do a round trip to userland. Send a MRA
* so that the client does not send multiple REQs. Then
* continue the processing of REQs.
*/
if (ibt_status != IBT_SUCCESS) {
"ibcma_rc_hdlr : ibt_cma_delay failed %x",
return (IBT_CM_REJECT);
}
&event, &event_status);
if (status == IBT_CM_REJECT)
return (status);
break;
case IBT_CM_EVENT_REP_RCV :
"ibcma_rc_hdlr : REP Event");
return (IBT_CM_DEFAULT);
/* NOTREACHED */
/* break; */
case IBT_CM_EVENT_LAP_RCV :
case IBT_CM_EVENT_APR_RCV :
/*
* Alternate Paths not supported from userland. Return
* IBT_CM_REJECT.
*/
"ibcma_rc_hdlr : AP Event");
return (IBT_CM_REJECT);
/* NOTREACHED */
/* break; */
case IBT_CM_EVENT_MRA_RCV :
/* Let Solaris ibcm take default action for MRA */
"ibcma_rc_hdlr : MRA Event");
return (IBT_CM_DEFAULT);
/* NOTREACHED */
/* break; */
case IBT_CM_EVENT_CONN_EST :
"ibcma_rc_hdlr : EST Event");
&event, &event_status);
break;
case IBT_CM_EVENT_CONN_CLOSED :
/*
* Pass on RDMA_CM_EVENT_DISCONNECTED to consumer
*/
"ibcma_rc_hdlr : CLOSED Event");
&event, &event_status);
break;
case IBT_CM_EVENT_FAILURE :
/* Handle Failure Event */
"ibcma_rc_hdlr : FAIL Event");
&event, &event_status);
/*
* Check if there is an event to be sent to the userland.
* Return if there are none.
*/
if (event_status == 0)
return (status);
break;
}
/* Pass back the event to sol_cma consumer */
if (event_idp) {
} else
"No Event to userland!!");
if (paramp->private_data)
return (status);
}
static void
{
struct rdma_cm_id *idp;
int evt_status;
enum rdma_cm_event_type event;
if (status == IBT_SUCCESS) {
}
/* Send the event to consumer of sol_cma. */
}
static int
{
int num_hcas, info_inited = 0;
while (entry) {
if (info_inited == 0) {
info_inited = 1;
} else {
}
}
if (info_inited)
return (0);
else
return (ENODEV);
}
/* Utility Conversion functions */
static void
{
struct sockaddr_in *sock_in4p;
if (portp)
} else {
struct sockaddr_in6 *in6_addr;
sizeof (in6_addr_t));
if (portp)
}
}
static void
{
struct sockaddr_in *sock_in4p;
if (IS_UDP_CMID(idp))
else
if (portp)
} else {
struct sockaddr_in6 *in6_addr;
sizeof (in6_addr_t));
if (portp)
}
}
static void
{
/*
* Libraries expect the GID to be in network order. Convert
* to network order before passing it to the library.
*/
}
static void
{
}
static void
{
/*
* Libraries expect the GID to be in network order. Convert
* to network order before passing it to the library.
*/
}
static void
{
}
static void
{
}
/*
* Map a multicast IP onto multicast MAC for type IP-over-InfiniBand.
* Leave P_Key as 0 to be filled in by caller
*/
static void
{
buf[0] = 0; /* Reserved */
buf[9] = 0;
buf[10] = 0;
buf[11] = 0;
buf[12] = 0;
buf[13] = 0;
buf[14] = 0;
buf[15] = 0;
addr >>= 8;
addr >>= 8;
addr >>= 8;
}
static void
{
0xFF10A01B) {
} else {
}
}
static int
{
return (0);
}
static struct rdma_cm_id *
{
struct rdma_cm_id *new_idp;
return (new_idp);
KM_SLEEP);
}
}
sizeof (ibt_ip_addr_t));
sizeof (ibt_ip_addr_t));
return (new_idp);
}
static void
{
int i;
for (i = 0; i < num_hcas; i++) {
if (status != IBT_SUCCESS) {
"ibt_query_hca_ports_byguid failed %d", status);
continue;
}
continue;
for (s = 0; s < num_sgids; s++) {
/* Skip holes in sgid table */
continue;
/* Skip holes in pkey table */
continue;
tmp->p_sgid_tbl[s],
&hca_ipaddr);
if (status != IBT_SUCCESS)
continue;
/* allocate devinfo & fill in info */
devp = kmem_zalloc(
sizeof (ibcma_dev_t), KM_SLEEP);
sizeof (ibt_ip_addr_t));
"get_devlist: add2devlist "
"node_guid %llx", hca_guidp[i]);
(void) add_genlist(ret_devlist,
}
}
}
}
}
#ifdef QP_DEBUG
static void
{
if (status != IBT_SUCCESS) {
return;
}
"QP HDL : %p, qp_sq_cq %p, qp_rq_cq %p, "
"qp_rdd_hdl %p, qp_qpn %x, qp_sq_sgl %x, qp_rq_sgl %x, "
"qp_srq %p, quer_attr.qp_flags %x",
"qp_sq_sz %x, qp_rq_sz %x, qp_state %x, "
"qp_current_state %x, qp_info.qp_flags %x, qp_trans %x",
"rc_sq_psn %x, rc_rq_psn %x, rc_dst_qpn %x, "
"rc_mig_state %x, rc_rnr_retry_cnt %x, rc_retry_cnt %x, "
"rc_rdma_ra_out %x, rc_rdma_ra_in %x, rc_min_rnr_nak %x, "
"av_dgid %llx: %llx, av_sgid: %llx, "
"srate %x, srvl %x, flow %x, tclass %x, hop %x, "
"av_port_num %x, av_send_grh %x, av_dlid %x, "
"av_src_path %x, av_sgid_ix %x, pkey_index %x, "
"port_num %x",
}
#endif
/*
* Hex-dump helper: walks a byte array `c` of `arr_len` elements and formats
* it sixteen values per row using the "%x ..." format string below.
*
* NOTE(review): the signature line, the declaration of `c` and the print
* call that consumes the format string are not visible in this view; the
* description above is based only on the loop and its arguments - confirm
* against the full file.
*/
static void
{
/* 8-bit unsigned row index; advanced by 16 per row in the loop below. */
uint8_t i;
return;
/* Display in rows of 16 uchar_t */
/*
* Each row unconditionally reads c[i] through c[i + 15]. If arr_len is
* not a multiple of 16, the last row indexes past arr_len.
* NOTE(review): confirm callers always supply a buffer padded to a
* 16-byte multiple.
*
* NOTE(review): because `i` is a uint8_t, `i += 16` wraps back to 0 after
* 240. If arr_len can exceed 240 the condition `i < arr_len` would never
* become false - confirm the maximum value of arr_len.
*/
for (i = 0; i < arr_len; i += 16)
"%x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
c[i], c[i + 1], c[i + 2], c[i + 3], c[i + 4], c[i + 5],
c[i + 6], c[i + 7], c[i + 8], c[i + 9], c[i + 10],
c[i + 11], c[i + 12], c[i + 13], c[i + 14], c[i + 15]);
}