enx_ibt.c revision b494511a9cf72b1fc4eb13a0e593f55c624ab829
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * CDDL HEADER START
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * The contents of this file are subject to the terms of the
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Common Development and Distribution License (the "License").
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * You may not use this file except in compliance with the License.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * See the License for the specific language governing permissions
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * and limitations under the License.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * When distributing Covered Code, include this CDDL HEADER in each
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * If applicable, add the following below this CDDL HEADER, with the
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * fields enclosed by brackets "[]" replaced with your own identifying
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * information: Portions Copyright [yyyy] [name of copyright owner]
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * CDDL HEADER END
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Module (static) info passed to IBTL during ibt_attach
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "EoIB Nexus"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Static function declarations
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic int eibnx_state_init(void);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic int eibnx_join_advertise_mcg(eibnx_thr_info_t *);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic void eibnx_rb_state_init(void);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic void eibnx_rb_join_solicit_mcg(eibnx_thr_info_t *);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic void eibnx_rb_join_advertise_mcg(eibnx_thr_info_t *);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * eibnx_ibt_init() is expected to be called during the nexus driver's
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * attach time; given that there is only one instance of the nexus
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * driver allowed, and no threads are active before the initialization
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * is complete, we don't really have to acquire any driver specific mutex
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * within this routine.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Do per-state initialization
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Attach to IBTL
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if ((ret = ibt_attach(&eibnx_clnt_modinfo, ss->nx_dip, ss,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Get the list of HCA guids on the system
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if ((ret = ibt_detach(ss->nx_ibt_hdl)) != IBT_SUCCESS) {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Open the HCAs and store the handles
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for (i = 0; i < num_hcas; i++) {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * If we cannot open a HCA, allocate a protection domain
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * on it or get portinfo on it, print an error and move on
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * to the next HCA. Otherwise, queue it up in our hca list
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin * Free the HCA guid list we've allocated via ibt_get_hca_list()
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Put the hca list in the state structure
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Register for subnet notices
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin * Initialize synchronization primitives
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin mutex_init(&ss->nx_nodeq_lock, NULL, MUTEX_DRIVER, NULL);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin mutex_init(&ss->nx_busop_lock, NULL, MUTEX_DRIVER, NULL);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Initialize well-known mgids: there must be a better way to
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * do this instead of having to express every single gid as a
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * tuple of two 8-byte integer quantities.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin enx_advertise_mgid.gid_prefix = EIB_GUID_ADVERTISE_PREFIX;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Start up the eoib node creation thread
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin kt = thread_create(NULL, 0, eibnx_create_eoib_node, NULL, 0,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Locate the two multicast groups: the All-EoIB-GWs-GID and
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * All-EoIB-ENodes-GID. Make sure the MTU is something that
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * we can work with and Qkey is as expected.
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz if ((info->ti_mcg_status & ENX_MCGS_FOUND) == ENX_MCGS_FOUND) {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Request GID defining this port
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * First, locate the multicast group to use for sending solicit
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * requests to the GW
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if ((ret = ibt_query_mcg(rgid, &mcg_attr, 1, &info->ti_solicit_mcg,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ENX_DPRINTF_WARN("solicit mcg (gid=%llx.%llx) not found, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "ibt_query_mcg() returned %d", enx_solicit_mgid.gid_prefix,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Make sure the multicast mtu isn't bigger than the port mtu
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * and the multicast group's qkey is the same as EIB_FIP_QKEY.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ENX_DPRINTF_WARN("solicit mcg (gid=%llx.%llx) mtu too big, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin enx_solicit_mgid.gid_guid, info->ti_solicit_mcg->mc_mtu,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ENX_DPRINTF_WARN("solicit mcg (gid=%llx.%llx) qkey bad, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "actual=0x%x, expected=0x%x", enx_solicit_mgid.gid_prefix,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin enx_solicit_mgid.gid_guid, info->ti_solicit_mcg->mc_qkey,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Now, locate the multicast group for receiving discover
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * advertisements from the GW
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if ((ret = ibt_query_mcg(rgid, &mcg_attr, 1, &info->ti_advertise_mcg,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ENX_DPRINTF_WARN("advertise mcg (gid=%llx.%llx) not found, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "ibt_query_mcg() returned %d",
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Verify the multicast group's mtu and qkey as before
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ENX_DPRINTF_WARN("advertise mcg (gid=%llx.%llx) mtu too big, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ENX_DPRINTF_WARN("advertise mcg (gid=%llx.%llx) qkey bad, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "actual=0x%x, expected=0x%x",
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin enx_advertise_mgid.gid_prefix, enx_advertise_mgid.gid_guid,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Allocate and setup a single completion queue for tx and rx
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Get this HCA's attributes
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ENX_DPRINTF_ERR("ibt_query_hca(hca_hdl=0x%llx) failed, ret=%d",
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Allocate a completion queue for our sends and receives
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin cq_attr.cq_size = (hca_attr.hca_max_cq_sz < ENX_CQ_SIZE) ?
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ret = ibt_alloc_cq(info->ti_hca, &cq_attr, &info->ti_cq_hdl, &sz);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ENX_DPRINTF_ERR("ibt_alloc_cq(hca_hdl=0x%llx, cq_sz=0x%lx) "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Set up other parameters for collecting completion information
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin info->ti_wc = kmem_zalloc(sizeof (ibt_wc_t) * sz, KM_SLEEP);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Allocate and setup the UD channel parameters
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Protect against arbitrary additions to the chan_alloc_args
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * and chan_query_attr structures (make sure the ones we don't
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * use are zero'd).
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * This ud channel is not going to be used by the nexus driver
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * to send any LSO packets, so we won't need the IBT_USES_LSO flag.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ret = ibt_pkey2index(info->ti_hca, info->ti_pi->p_port_num,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "port_num=0x%x, pkey=0x%x) failed, ret=%d",
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ret = ibt_alloc_ud_channel(info->ti_hca, IBT_ACHAN_NO_FLAGS,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ENX_DPRINTF_ERR("ibt_alloc_ud_channel(hca_hdl=0x%llx, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "cs_sq=0x%lx, cs_rq=0x%lx) failed, ret=%d",
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ret = ibt_query_ud_channel(info->ti_chan, &query_attr);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ENX_DPRINTF_ERR("ibt_query_ud_channel(chan_hdl=0x%llx) "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if ((ret = ibt_free_channel(info->ti_chan)) != IBT_SUCCESS) {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Set up the transmit buffers for communicating with the gateway. Since
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * the EoIB Nexus driver only exchanges control messages with the
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * gateway, we don't really need too much space.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Allocate for the tx buf
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin snd_p->tx_vaddr = (ib_vaddr_t)(uintptr_t)kmem_zalloc(tx_bufsz,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Register the memory region with IBTF for use
return (ENX_E_FAILURE);
for (i = 0; i < ENX_NUM_SWQE; i++) {
return (ENX_E_FAILURE);
return (ENX_E_SUCCESS);
KM_SLEEP);
return (ENX_E_FAILURE);
for (i = 0; i < ENX_NUM_RWQE; i++) {
return (ENX_E_SUCCESS);
return (ENX_E_FAILURE);
return (ENX_E_FAILURE);
for (i = 0; i < ENX_NUM_RWQE; i++) {
NULL);
return (ENX_E_FAILURE);
return (ENX_E_SUCCESS);
int rv;
return (ENX_E_FAILURE);
return (ENX_E_SUCCESS);
return (ENX_E_FAILURE);
sizeof (ibt_mcg_info_t));
return (ENX_E_SUCCESS);
return (ENX_E_FAILURE);
return (ENX_E_FAILURE);
sizeof (ibt_mcg_info_t));
return (ENX_E_FAILURE);
return (ENX_E_SUCCESS);
return (ENX_E_FAILURE);
return (ENX_E_FAILURE);
return (ENX_E_FAILURE);
return (ENX_E_FAILURE);
return (ENX_E_SUCCESS);
return (ENX_E_FAILURE);
return (ENX_E_FAILURE);
return (ENX_E_SUCCESS);
return (ENX_E_FAILURE);
return (ENX_E_FAILURE);
return (ENX_E_SUCCESS);
eibnx_rb_state_init(void)
if (thr_id) {
for (i = 0; i < ENX_NUM_SWQE; i++) {
ret =
for (i = 0; i < ENX_NUM_RWQE; i++) {
return (NULL);
return (NULL);
return (NULL);
if (port_tail) {
return (NULL);
return (hca);
return (ENX_E_FAILURE);
return (ENX_E_FAILURE);
return (ENX_E_SUCCESS);