/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "dapl.h"
#include "dapl_tavor_hw.h"
#include "dapl_tavor_wr.h"
#include "dapl_tavor_ibtf_impl.h"

#define HERMON_WQE_SGL_INVALID_LKEY     0x00000100
#define HERMON_WQE_SEND_FENCE_MASK      0x40
#define HERMON_WQE_NDS_MASK             0x3F

#define HERMON_CQDB_NOTIFY_CQ_SOLICIT   (0x1 << 24)
#define HERMON_CQDB_NOTIFY_CQ           (0x2 << 24)

#define HERMON_CQE_RCV_SEND             0x1
#define HERMON_CQE_ERR_OPCODE           0x1E
#define HERMON_CQE_RESIZE_OPCODE        0x16
#define HERMON_CQE_OPCODE_GET(cqe)      (((uint8_t *)cqe)[31] & 0x1F)
#define HERMON_CQE_SENDRECV_GET(cqe)    (((uint8_t *)cqe)[31] & 0x40)
#define HERMON_CQE_OWNER_IS_SW(cq, cqe) ((((uint8_t *)cqe)[31] >> 7) == \
    ((cq->cq_consindx & cq->cq_size) >> cq->cq_log_cqsz))
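
/*
 * A CQE is owned by software when its ownership bit (the top bit of
 * byte 31) matches the bit of the consumer index that toggles each
 * time the consumer index wraps around the CQ, i.e. the "lap" bit.
 */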

#define HERMON_QP_WQEADDRSZ(wcnt)       ((uint32_t)(wcnt << 6))

#define HERMON_WQE_SEND_SIGNALED_MASK   0x0000000C00000000ull
#define HERMON_WQE_SEND_SOLICIT_MASK    0x0000000200000000ull
#define HERMON_WQE_SETCTRL(desc, ctrl) \
    ((uint64_t *)(desc))[1] = HTOBE_64(ctrl)
#define HERMON_WQE_SETNEXT(desc, nopcode, size, fence) \
    ((uint64_t *)(desc))[0] = HTOBE_64((nopcode) | (size) | (fence) | \
    (((uint64_t)((uint8_t *)desc)[0] & 0x80) << 56))
#define HERMON_WQE_BUILD_DATA_SEG(ds, sgl) \
{ \
    uint64_t *tmp; \
\
    tmp = (uint64_t *)(ds); \
    tmp[1] = HTOBE_64((sgl)->ds_va); \
    ((uint32_t *)tmp)[1] = HTOBE_32((sgl)->ds_key); \
    membar_producer(); \
    ((uint32_t *)tmp)[0] = HTOBE_32((sgl)->ds_len); \
}
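
/*
 * Note on HERMON_WQE_BUILD_DATA_SEG above: the address and key are
 * stored first and the byte count last, with membar_producer() in
 * between, so that the address and key stores are visible before the
 * byte count that marks the segment as filled in.
 */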


/* handy macro, useful because of cq_resize dynamics */
#define cq_wrap_around_mask (cq->cq_size - 1)

pthread_spinlock_t hermon_bf_lock;

/*
 * Function signatures
 */
extern uint64_t dapls_tavor_wrid_get_entry(ib_cq_handle_t, tavor_hw_cqe_t *,
    uint_t, uint_t, dapls_tavor_wrid_entry_t *);
extern void dapls_tavor_wrid_cq_reap(ib_cq_handle_t);
extern DAPL_OS_LOCK g_tavor_uar_lock;

#ifndef _LP64
extern void dapls_atomic_assign_64(uint64_t, uint64_t *);
#endif

static int dapli_hermon_wqe_send_build(ib_qp_handle_t, ibt_send_wr_t *,
    uint64_t *, uint_t *);
static DAT_RETURN dapli_hermon_wqe_recv_build(ib_qp_handle_t, ibt_recv_wr_t *,
    uint64_t *, uint_t *);
static int dapli_hermon_cq_cqe_consume(ib_cq_handle_t, uint32_t *, ibt_wc_t *);
static int dapli_hermon_cq_errcqe_consume(ib_cq_handle_t, uint32_t *,
    ibt_wc_t *);
extern void dapli_tavor_wrid_add_entry(dapls_tavor_workq_hdr_t *, uint64_t,
    uint32_t, uint_t);
extern void dapli_tavor_wrid_add_entry_srq(ib_srq_handle_t, uint64_t, uint32_t);

/*
 * Note: The 64-bit doorbells need to be written atomically.
 * In 32-bit libraries we need to use the special assembly routine
 * because compiler-generated code splits the store into two 32-bit writes.
 */

/*
 * dapli_hermon_cq_doorbell()
 * Takes the specified cq cmd and cq number and rings the cq doorbell
 */
static void
dapli_hermon_cq_doorbell(dapls_hw_uar_t ia_uar, uint32_t cq_cmd, uint32_t cqn,
    uint32_t cmd_sn, uint32_t cq_param)
{
    uint64_t doorbell;

    /* Build the doorbell from the parameters */
    doorbell = (cmd_sn | cq_cmd | cqn);
    doorbell = (doorbell << 32) | cq_param;

    /* Write the doorbell to UAR */
#ifdef _LP64
    ((tavor_hw_uar_t *)ia_uar)->cq = HTOBE_64(doorbell);
    /* 32 bit version */
#elif defined(i386)
    dapl_os_lock(&g_tavor_uar_lock);
    /*
     * For 32 bit intel we assign the doorbell in the order
     * prescribed by the Tavor PRM, lower to upper addresses
     */
    ((tavor_hw_uar32_t *)ia_uar)->cq[0] =
        (uint32_t)HTOBE_32(doorbell >> 32);
    ((tavor_hw_uar32_t *)ia_uar)->cq[1] =
        (uint32_t)HTOBE_32(doorbell & 0x00000000ffffffff);
    dapl_os_unlock(&g_tavor_uar_lock);
#else
    dapls_atomic_assign_64(HTOBE_64(doorbell),
        &((tavor_hw_uar_t *)ia_uar)->cq);
#endif
}

/*
 * dapli_hermon_sq_dbreg()
 * Takes the specified qp number and rings the send doorbell.
 */
static void
dapli_hermon_sq_dbreg(dapls_hw_uar_t ia_uar, uint32_t qpn)
{
    uint64_t doorbell;

    doorbell = qpn << 8;

    /* Write the doorbell to UAR */
#ifdef _LP64
    ((tavor_hw_uar_t *)ia_uar)->send = HTOBE_64(doorbell);
#else
#if defined(i386)
    dapl_os_lock(&g_tavor_uar_lock);
    /*
     * For 32 bit intel we assign the doorbell in the order
     * prescribed by the Tavor PRM, lower to upper addresses
     */
    ((tavor_hw_uar32_t *)ia_uar)->send[0] =
        (uint32_t)HTOBE_32(doorbell >> 32);
    ((tavor_hw_uar32_t *)ia_uar)->send[1] =
        (uint32_t)HTOBE_32(doorbell & 0x00000000ffffffff);
    dapl_os_unlock(&g_tavor_uar_lock);
#else
    dapls_atomic_assign_64(HTOBE_64(doorbell),
        &((tavor_hw_uar_t *)ia_uar)->send);
#endif
#endif
}

/*
 * dapli_hermon_wqe_send_build()
 * Constructs a WQE for a given ibt_send_wr_t
 */
static int
dapli_hermon_wqe_send_build(ib_qp_handle_t qp, ibt_send_wr_t *wr,
    uint64_t *addr, uint_t *size)
{
    tavor_hw_snd_wqe_remaddr_t *rc;
    tavor_hw_snd_wqe_bind_t *bn;
    tavor_hw_wqe_sgl_t *ds;
    ibt_wr_ds_t *sgl;
    uint8_t *src, *dst, *maxdst;
    uint32_t nds;
    int len, thislen, maxlen;
    uint32_t new_rkey;
    uint32_t old_rkey;
    int i, num_ds;
    int max_inline_bytes = -1;
    uint64_t ctrl;
    uint64_t nopcode;
    uint_t my_size;

    nds = wr->wr_nds;
    sgl = wr->wr_sgl;
    num_ds = 0;
    ctrl = ((wr->wr_flags & IBT_WR_SEND_SIGNAL) ?
        HERMON_WQE_SEND_SIGNALED_MASK : 0) |
        ((wr->wr_flags & IBT_WR_SEND_SOLICIT) ?
        HERMON_WQE_SEND_SOLICIT_MASK : 0);

    /*
     * RC is the only supported transport in UDAPL
     * For RC requests, we allow "Send", "RDMA Read", "RDMA Write"
     */
    switch (wr->wr_opcode) {
    case IBT_WRC_SEND:
        /*
         * If this is a Send request, then all we need is
         * the Data Segment processing below.
         * Initialize the information for the Data Segments
         */
        ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)addr +
            sizeof (tavor_hw_snd_wqe_nextctrl_t));
        if (qp->qp_sq_inline != 0)
            max_inline_bytes =
                qp->qp_sq_wqesz - TAVOR_INLINE_HEADER_SIZE_SEND;
        nopcode = TAVOR_WQE_SEND_NOPCODE_SEND;
        break;
    case IBT_WRC_RDMAW:
        if (qp->qp_sq_inline != 0)
            max_inline_bytes =
                qp->qp_sq_wqesz - TAVOR_INLINE_HEADER_SIZE_RDMAW;
        nopcode = TAVOR_WQE_SEND_NOPCODE_RDMAW;
        /* FALLTHROUGH */
    case IBT_WRC_RDMAR:
        if (wr->wr_opcode == IBT_WRC_RDMAR) {
            if (qp->qp_sq_inline < 0)
                qp->qp_sq_inline = 0;
            nopcode = TAVOR_WQE_SEND_NOPCODE_RDMAR;
        }
        /*
         * If this is an RDMA Read or RDMA Write request, then fill
         * in the "Remote Address" header fields.
         */
        rc = (tavor_hw_snd_wqe_remaddr_t *)((uintptr_t)addr +
            sizeof (tavor_hw_snd_wqe_nextctrl_t));

        /*
         * Build the Remote Address Segment for the WQE, using
         * the information from the RC work request.
         */
        TAVOR_WQE_BUILD_REMADDR(rc, &wr->wr.rc.rcwr.rdma);

        /* Update "ds" for filling in Data Segments (below) */
        ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)rc +
            sizeof (tavor_hw_snd_wqe_remaddr_t));
        break;
    case IBT_WRC_BIND:
        /*
         * Generate a new R_key.
         * Increment the upper "unconstrained" bits while keeping the
         * lower "constrained" bits the same, since they represent
         * the MPT index.
         */
#if 0
        /* XXX - need equiv of "hermon_wr_bind_check(state, wr);" */
        /* XXX - uses hermon_mr_keycalc - what about Sinai vs. Arbel??? */
#endif
        old_rkey = wr->wr.rc.rcwr.bind->bind_rkey;
        new_rkey = old_rkey >> 8;           /* index */
        old_rkey = (old_rkey + 1) & 0xff;   /* incremented key */
        new_rkey = (new_rkey << 8) | old_rkey;

        wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;

        bn = (tavor_hw_snd_wqe_bind_t *)((uintptr_t)addr +
            sizeof (tavor_hw_snd_wqe_nextctrl_t));

        /*
         * Build the Bind Memory Window Segments for the WQE,
         * using the information from the RC Bind memory
         * window work request.
         */
        TAVOR_WQE_BUILD_BIND(bn, wr->wr.rc.rcwr.bind);

        /*
         * Update the "ds" pointer.  Even though the "bind"
         * operation requires no SGLs, this is necessary to
         * facilitate the correct descriptor size calculations
         * (below).
         */
        ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)bn +
            sizeof (tavor_hw_snd_wqe_bind_t));
        nds = 0;
        nopcode = TAVOR_WQE_SEND_NOPCODE_BIND;
        break;
    default:
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "dapli_hermon_wqe_send_build: invalid wr_opcode=%d\n",
            wr->wr_opcode);
        return (DAT_INTERNAL_ERROR);
    }

    /*
     * Now fill in the Data Segments (SGL) for the Send WQE based on
     * the values set up above (i.e. "sgl", "nds", and the "ds" pointer).
     * Start by checking for a valid number of SGL entries.
     */
    if (nds > qp->qp_sq_sgl) {
        return (DAT_INVALID_PARAMETER);
    }

    /*
     * For each SGL in the Send Work Request, fill in the Send WQE's data
     * segments.  Note: We skip any SGL with zero size because Tavor
     * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
     * the encoding for zero means a 2GB transfer.  Because of this special
     * encoding in the hardware, we mask the requested length with
     * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
     * zero.)
     */
    if (max_inline_bytes != -1) {       /* compute total_len */
        len = 0;
        for (i = 0; i < nds; i++)
            len += sgl[i].ds_len;
        if (len == 0)
            max_inline_bytes = -1;      /* do not inline */
        else {
            /* need to reduce the length by dword "len" fields */
            max_inline_bytes -= (len / 64) * sizeof (uint32_t);
            if (len > max_inline_bytes)
                max_inline_bytes = -1;  /* too big for inline */
        }
    }
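    /*
     * Inline case: the payload is copied directly into the WQE.  The
     * inline data is carved into chunks that end on 64-byte boundaries;
     * each chunk starts with a 4-byte header holding the chunk length
     * or'd with TAVOR_WQE_SGL_INLINE_MASK, and that header is written
     * (after a memory barrier) only once the chunk's data has been
     * copied in.
     */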
    if (max_inline_bytes != -1) {       /* do "inline" */

        dst = (uint8_t *)((uint32_t *)ds + 1);
        maxdst = (uint8_t *)(((uintptr_t)dst + 64) & ~(64 - 1));
        maxlen = maxdst - dst;
        thislen = 0;
        i = 0;
        src = (uint8_t *)(uintptr_t)sgl[i].ds_va;
        len = sgl[i].ds_len;
        do {
            /* if this sgl overflows the inline segment */
            if (len > maxlen) {
                if (maxlen)     /* might be 0 */
                    (void) dapl_os_memcpy(dst,
                        src, maxlen);
                membar_producer();
                *(uint32_t *)ds =
                    HTOBE_32((thislen + maxlen) |
                    TAVOR_WQE_SGL_INLINE_MASK);
                thislen = 0;
                len -= maxlen;
                src += maxlen;
                dst = maxdst + sizeof (uint32_t);
                ds = (tavor_hw_wqe_sgl_t *)(void *)maxdst;
                maxdst += 64;
                maxlen = 64 - sizeof (uint32_t);
            } else {            /* this sgl fully fits */
                (void) dapl_os_memcpy(dst,
                    src, len);
                maxlen -= len;  /* room left */
                thislen += len;
                dst += len;
                while (++i < nds)
                    if (sgl[i].ds_len)
                        break;
                if (i >= nds)
                    break;
                src = (uint8_t *)(uintptr_t)sgl[i].ds_va;
                len = sgl[i].ds_len;
            }
        } while (i < nds);
        membar_producer();
        *(uint32_t *)ds = HTOBE_32(thislen |
            TAVOR_WQE_SGL_INLINE_MASK);

        /* Return the size of descriptor (in 16-byte chunks) */
        my_size = ((uintptr_t)dst - (uintptr_t)addr + 15) >> 4;
        if (my_size <= (256 >> 4))
            *size = my_size;    /* use Hermon Blueflame */
        else
            *size = 0;
    } else {
        for (i = 0; i < nds; i++) {
            if (sgl[i].ds_len == 0) {
                continue;
            }

            /*
             * Fill in the Data Segment(s) for the current WQE,
             * using the information contained in the
             * scatter-gather list of the work request.
             */
            HERMON_WQE_BUILD_DATA_SEG(&ds[num_ds], &sgl[i]);
            num_ds++;
        }

        /* Return the size of descriptor (in 16-byte chunks) */
        my_size = ((uintptr_t)&ds[num_ds] - (uintptr_t)addr) >> 4;
        *size = 0;      /* do not use Hermon Blueflame */
    }
    HERMON_WQE_SETCTRL(addr, ctrl);
    membar_producer();
    HERMON_WQE_SETNEXT(addr, nopcode << 32, my_size,
        (wr->wr_flags & IBT_WR_SEND_FENCE) ?
        HERMON_WQE_SEND_FENCE_MASK : 0);

    return (DAT_SUCCESS);
}

/*
 * dapli_hermon_wqe_recv_build()
 * Builds the recv WQE for a given ibt_recv_wr_t
 */
static DAT_RETURN
dapli_hermon_wqe_recv_build(ib_qp_handle_t qp, ibt_recv_wr_t *wr,
    uint64_t *addr, uint_t *size)
{
    tavor_hw_wqe_sgl_t *ds;
    int i;
    int num_ds;

    /* Fill in the Data Segments (SGL) for the Recv WQE */
    ds = (tavor_hw_wqe_sgl_t *)addr;
    num_ds = 0;

    /* Check for valid number of SGL entries */
    if (wr->wr_nds > qp->qp_rq_sgl) {
        return (DAT_INVALID_PARAMETER);
    }

    /*
     * For each SGL in the Recv Work Request, fill in the Recv WQE's data
     * segments.  Note: We skip any SGL with zero size because Tavor
     * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
     * the encoding for zero means a 2GB transfer.  Because of this special
     * encoding in the hardware, we mask the requested length with
     * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
     * zero.)
     */
    for (i = 0; i < wr->wr_nds; i++) {
        if (wr->wr_sgl[i].ds_len == 0) {
            continue;
        }

        /*
         * Fill in the Data Segment(s) for the receive WQE, using the
         * information contained in the scatter-gather list of the
         * work request.
         */
        TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &wr->wr_sgl[i]);
        num_ds++;
    }
    if (i < qp->qp_rq_sgl) {
        ibt_wr_ds_t sgl;
        sgl.ds_va = (ib_vaddr_t)0;
        sgl.ds_len = (ib_msglen_t)0;
        sgl.ds_key = (ibt_lkey_t)HERMON_WQE_SGL_INVALID_LKEY;
        TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &sgl);
    }

    /* Return the size of descriptor (in 16-byte chunks) */
    *size = qp->qp_rq_wqesz >> 4;

    return (DAT_SUCCESS);
}

/*
 * dapli_hermon_wqe_srq_build()
 * Builds the recv WQE for a given ibt_recv_wr_t on an SRQ
 */
static DAT_RETURN
dapli_hermon_wqe_srq_build(ib_srq_handle_t srq, ibt_recv_wr_t *wr,
    uint64_t *addr)
{
    tavor_hw_wqe_sgl_t *ds;
    ibt_wr_ds_t end_sgl;
    int i;
    int num_ds;

    /* Fill in the Data Segments (SGL) for the Recv WQE */
    ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)addr +
        sizeof (tavor_hw_rcv_wqe_nextctrl_t));
    num_ds = 0;

    /* Check for valid number of SGL entries */
    if (wr->wr_nds > srq->srq_wq_sgl) {
        return (DAT_INVALID_PARAMETER);
    }

    /*
     * For each SGL in the Recv Work Request, fill in the Recv WQE's data
     * segments.  Note: We skip any SGL with zero size because Tavor
     * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
     * the encoding for zero means a 2GB transfer.  Because of this special
     * encoding in the hardware, we mask the requested length with
     * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
     * zero.)
     */
    for (i = 0; i < wr->wr_nds; i++) {
        if (wr->wr_sgl[i].ds_len == 0) {
            continue;
        }

        /*
         * Fill in the Data Segment(s) for the receive WQE, using the
         * information contained in the scatter-gather list of the
         * work request.
         */
        TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &wr->wr_sgl[i]);
        num_ds++;
    }

    /*
     * For SRQ, if the number of data segments is less than the maximum
     * specified at alloc, then we have to fill in a special "key" entry in
     * the sgl entry after the last valid one in this post request.  We do
     * that here.
     */
    if (num_ds < srq->srq_wq_sgl) {
        end_sgl.ds_va = (ib_vaddr_t)0;
        end_sgl.ds_len = (ib_msglen_t)0;
        end_sgl.ds_key = (ibt_lkey_t)HERMON_WQE_SGL_INVALID_LKEY;
        TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &end_sgl);
    }

    return (DAT_SUCCESS);
}

/*
 * dapli_hermon_cq_peek()
 * Peeks into a given CQ to check if there are any events that can be
 * polled.  It returns the number of CQEs that can be polled.
 */
static void
dapli_hermon_cq_peek(ib_cq_handle_t cq, int *num_cqe)
{
    uint32_t *cqe;
    uint32_t imm_eth_pkey_cred;
    uint32_t cons_indx;
    int polled_cnt;
    uint_t doorbell_cnt;
    uint_t opcode;

    /* Get the consumer index */
    cons_indx = cq->cq_consindx & cq_wrap_around_mask;

    /* Calculate the pointer to the first CQ entry */
    cqe = (uint32_t *)&cq->cq_addr[cons_indx];

    /*
     * Count entries in the CQ until we find an entry owned by
     * the hardware.
     */
    polled_cnt = 0;
    while (HERMON_CQE_OWNER_IS_SW(cq, cqe)) {
        opcode = HERMON_CQE_OPCODE_GET(cqe);
        /* Error CQEs map to multiple work completions */
        if (opcode == HERMON_CQE_ERR_OPCODE) {
            imm_eth_pkey_cred =
                TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe);
            doorbell_cnt =
                imm_eth_pkey_cred & TAVOR_CQE_ERR_DBDCNT_MASK;
            polled_cnt += (doorbell_cnt + 1);
        } else {
            polled_cnt++;
        }
        /* Increment the consumer index */
        cons_indx = (cons_indx + 1) & cq_wrap_around_mask;

        /* Update the pointer to the next CQ entry */
        cqe = (uint32_t *)&cq->cq_addr[cons_indx];
    }

    *num_cqe = polled_cnt;
}

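/*
 * Writing the low 24 bits of the consumer index into the CQ's "set CI"
 * doorbell record tells the hardware which CQEs have been consumed and
 * may be reused.
 */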
#define dapli_hermon_cq_update_ci(cq, dbp) \
    (dbp)[0] = HTOBE_32(cq->cq_consindx & 0xFFFFFF)

/*
 * dapli_hermon_cq_resize_helper()
 * This routine switches from the pre-cq_resize buffer to the new buffer.
 */
static int
dapli_hermon_cq_resize_helper(ib_cq_handle_t cq)
{
    int i;

    if ((cq->cq_resize_addr == 0) ||
        (munmap((char *)cq->cq_addr, cq->cq_map_len) < 0)) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR, "cq_resize_helper: "
            "munmap(%p:0x%llx) failed(%d)\n", cq->cq_addr,
            cq->cq_map_len, errno);
        return (1);     /* FAILED */
    }
    cq->cq_addr = cq->cq_resize_addr;
    cq->cq_map_offset = cq->cq_resize_map_offset;
    cq->cq_map_len = cq->cq_resize_map_len;
    cq->cq_size = cq->cq_resize_size;
    cq->cq_cqesz = cq->cq_resize_cqesz;
    cq->cq_resize_addr = 0;
    cq->cq_resize_map_offset = 0;
    cq->cq_resize_map_len = 0;
    cq->cq_resize_size = 0;
    cq->cq_resize_cqesz = 0;
    for (i = 0; (1 << i) < cq->cq_size; i++)
        ;
    cq->cq_log_cqsz = i;

    cq->cq_consindx++;  /* consume the RESIZE cqe */

    return (0);         /* SUCCESS */
}

/*
 * dapli_hermon_cq_poll()
 * This routine polls CQEs out of a CQ and puts them into the ibt_wc_t
 * array that is passed in.
 */
static DAT_RETURN
dapli_hermon_cq_poll(ib_cq_handle_t cq, ibt_wc_t *wc_p, uint_t num_wc,
    uint_t *num_polled)
{
    uint32_t *cqe;
    uint32_t cons_indx;
    uint32_t polled_cnt;
    DAT_RETURN dat_status;
    int status;

    /* Get the consumer index */
    cons_indx = cq->cq_consindx & cq_wrap_around_mask;

    /* Calculate the pointer to the first CQ entry */
    cqe = (uint32_t *)&cq->cq_addr[cons_indx];

    /*
     * Keep pulling entries from the CQ until we find an entry owned by
     * the hardware.  As long as the CQEs are owned by SW, process each
     * entry by calling dapli_hermon_cq_cqe_consume() and updating the
     * CQ consumer index.  Note: We only update the consumer index if
     * dapli_hermon_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.
     * Otherwise, it indicates that we are going to "recycle" the CQE
     * (probably because it is an error CQE that corresponds to more than
     * one completion).
     */
    polled_cnt = 0;
    while (HERMON_CQE_OWNER_IS_SW(cq, cqe)) {
        if (HERMON_CQE_OPCODE_GET(cqe) == HERMON_CQE_RESIZE_OPCODE) {
            if (dapli_hermon_cq_resize_helper(cq))
                return (DAT_ERROR(DAT_INTERNAL_ERROR, 0));
            cons_indx = cq->cq_consindx & cq_wrap_around_mask;
            cqe = (uint32_t *)&cq->cq_addr[cons_indx];
            continue;
        }
        status = dapli_hermon_cq_cqe_consume(cq, cqe,
            &wc_p[polled_cnt++]);
        if (status == TAVOR_CQ_SYNC_AND_DB) {
            /* Reset to hardware ownership is implicit in Hermon */
            cq->cq_consindx++;  /* incr the total counter */

            /* Increment the consumer index */
            cons_indx = (cons_indx + 1) & cq_wrap_around_mask;

            /* Update the pointer to the next CQ entry */
            cqe = (uint32_t *)&cq->cq_addr[cons_indx];
        }

        /*
         * If we have run out of space to store work completions,
         * then stop and return the ones we have pulled off the CQ.
         */
        if (polled_cnt >= num_wc) {
            break;
        }
    }

    dat_status = DAT_SUCCESS;
    /*
     * Now we only ring the doorbell (to update the consumer index) if
     * we've actually consumed a CQ entry.  If we have, for example,
     * pulled from a CQE that we are still in the process of "recycling"
     * for error purposes, then we would not update the consumer index.
     */
    if (polled_cnt != 0) {
        /*
         * Update the consumer index in both the CQ handle and the
         * doorbell record.
         */
        dapli_hermon_cq_update_ci(cq, cq->cq_poll_dbp);
    } else if (polled_cnt == 0) {
        /*
         * If the CQ is empty, we can try to free up some of the WRID
         * list containers.
         */
        if (cq->cq_wrid_reap_head)      /* look before leaping */
            dapls_tavor_wrid_cq_reap(cq);
        dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0);
    }

    if (num_polled != NULL) {
        *num_polled = polled_cnt;
    }

    return (dat_status);
}

/*
 * dapli_hermon_cq_poll_one()
 * This routine polls one CQE out of a CQ and puts it into the ibt_wc_t
 * that is passed in.  See above for more comments/details.
 */
static DAT_RETURN
dapli_hermon_cq_poll_one(ib_cq_handle_t cq, ibt_wc_t *wc_p)
{
    uint32_t *cqe;
    uint32_t cons_indx;
    DAT_RETURN dat_status;
    int status;

start_over:
    /* Get the consumer index */
    cons_indx = cq->cq_consindx & cq_wrap_around_mask;

    /* Calculate the pointer to the first CQ entry */
    cqe = (uint32_t *)&cq->cq_addr[cons_indx];

    /*
     * Keep pulling entries from the CQ until we find an entry owned by
     * the hardware.  As long as the CQEs are owned by SW, process each
     * entry by calling dapli_hermon_cq_cqe_consume() and updating the
     * CQ consumer index.  Note: We only update the consumer index if
     * dapli_hermon_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.
     * Otherwise, it indicates that we are going to "recycle" the CQE
     * (probably because it is an error CQE that corresponds to more than
     * one completion).
     */
    if (HERMON_CQE_OWNER_IS_SW(cq, cqe)) {
        if (HERMON_CQE_OPCODE_GET(cqe) == HERMON_CQE_RESIZE_OPCODE) {
            if (dapli_hermon_cq_resize_helper(cq))
                return (DAT_ERROR(DAT_INTERNAL_ERROR, 0));
            goto start_over;
        }
        status = dapli_hermon_cq_cqe_consume(cq, cqe, wc_p);
        if (status == TAVOR_CQ_SYNC_AND_DB) {
            /* Reset to hardware ownership is implicit in Hermon */

            /* Increment the consumer index */
            cq->cq_consindx++;
            dapli_hermon_cq_update_ci(cq, cq->cq_poll_dbp);
        }
        dat_status = DAT_SUCCESS;
    } else {
        if (cq->cq_wrid_reap_head)      /* look before leaping */
            dapls_tavor_wrid_cq_reap(cq);
        dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0);
    }
    return (dat_status);
}

/*
 * dapli_hermon_cq_cqe_consume()
 * Converts a given CQE into an ibt_wc_t object
 */
static int
dapli_hermon_cq_cqe_consume(ib_cq_handle_t cqhdl, uint32_t *cqe,
    ibt_wc_t *wc)
{
    uint_t flags;
    uint_t type;
    uint_t opcode;
    int status;

    /*
     * Determine if this is an "error" CQE by examining "opcode".  If it
     * is an error CQE, then call dapli_hermon_cq_errcqe_consume() and
     * return whatever status it returns.  Otherwise, this is a successful
     * completion.
     */
    opcode = HERMON_CQE_OPCODE_GET(cqe);
    if (opcode == HERMON_CQE_ERR_OPCODE) {
        status = dapli_hermon_cq_errcqe_consume(cqhdl, cqe, wc);
        return (status);
    }
    TAVOR_CQE_WQEADDRSZ_SET(cqe, (HTOBE_32(cqe[6]) >> 10) &
        ~HERMON_WQE_NDS_MASK);

    /*
     * Fetch the Work Request ID using the information in the CQE.
     * See tavor_wr.c for more details.
     */
    wc->wc_id = dapls_tavor_wrid_get_entry(cqhdl, (tavor_hw_cqe_t *)cqe,
        HERMON_CQE_SENDRECV_GET(cqe) >> 6, 0, NULL);
    wc->wc_qpn = TAVOR_CQE_QPNUM_GET(cqe);

    /*
     * Parse the CQE opcode to determine completion type.  This will set
     * not only the type of the completion, but also any flags that might
     * be associated with it (e.g. whether immediate data is present).
     */
    flags = IBT_WC_NO_FLAGS;
    if (HERMON_CQE_SENDRECV_GET(cqe) != TAVOR_COMPLETION_RECV) {

        /*
         * Send CQE
         *
         * The following opcodes will not be generated in uDAPL
         * case TAVOR_CQE_SND_RDMAWR_IMM:
         * case TAVOR_CQE_SND_SEND_IMM:
         * case TAVOR_CQE_SND_ATOMIC_CS:
         * case TAVOR_CQE_SND_ATOMIC_FA:
         */
        switch (opcode) {
        case TAVOR_CQE_SND_RDMAWR:
            type = IBT_WRC_RDMAW;
            break;

        case TAVOR_CQE_SND_SEND:
            type = IBT_WRC_SEND;
            break;

        case TAVOR_CQE_SND_RDMARD:
            type = IBT_WRC_RDMAR;
            wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cqe);
            break;

        case TAVOR_CQE_SND_BIND_MW:
            type = IBT_WRC_BIND;
            break;

        default:
            wc->wc_status = IBT_WC_LOCAL_CHAN_OP_ERR;
            return (TAVOR_CQ_SYNC_AND_DB);
        }
    } else {

        /*
         * Receive CQE
         *
         * The following opcodes will not be generated in uDAPL
         *
         * case TAVOR_CQE_RCV_RECV_IMM:
         * case TAVOR_CQE_RCV_RECV_IMM2:
         * case TAVOR_CQE_RCV_RDMAWR_IMM:
         * case TAVOR_CQE_RCV_RDMAWR_IMM2:
         */
        switch (opcode) {
        case HERMON_CQE_RCV_SEND:
            type = IBT_WRC_RECV;
            wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cqe);
            break;
        default:
            wc->wc_status = IBT_WC_LOCAL_CHAN_OP_ERR;
            return (TAVOR_CQ_SYNC_AND_DB);
        }
    }
    wc->wc_type = type;
    wc->wc_flags = flags;
    /* If we got here, completion status must be success */
    wc->wc_status = IBT_WC_SUCCESS;

    return (TAVOR_CQ_SYNC_AND_DB);
}

/*
 * dapli_hermon_cq_errcqe_consume()
 */
static int
dapli_hermon_cq_errcqe_consume(ib_cq_handle_t cqhdl, uint32_t *cqe,
    ibt_wc_t *wc)
{
    dapls_tavor_wrid_entry_t wre;
    uint_t status;
    uint_t send_or_recv;

    dapl_dbg_log(DAPL_DBG_TYPE_EVD, "errcqe_consume:cqe.eth=%x, wqe=%x\n",
        TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe),
        TAVOR_CQE_WQEADDRSZ_GET(cqe));

    status = ((uint8_t *)cqe)[0x1B];
    TAVOR_CQE_WQEADDRSZ_SET(cqe, (HTOBE_32(cqe[6]) >> 10) &
        ~HERMON_WQE_NDS_MASK);
    if (HERMON_CQE_SENDRECV_GET(cqe) == 0) {
        send_or_recv = 0;
    } else {
        send_or_recv = 1;
    }

    /*
     * Fetch the Work Request ID using the information in the CQE.
     * See tavor_wr.c for more details.
     */
    wc->wc_id = dapls_tavor_wrid_get_entry(cqhdl, (tavor_hw_cqe_t *)cqe,
        send_or_recv, 1, &wre);
    wc->wc_qpn = TAVOR_CQE_QPNUM_GET(cqe);

    /*
     * Parse the CQE opcode to determine completion type.  We know that
     * the CQE is an error completion, so we extract only the completion
     * status here.
     */
    switch (status) {
    case TAVOR_CQE_LOC_LEN_ERR:
        status = IBT_WC_LOCAL_LEN_ERR;
        break;

    case TAVOR_CQE_LOC_OP_ERR:
        status = IBT_WC_LOCAL_CHAN_OP_ERR;
        break;

    case TAVOR_CQE_LOC_PROT_ERR:
        status = IBT_WC_LOCAL_PROTECT_ERR;
        break;

    case TAVOR_CQE_WR_FLUSHED_ERR:
        status = IBT_WC_WR_FLUSHED_ERR;
        break;

    case TAVOR_CQE_MW_BIND_ERR:
        status = IBT_WC_MEM_WIN_BIND_ERR;
        break;

    case TAVOR_CQE_BAD_RESPONSE_ERR:
        status = IBT_WC_BAD_RESPONSE_ERR;
        break;

    case TAVOR_CQE_LOCAL_ACCESS_ERR:
        status = IBT_WC_LOCAL_ACCESS_ERR;
        break;

    case TAVOR_CQE_REM_INV_REQ_ERR:
        status = IBT_WC_REMOTE_INVALID_REQ_ERR;
        break;

    case TAVOR_CQE_REM_ACC_ERR:
        status = IBT_WC_REMOTE_ACCESS_ERR;
        break;

    case TAVOR_CQE_REM_OP_ERR:
        status = IBT_WC_REMOTE_OP_ERR;
        break;

    case TAVOR_CQE_TRANS_TO_ERR:
        status = IBT_WC_TRANS_TIMEOUT_ERR;
        break;

    case TAVOR_CQE_RNRNAK_TO_ERR:
        status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
        break;

    /*
     * The following error codes are not supported in the Tavor driver
     * as they relate only to Reliable Datagram completion statuses:
     * case TAVOR_CQE_LOCAL_RDD_VIO_ERR:
     * case TAVOR_CQE_REM_INV_RD_REQ_ERR:
     * case TAVOR_CQE_EEC_REM_ABORTED_ERR:
     * case TAVOR_CQE_INV_EEC_NUM_ERR:
     * case TAVOR_CQE_INV_EEC_STATE_ERR:
     * case TAVOR_CQE_LOC_EEC_ERR:
     */

    default:
        status = IBT_WC_LOCAL_CHAN_OP_ERR;
        break;
    }
    wc->wc_status = status;
    wc->wc_type = 0;

    /*
     * Consume the CQE
     * Return status to indicate that doorbell and sync may be
     * necessary.
     */
    return (TAVOR_CQ_SYNC_AND_DB);
}

/*
 * dapli_hermon_cq_notify()
 * This function is used for arming the CQ by ringing the CQ doorbell.
 *
 * Note: there is something very subtle here.  This code assumes a very
 * specific behavior of the kernel driver.  The cmd_sn field of the
 * arm_dbr is updated by the kernel driver whenever a notification
 * event for the cq is received.  This code extracts the cmd_sn field
 * from the arm_dbr to know the right value to use.  The arm_dbr is
 * always updated atomically so that neither the kernel driver nor this
 * library will get confused about what the other is doing.
 *
 * Note: param is not used here.  It is necessary for arming a CQ for
 * N completions (param is N), but no uDAPL API supports this for now.
 * Thus, we declare ARGSUSED to make lint happy.
 */
/*ARGSUSED*/
static DAT_RETURN
dapli_hermon_cq_notify(ib_cq_handle_t cq, int flags, uint32_t param)
{
    uint32_t cqnum;
    uint32_t *target;
    uint32_t old_cmd, cmp, new, tmp, cmd_sn;

    /*
     * Determine if we are trying to get the next completion or the next
     * "solicited" completion.  Then hit the appropriate doorbell.
     */
    cqnum = cq->cq_num;
    target = cq->cq_arm_dbp;
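    /*
     * The arm doorbell record is updated with an atomic compare-and-swap:
     * read the current value, build the new value using the same cmd_sn,
     * and retry if the record changed underneath us.  The CQ is only
     * re-armed if it is not already armed at least as strongly as the
     * caller is requesting.
     */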
retry:
    cmp = *target;
    tmp = HTOBE_32(cmp);
    old_cmd = tmp & (0x7 << 24);
    cmd_sn = tmp & (0x3 << 28);

    if (flags == IB_NOTIFY_ON_NEXT_COMP) {
        if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
            new = HTOBE_32(cmd_sn | HERMON_CQDB_NOTIFY_CQ |
                (cq->cq_consindx & 0xFFFFFF));
            tmp = atomic_cas_32(target, cmp, new);
            if (tmp != cmp)
                goto retry;
            dapli_hermon_cq_doorbell(cq->cq_iauar,
                HERMON_CQDB_NOTIFY_CQ, cqnum,
                cmd_sn, cq->cq_consindx);
        } /* else it's already armed */
    } else if (flags == IB_NOTIFY_ON_NEXT_SOLICITED) {
        if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
            old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
            new = HTOBE_32(cmd_sn | HERMON_CQDB_NOTIFY_CQ_SOLICIT |
                (cq->cq_consindx & 0xFFFFFF));
            tmp = atomic_cas_32(target, cmp, new);
            if (tmp != cmp)
                goto retry;
            dapli_hermon_cq_doorbell(cq->cq_iauar,
                HERMON_CQDB_NOTIFY_CQ_SOLICIT, cqnum,
                cmd_sn, cq->cq_consindx);
        } /* else it's already armed */
    } else {
        return (DAT_INVALID_PARAMETER);
    }

    return (DAT_SUCCESS);
}

/*
 * Since uDAPL posts 1 wqe per request, we
 * only need to do stores for the last one.
 */
static void
dapli_hermon_wqe_headroom(ib_qp_handle_t qp, uint32_t start)
{
    uint32_t *wqe_start, *wqe_top, *wqe_base, qsize, invalue;
    int hdrmwqes, wqesizebytes, sectperwqe, i, j;

    qsize = qp->qp_sq_numwqe;
    wqesizebytes = qp->qp_sq_wqesz;
    sectperwqe = wqesizebytes >> 6;
    hdrmwqes = qp->qp_sq_headroom;
    wqe_base = (uint32_t *)TAVOR_QP_SQ_ENTRY(qp, 0);
    wqe_top = (uint32_t *)TAVOR_QP_SQ_ENTRY(qp, qsize);
    wqe_start = (uint32_t *)TAVOR_QP_SQ_ENTRY(qp, start);

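    /*
     * Advance from the WQE being posted to the last WQE of the headroom
     * region (wrapping around the queue as needed) and stamp it invalid:
     * the first dword keeps its owner bit but has all other bits set,
     * and the first dword of each following 64-byte section is set to
     * 0xFFFFFFFF, so the hardware does not fetch beyond the WQEs that
     * have actually been posted.
     */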
    for (i = 0; i < hdrmwqes - 1; i++) {
        wqe_start += sectperwqe * 16;
        if (wqe_start == wqe_top)
            wqe_start = wqe_base;
    }
    invalue = HTOBE_32(*wqe_start);
    invalue |= 0x7FFFFFFF;
    *wqe_start = HTOBE_32(invalue);
    wqe_start += 16;
    for (j = 1; j < sectperwqe; j++) {
        *wqe_start = 0xFFFFFFFF;
        wqe_start += 16;
    }
}

/*
 * dapli_hermon_post_send()
 */
/* ARGSUSED */
static DAT_RETURN
dapli_hermon_post_send(DAPL_EP *ep, ibt_send_wr_t *wr, boolean_t ns)
{
    dapls_tavor_wrid_list_hdr_t *wridlist;
    dapls_tavor_wrid_entry_t *wre_last;
    uint64_t *desc;
    uint64_t *wqe_addr;
    uint32_t desc_sz;
    uint32_t wqeaddrsz, signaled_dbd;
    uint32_t head, tail, next_tail, qsize_msk;
    int status;
    ib_qp_handle_t qp;

    if ((ep->qp_state == IBT_STATE_RESET) ||
        (ep->qp_state == IBT_STATE_INIT) ||
        (ep->qp_state == IBT_STATE_RTR)) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "post_send: invalid qp_state %d\n", ep->qp_state);
        return (DAT_INVALID_STATE);
    }

    qp = ep->qp_handle;

    /* Grab the lock for the WRID list */
    dapl_os_lock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
    wridlist = qp->qp_sq_wqhdr->wq_wrid_post;

    /* Save away some initial QP state */
    qsize_msk = qp->qp_sq_wqhdr->wq_size - 1;
    tail = qp->qp_sq_wqhdr->wq_tail;
    head = qp->qp_sq_wqhdr->wq_head;

    /*
     * Check for "queue full" condition.  If the queue is already full,
     * then no more WQEs can be posted, return an error
     */
    if (qp->qp_sq_wqhdr->wq_full != 0) {
        dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
        return (DAT_INSUFFICIENT_RESOURCES);
    }

    /*
     * Increment the "tail index" and check for "queue full" condition.
     * If we detect that the current work request is going to fill the
     * work queue, then we mark this condition and continue.
     */
    next_tail = (tail + 1) & qsize_msk;
    if (next_tail == head) {
        qp->qp_sq_wqhdr->wq_full = 1;
    }

    /*
     * Get the user virtual address of the location where the next
     * Send WQE should be built
     */
    wqe_addr = TAVOR_QP_SQ_ENTRY(qp, tail);

    /*
     * Call dapli_hermon_wqe_send_build() to build the WQE at the given
     * address.  This routine uses the information in the ibt_send_wr_t
     * and returns the size of the WQE.
     */
    status = dapli_hermon_wqe_send_build(qp, wr, wqe_addr, &desc_sz);
    if (status != DAT_SUCCESS) {
        dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
        return (status);
    }

    /*
     * Get the descriptor (io address) corresponding to the location
     * where the Send WQE was built.
     */
    desc = TAVOR_QP_SQ_ENTRY(qp, tail);

    /*
     * Add a WRID entry to the WRID list.  Need to calculate the
     * "wqeaddr" to pass to dapli_tavor_wrid_add_entry().
     * signaled_dbd is still calculated, but ignored.
     */
    wqeaddrsz = HERMON_QP_WQEADDRSZ(qp->qp_sq_counter);

    if (wr->wr_flags & IBT_WR_SEND_SIGNAL) {
        signaled_dbd = TAVOR_WRID_ENTRY_SIGNALED;
    } else {
        signaled_dbd = 0;
    }

    dapli_tavor_wrid_add_entry(qp->qp_sq_wqhdr, wr->wr_id, wqeaddrsz,
        signaled_dbd);

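    /*
     * Invalidate the headroom WQEs beyond the one being posted, then
     * toggle the owner bit of the new WQE to hand it to the hardware.
     */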
    dapli_hermon_wqe_headroom(qp, next_tail);
    *(uint8_t *)desc ^= 0x80;   /* set owner bit */

    /*
     * Now if the WRID tail entry is non-NULL, then this
     * represents the entry to which we are chaining the
     * new entries.  Since we are going to ring the
     * doorbell for this WQE, we want to set its "dbd" bit.
     *
     * On the other hand, if the tail is NULL, even though
     * we will have rung the doorbell for the previous WQE
     * (for the hardware's sake) it is irrelevant to our
     * purposes (for tracking WRIDs) because we know the
     * request must have already completed.
     */
    wre_last = wridlist->wl_wre_old_tail;
    if (wre_last != NULL) {
        wre_last->wr_signaled_dbd |= TAVOR_WRID_ENTRY_DOORBELLED;
    }

    /* Update some of the state in the QP */
    qp->qp_sq_lastwqeaddr = wqe_addr;
    qp->qp_sq_wqhdr->wq_tail = next_tail;

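    /*
     * BlueFlame path: when the WQE is small enough (desc_sz != 0, i.e.
     * it fits in 256 bytes) and the QP has a BlueFlame buffer mapped,
     * the descriptor is copied directly into one of the two 256-byte
     * BlueFlame buffers in the UAR page, which posts the work without a
     * separate doorbell write.  The toggle alternates between the two
     * buffers and a process-wide spinlock serializes writers.  Larger
     * WQEs fall back to ringing the send doorbell.
     */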
    if (desc_sz && qp->qp_ia_bf != NULL) {      /* use Hermon Blueflame */
        uint64_t *bf_dest, *src64;
        uint8_t *src8;
        int i;

        (void) pthread_spin_lock(&hermon_bf_lock);

        src8 = (uint8_t *)desc;
        src8[1] = (uint8_t)(qp->qp_sq_counter >> 8);
        src8[2] = (uint8_t)qp->qp_sq_counter;
        src8[4] = (uint8_t)(qp->qp_num >> 16);
        src8[5] = (uint8_t)(qp->qp_num >> 8);
        src8[6] = (uint8_t)qp->qp_num;

        src64 = (uint64_t *)desc;
        bf_dest = (uint64_t *)((uintptr_t)qp->qp_ia_bf +
            *qp->qp_ia_bf_toggle);
        *qp->qp_ia_bf_toggle ^= 256;    /* 2 256-byte buffers */
        for (i = 0; i < desc_sz * 2; i += 8) {
            bf_dest[i] = src64[i];
            bf_dest[i + 1] = src64[i + 1];
            bf_dest[i + 2] = src64[i + 2];
            bf_dest[i + 3] = src64[i + 3];
            bf_dest[i + 4] = src64[i + 4];
            bf_dest[i + 5] = src64[i + 5];
            bf_dest[i + 6] = src64[i + 6];
            bf_dest[i + 7] = src64[i + 7];
        }
        (void) pthread_spin_unlock(&hermon_bf_lock);
    } else {
        /* Ring the doorbell */
        dapli_hermon_sq_dbreg(qp->qp_iauar, qp->qp_num);
    }
    qp->qp_sq_counter++;

    dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);

    return (DAT_SUCCESS);
}

/*
 * dapli_hermon_post_recv()
 */
/* ARGSUSED */
static DAT_RETURN
dapli_hermon_post_recv(DAPL_EP *ep, ibt_recv_wr_t *wr, boolean_t ns)
{
    dapls_tavor_wrid_list_hdr_t *wridlist;
    dapls_tavor_wrid_entry_t *wre_last;
    ib_qp_handle_t qp;
    DAT_RETURN status;
    uint64_t *wqe_addr;
    uint32_t desc_sz;
    uint32_t wqeaddrsz;
    uint32_t head, tail, next_tail, qsize_msk;

    if (ep->qp_state == IBT_STATE_RESET) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "post_recv: invalid qp_state %d\n", ep->qp_state);
        return (DAT_INVALID_STATE);
    }
    qp = ep->qp_handle;

    /* Grab the lock for the WRID list */
    dapl_os_lock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
    wridlist = qp->qp_rq_wqhdr->wq_wrid_post;

    /* Save away some initial QP state */
    qsize_msk = qp->qp_rq_wqhdr->wq_size - 1;
    tail = qp->qp_rq_wqhdr->wq_tail;
    head = qp->qp_rq_wqhdr->wq_head;

    /*
     * For the ibt_recv_wr_t passed in, parse the request and build a
     * Recv WQE.  Link the WQE with the previous WQE and ring the
     * doorbell.
     */

    /*
     * Check for "queue full" condition.  If the queue is already full,
     * then no more WQEs can be posted.  So return an error.
     */
    if (qp->qp_rq_wqhdr->wq_full != 0) {
        dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
        return (DAT_INSUFFICIENT_RESOURCES);
    }

    /*
     * Increment the "tail index" and check for "queue
     * full" condition.  If we detect that the current
     * work request is going to fill the work queue, then
     * we mark this condition and continue.
     */
    next_tail = (tail + 1) & qsize_msk;
    if (next_tail == head) {
        qp->qp_rq_wqhdr->wq_full = 1;
    }

    /* The user virtual address of the WQE to be built */
    wqe_addr = TAVOR_QP_RQ_ENTRY(qp, tail);

    /*
     * Call dapli_hermon_wqe_recv_build() to build the WQE at the given
     * address.  This routine uses the information in the
     * ibt_recv_wr_t and returns the size of the WQE.
     */
    status = dapli_hermon_wqe_recv_build(qp, wr, wqe_addr, &desc_sz);
    if (status != DAT_SUCCESS) {
        dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
        return (DAT_INTERNAL_ERROR);
    }

    /*
     * Add a WRID entry to the WRID list.  Need to calculate the
     * "wqeaddr" and "signaled_dbd" values to pass to
     * dapli_tavor_wrid_add_entry().
     * Note: all Recv WQEs are essentially "signaled"
     */
    wqeaddrsz = HERMON_QP_WQEADDRSZ(qp->qp_rq_counter);
    dapli_tavor_wrid_add_entry(qp->qp_rq_wqhdr, wr->wr_id, wqeaddrsz,
        (uint32_t)TAVOR_WRID_ENTRY_SIGNALED);

    /*
     * Now if the WRID tail entry is non-NULL, then this
     * represents the entry to which we are chaining the
     * new entries.  Since we are going to ring the
     * doorbell for this WQE, we want to set its "dbd" bit.
     *
     * On the other hand, if the tail is NULL, even though
     * we will have rung the doorbell for the previous WQE
     * (for the hardware's sake) it is irrelevant to our
     * purposes (for tracking WRIDs) because we know the
     * request must have already completed.
     */
    wre_last = wridlist->wl_wre_old_tail;
    if (wre_last != NULL) {
        wre_last->wr_signaled_dbd |= TAVOR_WRID_ENTRY_DOORBELLED;
    }

    /* Update some of the state in the QP */
    qp->qp_rq_lastwqeaddr = wqe_addr;
    qp->qp_rq_wqhdr->wq_tail = next_tail;

    /* Update the doorbell record */
    qp->qp_rq_counter++;
    (qp->qp_rq_dbp)[0] = HTOBE_32(qp->qp_rq_counter);

    dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);

    return (DAT_SUCCESS);
}

/*
 * dapli_hermon_post_srq()
 */
/* ARGSUSED */
static DAT_RETURN
dapli_hermon_post_srq(DAPL_SRQ *srqp, ibt_recv_wr_t *wr, boolean_t ns)
{
    ib_srq_handle_t srq;
    DAT_RETURN status;
    uint32_t desc;
    uint64_t *wqe_addr;
    uint32_t head, next_head, qsize_msk;
    uint32_t wqe_index;


    srq = srqp->srq_handle;

    /* Grab the lock for the WRID list */
    dapl_os_lock(&srq->srq_wridlist->wl_lock->wrl_lock);

    /*
     * For the ibt_recv_wr_t passed in, parse the request and build a
     * Recv WQE.  Link the WQE with the previous WQE and ring the
     * doorbell.
     */

    /*
     * Check for "queue full" condition.  If the queue is already full,
     * ie. there are no free entries, then no more WQEs can be posted.
     * So return an error.
     */
    if (srq->srq_wridlist->wl_freel_entries == 0) {
        dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);
        return (DAT_INSUFFICIENT_RESOURCES);
    }

    /* Save away some initial SRQ state */
    qsize_msk = srq->srq_wridlist->wl_size - 1;
    head = srq->srq_wridlist->wl_freel_head;

    next_head = (head + 1) & qsize_msk;

    /* Get the descriptor (IO Address) of the WQE to be built */
    desc = srq->srq_wridlist->wl_free_list[head];

    wqe_index = TAVOR_SRQ_WQ_INDEX(srq->srq_wq_desc_addr, desc,
        srq->srq_wq_wqesz);

    /* The user virtual address of the WQE to be built */
    wqe_addr = TAVOR_SRQ_WQ_ENTRY(srq, wqe_index);

    /*
     * Call dapli_hermon_wqe_srq_build() to build the WQE at the given
     * address.  This routine uses the information in the
     * ibt_recv_wr_t and returns the size of the WQE.
     */
    status = dapli_hermon_wqe_srq_build(srq, wr, wqe_addr);
    if (status != DAT_SUCCESS) {
        dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);
        return (status);
    }

    /*
     * Add a WRID entry to the WRID list.
     */
    dapli_tavor_wrid_add_entry_srq(srq, wr->wr_id, wqe_index);

#if 0
    if (srq->srq_wq_lastwqeindex == -1) {
        last_wqe_addr = NULL;
    } else {
        last_wqe_addr = TAVOR_SRQ_WQ_ENTRY(srq,
            srq->srq_wq_lastwqeindex);
    }
    /*
     * Now link the chain to the old chain (if there was one)
     * and update the wqe_counter in the doorbell record.
     */
XXX
    dapli_tavor_wqe_srq_linknext(wqe_addr, ns, desc, last_wqe_addr);
#endif

    /* Update some of the state in the SRQ */
    srq->srq_wq_lastwqeindex = wqe_index;
    srq->srq_wridlist->wl_freel_head = next_head;
    srq->srq_wridlist->wl_freel_entries--;
    dapl_os_assert(srq->srq_wridlist->wl_freel_entries <=
        srq->srq_wridlist->wl_size);

    /* Update the doorbell record */
    srq->srq_counter++;
    (srq->srq_dbp)[0] = HTOBE_32(srq->srq_counter);

    dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);

    return (DAT_SUCCESS);
}

/*
 * dapli_hermon_cq_srq_entries_flush()
 */
static void
dapli_hermon_cq_srq_entries_flush(ib_qp_handle_t qp)
{
    ib_cq_handle_t cq;
    dapls_tavor_workq_hdr_t *wqhdr;
    tavor_hw_cqe_t *cqe;
    tavor_hw_cqe_t *next_cqe;
    uint32_t cons_indx, tail_cons_indx;
    uint32_t new_indx, check_indx, indx;
    int cqe_qpnum, cqe_type;
    int outstanding_cqes, removed_cqes;
    int i;

    /* ASSERT(MUTEX_HELD(&qp->qp_rq_cqhdl->cq_lock)); */

    cq = qp->qp_rq_cqhdl;
    wqhdr = qp->qp_rq_wqhdr;

    dapl_os_assert(wqhdr->wq_wrid_post != NULL);
    dapl_os_assert(wqhdr->wq_wrid_post->wl_srq_en != 0);

    /* Get the consumer index */
    cons_indx = cq->cq_consindx;

    /* Calculate the pointer to the first CQ entry */
    cqe = &cq->cq_addr[cons_indx];

    /*
     * Loop through the CQ looking for entries owned by software.  If an
     * entry is owned by software then we increment an 'outstanding_cqes'
     * count to know how many entries total we have on our CQ.  We use this
     * value further down to know how many entries to loop through looking
     * for our same QP number.
     */
    outstanding_cqes = 0;
    tail_cons_indx = cons_indx;
    while (TAVOR_CQE_OWNER_IS_SW(cqe)) {
        /* increment total cqes count */
        outstanding_cqes++;

        /* increment the consumer index */
        tail_cons_indx = (tail_cons_indx + 1) & cq_wrap_around_mask;

        /* update the pointer to the next cq entry */
        cqe = &cq->cq_addr[tail_cons_indx];
    }

    /*
     * Using the 'tail_cons_indx' that was just set, we now know how many
     * total CQEs possible there are.  Set the 'check_indx' and the
     * 'new_indx' to the last entry identified by 'tail_cons_indx'
     */
    check_indx = new_indx = (tail_cons_indx - 1) & cq_wrap_around_mask;

    for (i = 0; i < outstanding_cqes; i++) {
        cqe = &cq->cq_addr[check_indx];

        /* Grab QP number from CQE */
        cqe_qpnum = TAVOR_CQE_QPNUM_GET(cqe);
        cqe_type = HERMON_CQE_SENDRECV_GET(cqe);

        /*
         * If the QP number is the same in the CQE as the QP that we
         * have on this SRQ, then we must free up the entry off the
         * SRQ.  We also make sure that the completion type is of the
         * 'TAVOR_COMPLETION_RECV' type.  So any send completions on
         * this CQ will be left as-is.  The handling of returning
         * entries back to HW ownership happens further down.
         */
        if (cqe_qpnum == qp->qp_num &&
            cqe_type == TAVOR_COMPLETION_RECV) {
            /* Add back to SRQ free list */
            (void) dapli_tavor_wrid_find_match_srq(
                wqhdr->wq_wrid_post, cqe);
        } else {
            /* Do Copy */
            if (check_indx != new_indx) {
                next_cqe = &cq->cq_addr[new_indx];
                /*
                 * Copy the CQE into the "next_cqe"
                 * pointer.
                 */
                (void) dapl_os_memcpy(next_cqe, cqe,
                    sizeof (tavor_hw_cqe_t));
            }
            new_indx = (new_indx - 1) & cq_wrap_around_mask;
        }
        /* Move index to next CQE to check */
        check_indx = (check_indx - 1) & cq_wrap_around_mask;
    }

    /* Initialize removed cqes count */
    removed_cqes = 0;

    /* If an entry was removed */
    if (check_indx != new_indx) {

        /*
         * Set current pointer back to the beginning consumer index.
         * At this point, all unclaimed entries have been copied to the
         * index specified by 'new_indx'.  This 'new_indx' will be used
         * as the new consumer index after we mark all freed entries as
         * having HW ownership.  We do that here.
         */

        /* Loop through all entries until we reach our new pointer */
        for (indx = cons_indx; indx <= new_indx;
            indx = (indx + 1) & cq_wrap_around_mask) {
            removed_cqes++;
            cqe = &cq->cq_addr[indx];

            /* Reset entry to hardware ownership */
            TAVOR_CQE_OWNER_SET_HW(cqe);
        }
    }

    /*
     * Update consumer index to be the 'new_indx'.  This moves it past all
     * removed entries.  Because 'new_indx' is pointing to the last
     * previously valid SW owned entry, we add 1 to point the cons_indx to
     * the first HW owned entry.
     */
    cons_indx = (new_indx + 1) & cq_wrap_around_mask;

    /*
     * Now we only ring the doorbell (to update the consumer index) if
     * we've actually consumed a CQ entry.  If we found no QP number
     * matches above, then we would not have removed anything.  So only if
     * something was removed do we ring the doorbell.
     */
    if ((removed_cqes != 0) && (cq->cq_consindx != cons_indx)) {
        /*
         * Update the consumer index in both the CQ handle and the
         * doorbell record.
         */
        cq->cq_consindx = cons_indx;
        dapli_hermon_cq_update_ci(cq, cq->cq_poll_dbp);
    }
}

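/*
 * dapli_hermon_rq_prelink()
 * Pre-links the whole receive (or shared receive) queue: the first word
 * of each WQE is pointed at the descriptor offset of the next WQE (the
 * last one links back to the first), the second word holds "nds", and a
 * scatter entry with the invalid lkey terminates each WQE.
 */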
static void
dapli_hermon_rq_prelink(caddr_t first, uint32_t desc_off, uint32_t wqesz,
    uint32_t numwqe, uint32_t nds)
{
    int i;
    uint32_t *p = (uint32_t *)(uintptr_t)first;
    uint32_t off = desc_off;
    uint32_t pincr = wqesz / sizeof (uint32_t);
    ibt_wr_ds_t sgl;

    sgl.ds_va = (ib_vaddr_t)0;
    sgl.ds_key = HERMON_WQE_SGL_INVALID_LKEY;
    sgl.ds_len = (ib_msglen_t)0;

    for (i = 0; i < numwqe - 1; i++, p += pincr) {
        off += wqesz;
        p[0] = HTOBE_32(off);           /* link curr to next */
        p[1] = nds;                     /* nds is 0 for SRQ */
        TAVOR_WQE_BUILD_DATA_SEG((void *)&p[2], &sgl);
    }
    p[0] = HTOBE_32(desc_off);          /* link last to first */
    p[1] = nds;
    TAVOR_WQE_BUILD_DATA_SEG((void *)&p[2], &sgl);
}

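/*
 * dapli_hermon_sq_init()
 * Stamps the first word of every 64-byte section of every send WQE with
 * 0xFFFFFFFF, marking the whole send queue invalid before any work is
 * posted.
 */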
static void
dapli_hermon_sq_init(caddr_t first, uint32_t wqesz, uint32_t numwqe)
{
    int i, j;
    uint64_t *wqe = (uint64_t *)(uintptr_t)first;

    for (i = 0; i < numwqe; i++) {
        for (j = 0; j < wqesz; j += 64, wqe += 8)
            *(uint32_t *)wqe = 0xFFFFFFFF;
    }
}

static void
dapli_hermon_qp_init(ib_qp_handle_t qp)
{
    dapli_hermon_sq_init(qp->qp_sq_buf, qp->qp_sq_wqesz, qp->qp_sq_numwqe);
    qp->qp_rq_counter = 0;
    qp->qp_sq_counter = 0;
}

static void
dapli_hermon_cq_init(ib_cq_handle_t cq)
{
    uint32_t i;

    (cq->cq_arm_dbp)[0] = HTOBE_32(1 << 28);
    for (i = 0; (1 << i) < cq->cq_size; i++)
        ;
    cq->cq_log_cqsz = i;
    cq->cq_consindx = 0;

    /* cq_resize -- needs testing */
}

static void
dapli_hermon_srq_init(ib_srq_handle_t srq)
{
    /* pre-link the whole shared receive queue */
    dapli_hermon_rq_prelink(srq->srq_addr, srq->srq_wq_desc_addr,
        srq->srq_wq_wqesz, srq->srq_wq_numwqe, 0);
    srq->srq_counter = 0;

    /* needs testing */
}

void
dapls_init_funcs_hermon(DAPL_HCA *hca_ptr)
{
    hca_ptr->post_send = dapli_hermon_post_send;
    hca_ptr->post_recv = dapli_hermon_post_recv;
    hca_ptr->post_srq = dapli_hermon_post_srq;
    hca_ptr->cq_peek = dapli_hermon_cq_peek;
    hca_ptr->cq_poll = dapli_hermon_cq_poll;
    hca_ptr->cq_poll_one = dapli_hermon_cq_poll_one;
    hca_ptr->cq_notify = dapli_hermon_cq_notify;
    hca_ptr->srq_flush = dapli_hermon_cq_srq_entries_flush;
    hca_ptr->qp_init = dapli_hermon_qp_init;
    hca_ptr->cq_init = dapli_hermon_cq_init;
    hca_ptr->srq_init = dapli_hermon_srq_init;
    hca_ptr->hermon_resize_cq = 1;

    (void) pthread_spin_init(&hermon_bf_lock, 0);
}