/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file may contain confidential information of
* Mellanox Technologies, Ltd. and should not be distributed in source
* form without approval from Sun Legal.
*/
#include "dapl.h"
#include "dapl_tavor_hw.h"
#include "dapl_tavor_wr.h"
#include "dapl_tavor_ibtf_impl.h"
/*
* Function signatures
*/
extern void dapls_tavor_wrid_cq_reap(ib_cq_handle_t);
extern DAPL_OS_LOCK g_tavor_uar_lock;
#ifndef _LP64
#endif
ibt_wc_t *);
ibt_wc_t *);
/* exported to other HCAs */
/*
* Note: The 64 bit doorbells need to be written atomically.
* In 32 bit libraries we need to use the special assembly routine
* because compiler generated code splits the write into 2 word writes
*/
/* use a macro to ensure inlining on S10 amd64 compiler */
#else
/*
* dapli_tavor_cq_doorbell()
* Takes the specified cq cmd and cq number and rings the cq doorbell
*/
static void
{
/* Build the doorbell from the parameters */
/* Write the doorbell to UAR */
#ifdef _LP64
/* 32 bit version */
/*
* For 32 bit intel we assign the doorbell in the order
* prescribed by the Tavor PRM, lower to upper addresses
*/
#else
#endif
}
#pragma inline(dapli_tavor_cq_doorbell)
#endif /* _LP64 */
#else
/*
* dapli_tavor_qp_send_doorbell()
* Takes the specified next descriptor information, qp number, opcode and
* rings the send doorbell
*/
static void
{
/* Build the doorbell from the parameters */
/* Write the doorbell to UAR */
#ifdef _LP64
#else
#if defined(i386)
/*
* For 32 bit intel we assign the doorbell in the order
* prescribed by the Tavor PRM, lower to upper addresses
*/
#else
#endif
#endif
}
#pragma inline(dapli_tavor_qp_send_doorbell)
#endif /* _LP64 */
#else
/*
* dapli_tavor_qp_recv_doorbell()
* Takes the specified next descriptor information, qp number and
* rings the recv doorbell
*/
static void
{
/* Build the doorbell from the parameters */
/* Write the doorbell to UAR */
#ifdef _LP64
#else
#if defined(i386)
/*
* For 32 bit intel we assign the doorbell in the order
* prescribed by the Tavor PRM, lower to upper addresses
*/
#else
#endif
#endif
}
#pragma inline(dapli_tavor_qp_recv_doorbell)
#endif /* _LP64 */
/*
* dapls_tavor_max_inline()
* Return the max inline value that should be used.
* Env variable DAPL_MAX_INLINE can override the default.
* If it's not set (or set to -1), default behavior is used.
* If it's zero or negative (except -1) inline is not done.
*
* Returns the current value of max_inline_env: -1 selects the default
* behavior, 0 means no inlining, and a positive value is returned
* unchanged.
*
* NOTE(review): neither the declaration of max_inline_env nor the
* statement that reads DAPL_MAX_INLINE is visible in this block -
* confirm both against the full file.
*/
int
dapls_tavor_max_inline(void)
{
/* Check the env exactly once, otherwise return previous value. */
/* Any value other than -2 is treated as an already-computed result. */
if (max_inline_env != -2)
return (max_inline_env);
/* -1 is left untouched; every other non-positive value becomes 0. */
if (max_inline_env != -1)
if (max_inline_env <= 0)
max_inline_env = 0; /* no inlining */
return (max_inline_env);
}
/*
* dapls_ib_max_request_iov(), aka, max send sgl size.
*
* By default, compute reasonable send queue size based on #iovs, #wqes,
* max_iovs, and max inline byte count. If the #wqes is large, then we
* limit how much the SGL (space for inline data) can take. The heuristic
* is to increase the memory for the send queue to a maximum of 32KB:
*
* < 128 wqes increase to at most 256 minus header
* < 256 wqes increase to at most 128 minus header
* >= 256 wqes use SGL unaltered
*
* If the env is supplied (max_inline >= 0), use it without checking.
* NOTE(review): the code below tests max_inline_bytes > 0, not >= 0 -
* confirm which bound is intended.
*
* Returns ret_iovs, the iov count selected by the matching branch.
* NOTE(review): the full parameter list and the branch bodies that
* assign ret_iovs are not visible in this block.
*/
int
int max_inline_bytes)
{
int ret_iovs;
/*
* Branch order: an explicit inline byte count is checked first, then
* the two wqes thresholds (128 and 256) from the table above, and
* finally the case of 256 or more wqes.
*/
if (max_inline_bytes > 0) {
} else if (wqes < 128) {
} else if (wqes < 256) {
} else {
}
return (ret_iovs);
}
/*
* dapli_tavor_wqe_send_build()
* Constructs a WQE for a given ibt_send_wr_t
*/
static int
{
int i, num_ds;
num_ds = 0;
/*
* RC is the only supported transport in UDAPL
* For RC requests, we allow "Send", "RDMA Read", "RDMA Write"
*/
case IBT_WRC_SEND:
/*
* If this is a Send request, then all we need is
* the Data Segment processing below.
* Initialize the information for the Data Segments
*/
sizeof (tavor_hw_snd_wqe_nextctrl_t));
if (qp->qp_sq_inline != 0)
break;
case IBT_WRC_RDMAW:
if (qp->qp_sq_inline != 0)
/* FALLTHROUGH */
case IBT_WRC_RDMAR:
qp->qp_sq_inline = 0;
/*
* If this is an RDMA Read or RDMA Write request, then fill
* in the "Remote Address" header fields.
*/
sizeof (tavor_hw_snd_wqe_nextctrl_t));
/*
* Build the Remote Address Segment for the WQE, using
* the information from the RC work request.
*/
/* Update "ds" for filling in Data Segments (below) */
sizeof (tavor_hw_snd_wqe_remaddr_t));
break;
case IBT_WRC_BIND:
/*
* Generate a new R_key.
* Increment the upper "unconstrained" bits while keeping
* the lower "constrained" bits the same, because they represent
* the MPT index.
*/
new_rkey++;
(old_rkey & tavor_num_mpt_mask));
sizeof (tavor_hw_snd_wqe_nextctrl_t));
/*
* Build the Bind Memory Window Segments for the WQE,
* using the information from the RC Bind memory
* window work request.
*/
/*
* Update the "ds" pointer. Even though the "bind"
* operation requires no SGLs, this is necessary to
* facilitate the correct descriptor size calculations
* (below).
*/
sizeof (tavor_hw_snd_wqe_bind_t));
break;
default:
"dapli_tavor_wqe_send_build: invalid wr_opcode=%d\n",
return (DAT_INTERNAL_ERROR);
}
/*
* Now fill in the Data Segments (SGL) for the Send WQE based on
* the values set up above (i.e. "sgl", "nds", and the "ds" pointer).
* Start by checking for a valid number of SGL entries.
*/
return (DAT_INVALID_PARAMETER);
}
/*
* For each SGL in the Send Work Request, fill in the Send WQE's data
* segments. Note: We skip any SGL with zero size because Tavor
* hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
* the encoding for zero means a 2GB transfer. Because of this special
* encoding in the hardware, we mask the requested length with
* TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
* zero.)
*/
total_len = 0;
for (i = 0; i < nds; i++)
if (total_len > max_inline_bytes)
}
for (i = 0; i < nds; i++) {
continue;
}
(void) dapl_os_memcpy(dst,
}
/* Return the size of descriptor (in 16-byte chunks) */
} else {
for (i = 0; i < nds; i++) {
continue;
}
/*
* Fill in the Data Segment(s) for the current WQE,
* using the information contained in the
* scatter-gather list of the work request.
*/
num_ds++;
}
/* Return the size of descriptor (in 16-byte chunks) */
}
return (DAT_SUCCESS);
}
/*
* dapli_tavor_wqe_send_linknext()
* Takes a WQE and links it to the prev WQE chain
*/
static void
{
next = 0;
ctrl = 0;
/* Set the "c" (i.e. "signaled") bit appropriately */
}
/* Set the "s" (i.e. "solicited") bit appropriately */
}
/* Set the "e" (i.e. "event") bit if notification is needed */
if (!ns) {
}
/*
* The "i" bit is unused since uDAPL doesn't support
* the immediate data
*/
/* initialize the ctrl and next fields of the current descriptor */
/*
* Calculate the "next" field of the prev descriptor. This amounts
* to setting up the "next_wqe_addr", "nopcode", "fence", and "nds"
* fields (see tavor_hw.h for more).
*/
/*
* Determine the value for the Tavor WQE "nopcode" field
* by using the IBTF opcode from the work request
*/
case IBT_WRC_RDMAW:
break;
case IBT_WRC_SEND:
break;
case IBT_WRC_RDMAR:
break;
case IBT_WRC_BIND:
break;
default:
/* Unsupported opcodes in UDAPL */
"dapli_tavor_wqe_send_linknext: invalid nopcode=%d\n",
nopcode);
return;
}
if (fence) {
}
/*
* A send queue doorbell will be rung for the next
* WQE on the chain, set the current WQE's "dbd" bit.
* Note: We also update the "dbinfo" structure here to pass
* back information about what should (later) be included
* in the send queue doorbell.
*/
/*
* Send queue doorbell will be rung for the next WQE on
* the chain, update the prev WQE's "next" field and return.
*/
}
}
/*
* dapli_tavor_wqe_recv_build()
* Builds the recv WQE for a given ibt_recv_wr_t
*/
static DAT_RETURN
{
int i;
int num_ds;
/* Fill in the Data Segments (SGL) for the Recv WQE */
sizeof (tavor_hw_rcv_wqe_nextctrl_t));
num_ds = 0;
/* Check for valid number of SGL entries */
return (DAT_INVALID_PARAMETER);
}
/*
* For each SGL in the Recv Work Request, fill in the Recv WQE's data
* segments. Note: We skip any SGL with zero size because Tavor
* hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
* the encoding for zero means a 2GB transfer. Because of this special
* encoding in the hardware, we mask the requested length with
* TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
* zero.)
*/
continue;
}
/*
* Fill in the Data Segment(s) for the receive WQE, using the
* information contained in the scatter-gather list of the
* work request.
*/
num_ds++;
}
/* Return the size of descriptor (in 16-byte chunks) */
return (DAT_SUCCESS);
}
/*
* dapli_tavor_wqe_recv_linknext()
* Links a recv WQE to the prev chain
*/
static void
{
/*
* Note: curr_addr is the last WQE (in uDAPL we manipulate 1 WQE
* at a time). If there is no next descriptor (i.e. if the current
* descriptor is the last WQE on the chain), then set "next" field
* to TAVOR_WQE_DBD_MASK. This is because the Tavor hardware
* requires the "dbd" bit to be set to one for all Recv WQEs.
* In either case, we must add a single bit in the "reserved" field
* (TAVOR_RCV_WQE_NDA0_WA_MASK) following the NDA. This is the
* workaround for a known Tavor errata that can cause Recv WQEs with
* zero in the NDA field to behave improperly.
*
* If notification suppression is not desired then we set
* the "E" bit in the ctrl field.
*/
if (!ns) { /* notification needed - so set the "E" bit */
}
/* update the WQE */
/*
* Calculate the "next" field of the descriptor. This amounts
* to setting up the "next_wqe_addr", "dbd", and "nds" fields
* (see tavor_hw.h for more).
*/
/*
* If this WQE is supposed to be linked to the previous
* descriptor, then we need to update the previous WQE's
* "next" fields, but we must not touch this WQE's
* "ctrl" fields.
*/
}
}
/*
* dapli_tavor_wqe_srq_build()
* Builds the SRQ recv WQE for a given ibt_recv_wr_t
*/
static DAT_RETURN
{
int i;
int num_ds;
/* Fill in the Data Segments (SGL) for the Recv WQE */
sizeof (tavor_hw_rcv_wqe_nextctrl_t));
num_ds = 0;
/* Check for valid number of SGL entries */
return (DAT_INVALID_PARAMETER);
}
/*
* For each SGL in the Recv Work Request, fill in the Recv WQE's data
* segments. Note: We skip any SGL with zero size because Tavor
* hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
* the encoding for zero means a 2GB transfer. Because of this special
* encoding in the hardware, we mask the requested length with
* TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
* zero.)
*/
continue;
}
/*
* Fill in the Data Segment(s) for the receive WQE, using the
* information contained in the scatter-gather list of the
* work request.
*/
num_ds++;
}
/*
* For SRQ, if the number of data segments is less than the maximum
* specified at alloc, then we have to fill in a special "key" entry in
* the sgl entry after the last valid one in this post request. We do
* that here.
*/
}
return (DAT_SUCCESS);
}
/*
* dapli_tavor_wqe_srq_linknext()
* Links a srq recv WQE to the prev chain
*/
static void
{
/*
* Note: curr_addr is the last WQE (in uDAPL we manipulate 1 WQE
* at a time). If there is no next descriptor (i.e. if the current
* descriptor is the last WQE on the chain), then set "next" field
* to TAVOR_WQE_DBD_MASK. This is because the Tavor hardware
* requires the "dbd" bit to be set to one for all Recv WQEs.
* In either case, we must add a single bit in the "reserved" field
* (TAVOR_RCV_WQE_NDA0_WA_MASK) following the NDA. This is the
* workaround for a known Tavor errata that can cause Recv WQEs with
* zero in the NDA field to behave improperly.
*
* If notification suppression is not desired then we set
* the "E" bit in the ctrl field.
*/
if (!ns) { /* notification needed - so set the "E" bit */
}
/* update the WQE */
/*
* Calculate the "next" field of the descriptor. This amounts
* to setting up the "next_wqe_addr", "dbd", and "nds" fields
* (see tavor_hw.h for more).
*/
/*
* If this WQE is supposed to be linked to the previous
* descriptor, then we need to update the previous WQE's
* "next" fields, but we must not touch this WQE's
* "ctrl" fields.
*/
}
}
/*
* dapli_tavor_cq_peek()
* Peeks into a given CQ to check if there are any events that can be
* polled. It returns the number of CQEs that can be polled.
*/
static void
{
/* Get the consumer index */
/*
* Calculate the wrap around mask. Note: This operation only works
* because all Tavor completion queues have power-of-2 sizes
*/
/* Calculate the pointer to the first CQ entry */
/*
* Count entries in the CQ until we find an entry owned by
* the hardware.
*/
polled_cnt = 0;
while (TAVOR_CQE_OWNER_IS_SW(cqe)) {
/* Error CQE map to multiple work completions */
if ((opcode == TAVOR_CQE_SEND_ERR_OPCODE) ||
(opcode == TAVOR_CQE_RECV_ERR_OPCODE)) {
} else {
polled_cnt++;
}
/* Increment the consumer index */
/* Update the pointer to the next CQ entry */
}
*num_cqe = polled_cnt;
}
/*
* dapli_tavor_cq_poll()
* This routine polls CQEs out of a CQ and puts them into the ibt_wc_t
* array that is passed in.
*/
static DAT_RETURN
{
int status;
/* Get the consumer index */
/*
* Calculate the wrap around mask. Note: This operation only works
* because all Tavor completion queues have power-of-2 sizes
*/
/* Calculate the pointer to the first CQ entry */
/*
* Keep pulling entries from the CQ until we find an entry owned by
* the hardware. As long as there are CQEs owned by SW, process
* each entry by calling dapli_tavor_cq_cqe_consume() and updating the
* CQ consumer index. Note: We only update the consumer index if
* dapli_tavor_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.
* Otherwise, it indicates that we are going to "recycle" the CQE
* (probably because it is an error CQE and corresponds to more than one
* completion).
*/
polled_cnt = 0;
while (TAVOR_CQE_OWNER_IS_SW(cqe)) {
&wc_p[polled_cnt++]);
if (status == TAVOR_CQ_SYNC_AND_DB) {
/* Reset entry to hardware ownership */
/* Increment the consumer index */
/* Update the pointer to the next CQ entry */
}
/*
* If we have run out of space to store work completions,
* then stop and return the ones we have pulled off the CQ.
*/
if (polled_cnt >= num_wc) {
break;
}
}
/*
* Now we only ring the doorbell (to update the consumer index) if
* we've actually consumed a CQ entry. If we have, for example,
* pulled from a CQE that we are still in the process of "recycling"
* for error purposes, then we would not update the consumer index.
*/
/*
* Post doorbell to update the consumer index. Doorbell
* value indicates number of entries consumed (minus 1)
*/
} else {
}
} else if (polled_cnt == 0) {
/*
* If the CQ is empty, we can try to free up some of the WRID
* list containers.
*/
}
if (num_polled != NULL) {
*num_polled = polled_cnt;
}
return (dat_status);
}
/*
* dapli_tavor_cq_poll_one()
* This routine polls one CQE out of a CQ and puts it into the ibt_wc_t
*/
static DAT_RETURN
{
int status;
/* Get the consumer index */
/* Calculate the pointer to the first CQ entry */
/*
* Process at most one entry. If the CQE at the consumer index is
* owned by SW, process it by calling dapli_tavor_cq_cqe_consume() and
* update the CQ consumer index. Note: We only update the consumer
* index if dapli_tavor_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.
* Otherwise, it indicates that we are going to "recycle" the CQE
* (probably because it is an error CQE and corresponds to more than one
* completion).
*/
if (TAVOR_CQE_OWNER_IS_SW(cqe)) {
if (status == TAVOR_CQ_SYNC_AND_DB) {
/* Reset entry to hardware ownership */
/* Increment the consumer index */
cq->cq_consindx =
}
} else {
}
return (dat_status);
}
/*
* dapli_tavor_cq_cqe_consume()
* Converts a given CQE into a ibt_wc_t object
*/
static int
{
int status;
/*
* Determine if this is an "error" CQE by examining "opcode". If it
* is an error CQE, then call dapli_tavor_cq_errcqe_consume() and return
* whatever status it returns. Otherwise, this is a successful
* completion.
*/
if ((opcode == TAVOR_CQE_SEND_ERR_OPCODE) ||
(opcode == TAVOR_CQE_RECV_ERR_OPCODE)) {
return (status);
}
/*
* Fetch the Work Request ID using the information in the CQE.
* See tavor_wr.c for more details.
*/
/*
* Parse the CQE opcode to determine completion type. This will set
* not only the type of the completion, but also any flags that might
* be associated with it (e.g. whether immediate data is present).
*/
/*
* Send CQE
*
* The following opcodes will not be generated in uDAPL
* case TAVOR_CQE_SND_RDMAWR_IMM:
* case TAVOR_CQE_SND_SEND_IMM:
* case TAVOR_CQE_SND_ATOMIC_CS:
* case TAVOR_CQE_SND_ATOMIC_FA:
*/
switch (opcode) {
case TAVOR_CQE_SND_RDMAWR:
break;
case TAVOR_CQE_SND_SEND:
type = IBT_WRC_SEND;
break;
case TAVOR_CQE_SND_RDMARD:
break;
case TAVOR_CQE_SND_BIND_MW:
type = IBT_WRC_BIND;
break;
default:
return (TAVOR_CQ_SYNC_AND_DB);
}
} else {
/*
* Receive CQE
*
* The following opcodes will not be generated in uDAPL
*
* case TAVOR_CQE_RCV_RECV_IMM:
* case TAVOR_CQE_RCV_RECV_IMM2:
* case TAVOR_CQE_RCV_RDMAWR_IMM:
* case TAVOR_CQE_RCV_RDMAWR_IMM2:
*/
switch (opcode & 0x1F) {
case TAVOR_CQE_RCV_RECV:
/* FALLTHROUGH */
case TAVOR_CQE_RCV_RECV2:
type = IBT_WRC_RECV;
break;
default:
return (TAVOR_CQ_SYNC_AND_DB);
}
}
/* If we got here, completion status must be success */
return (TAVOR_CQ_SYNC_AND_DB);
}
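/*
* dapli_tavor_cq_errcqe_consume()
* Handles an error CQE: extracts the completion status and reports
* whether the CQE is consumed (TAVOR_CQ_SYNC_AND_DB) or must be
* recycled for the next poll (TAVOR_CQ_RECYCLE_ENTRY).
*/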
static int
{
/*
* Fetch the Work Request ID using the information in the CQE.
* See tavor_wr.c for more details.
*/
/*
* Parse the CQE opcode to determine completion type. We know that
* the CQE is an error completion, so we extract only the completion
* status here.
*/
switch (status) {
case TAVOR_CQE_LOC_LEN_ERR:
break;
case TAVOR_CQE_LOC_OP_ERR:
break;
case TAVOR_CQE_LOC_PROT_ERR:
break;
case TAVOR_CQE_WR_FLUSHED_ERR:
break;
case TAVOR_CQE_MW_BIND_ERR:
break;
break;
break;
break;
case TAVOR_CQE_REM_ACC_ERR:
break;
case TAVOR_CQE_REM_OP_ERR:
break;
case TAVOR_CQE_TRANS_TO_ERR:
break;
case TAVOR_CQE_RNRNAK_TO_ERR:
break;
/*
* The following error codes are not supported in the Tavor driver
* as they relate only to Reliable Datagram completion statuses:
* case TAVOR_CQE_LOCAL_RDD_VIO_ERR:
* case TAVOR_CQE_REM_INV_RD_REQ_ERR:
* case TAVOR_CQE_EEC_REM_ABORTED_ERR:
* case TAVOR_CQE_INV_EEC_NUM_ERR:
* case TAVOR_CQE_INV_EEC_STATE_ERR:
* case TAVOR_CQE_LOC_EEC_ERR:
*/
default:
break;
}
/*
* Now we do all the checking that's necessary to handle completion
* queue entry "recycling"
*
* It is not necessary here to try to sync the WQE as we are only
* attempting to read from the Work Queue (and hardware does not
* write to it).
*/
/*
* We can get doorbell info, WQE address, size for the next WQE
* from the "wre" (which was filled in above in the call to the
* tavor_wrid_get_entry() routine)
*/
/*
* Get the doorbell count from the CQE. This indicates how many
* completions this one CQE represents.
*/
/*
* Determine if we're ready to consume this CQE yet or not. If the
* next WQE has size zero (i.e. no next WQE) or if the doorbell count
* is down to zero, then this is the last WQE from the chain indicated
* by the current CQE (return TAVOR_CQ_SYNC_AND_DB). Otherwise, the
* current CQE needs to be recycled (see below).
*/
/*
* Consume the CQE
* Return status to indicate that doorbell and sync may be
* necessary.
*/
return (TAVOR_CQ_SYNC_AND_DB);
} else {
/*
* Recycle the CQE for use in the next PollCQ() call.
* Decrement the doorbell count, modify the error status,
* and update the WQE address and size (to point to the
* next WQE on the chain). Put these updated entries back
* into the CQE.
* Despite the fact that we have updated the CQE, it is not
* necessary for us to attempt to sync this entry just yet
* as we have not changed the "hardware's view" of the
* entry (i.e. we have not modified the "owner" bit - which
* is all that the Tavor hardware really cares about).
*/
"errcqe_consume: recycling cqe.eth=%x, wqe=%x\n",
return (TAVOR_CQ_RECYCLE_ENTRY);
}
}
/*
* dapli_tavor_cq_notify()
* This function is used for arming the CQ by ringing the CQ doorbell.
*/
static DAT_RETURN
{
/*
* Determine if we are trying to get the next completion or the next
* "solicited" completion. Then hit the appropriate doorbell.
*/
if (flags == IB_NOTIFY_ON_NEXT_COMP) {
} else if (flags == IB_NOTIFY_ON_NEXT_SOLICITED) {
} else if (flags == IB_NOTIFY_ON_NEXT_NCOMP) {
} else {
return (DAT_INVALID_PARAMETER);
}
return (DAT_SUCCESS);
}
/*
* dapli_tavor_post_send()
*/
static DAT_RETURN
{
int status;
return (DAT_INVALID_STATE);
}
/* Grab the lock for the WRID list */
/* Save away some initial QP state */
/*
* Check for "queue full" condition. If the queue is already full,
* then no more WQEs can be posted, return an error
*/
return (DAT_INSUFFICIENT_RESOURCES);
}
/*
* Increment the "tail index" and check for "queue full" condition.
* If we detect that the current work request is going to fill the
* work queue, then we mark this condition and continue.
*/
}
/*
* Get the user virtual address of the location where the next
* Send WQE should be built
*/
/*
* Call dapli_tavor_wqe_send_build() to build the WQE at the given
* address. This routine uses the information in the ibt_send_wr_t and
* returns the size of the WQE when it returns.
*/
if (status != DAT_SUCCESS) {
return (status);
}
/*
* Get the descriptor (io address) corresponding to the location
* where the Send WQE was built.
*/
/*
* Add a WRID entry to the WRID list. Need to calculate the
* "wqeaddrsz" and "signaled_dbd" values to pass to
* dapli_tavor_wrid_add_entry()
*/
}
/*
* Now link the wqe to the old chain (if there was one)
*/
/*
* Now if the WRID tail entry is non-NULL, then this
* represents the entry to which we are chaining the
* new entries. Since we are going to ring the
* doorbell for this WQE, we want to set its "dbd" bit.
*
* On the other hand, if the tail is NULL, even though
* we will have rung the doorbell for the previous WQE
* (for the hardware's sake) it is irrelevant to our
* purposes (for tracking WRIDs) because we know the
* request must have already completed.
*/
}
/* Update some of the state in the QP */
/* Ring the doorbell */
return (DAT_SUCCESS);
}
/*
* dapli_tavor_post_recv()
*/
static DAT_RETURN
{
return (DAT_INVALID_STATE);
}
/* Grab the lock for the WRID list */
/* Save away some initial QP state */
/*
* For the ibt_recv_wr_t passed in, parse the request and build a
* Recv WQE. Link the WQE with the previous WQE and ring the
* door bell.
*/
/*
* Check for "queue full" condition. If the queue is already full,
* then no more WQEs can be posted. So return an error.
*/
return (DAT_INSUFFICIENT_RESOURCES);
}
/*
* Increment the "tail index" and check for "queue
* full" condition. If we detect that the current
* work request is going to fill the work queue, then
* we mark this condition and continue.
*/
}
/* Get the descriptor (IO Address) of the WQE to be built */
/* The user virtual address of the WQE to be built */
/*
* Call dapli_tavor_wqe_recv_build() to build the WQE at the given
* address. This routine uses the information in the
* ibt_recv_wr_t and returns the size of the WQE.
*/
if (status != DAT_SUCCESS) {
return (DAT_INTERNAL_ERROR);
}
/*
* Add a WRID entry to the WRID list. Need to calculate the
* "wqeaddrsz" and "signaled_dbd" values to pass to
* dapli_tavor_wrid_add_entry().
* Note: all Recv WQEs are essentially "signaled"
*/
/*
* Now link the chain to the old chain (if there was one)
* and ring the doorbell for the recv work queue.
*/
/*
* Now if the WRID tail entry is non-NULL, then this
* represents the entry to which we are chaining the
* new entries. Since we are going to ring the
* doorbell for this WQE, we want to set its "dbd" bit.
*
* On the other hand, if the tail is NULL, even though
* we will have rung the doorbell for the previous WQE
* (for the hardware's sake) it is irrelevant to our
* purposes (for tracking WRIDs) because we know the
* request must have already completed.
*/
}
/* Update some of the state in the QP */
/* Ring the doorbell */
return (DAT_SUCCESS);
}
/*
* dapli_tavor_post_srq()
*/
static DAT_RETURN
{
/* Grab the lock for the WRID list */
/*
* For the ibt_recv_wr_t passed in, parse the request and build a
* Recv WQE. Link the WQE with the previous WQE and ring the
* door bell.
*/
/*
* Check for "queue full" condition. If the queue is already full,
* ie. there are no free entries, then no more WQEs can be posted.
* So return an error.
*/
return (DAT_INSUFFICIENT_RESOURCES);
}
/* Save away some initial SRQ state */
/* Get the descriptor (IO Address) of the WQE to be built */
srq->srq_wq_wqesz);
/* The user virtual address of the WQE to be built */
/*
* Call dapli_tavor_wqe_srq_build() to build the WQE at the given
* address. This routine uses the information in the
* ibt_recv_wr_t and returns the size of the WQE.
*/
if (status != DAT_SUCCESS) {
return (status);
}
/*
* Add a WRID entry to the WRID list.
*/
} else {
}
/*
* Now link the chain to the old chain (if there was one)
* and ring the doorbell for the SRQ.
*/
/* Update some of the state in the SRQ */
/* Ring the doorbell - for SRQ nds = 0 */
return (DAT_SUCCESS);
}
/*
* dapli_tavor_wrid_add_entry()
*/
extern void
{
/*
* Find the entry in the container pointed to by the "tail" index.
* Add all of the relevant information to that entry, including WRID,
* "wqeaddrsz" parameter, and whether it was signaled/unsignaled
*/
/*
* Update the "wrid_old_tail" pointer to point to the entry we just
* inserted into the queue. By tracking this pointer (the pointer to
* the most recently inserted entry) it will be possible later in the
* PostSend() and PostRecv() code paths to find the entry that needs
* its "dbd" bit set (see dapli_tavor_post_recv() and/or
* dapli_tavor_post_send()).
*/
/* Update the tail index */
/*
* If the "tail" index has just wrapped over into the "head" index,
* then we have filled the container. We use the "full" flag to
* indicate this condition and to distinguish it from the "empty"
* condition (where head and tail are also equal).
*/
}
}
/*
* dapli_tavor_wrid_add_entry_srq()
*/
extern void
{
/* ASSERT on impossible wqe_index values */
/*
* Setup the WRE.
*
* Given the 'wqe_index' value, we store the WRID at this WRE offset.
* And we set the WRE to be signaled_dbd so that on poll CQ we can find
* this information and associate the WRID to the WQE found on the CQE.
* Note: all Recv WQEs are essentially "signaled"
*/
}
/*
* dapli_tavor_cq_srq_entries_flush()
*/
static void
{
int i;
/* ASSERT(MUTEX_HELD(&qp->qp_rq_cqhdl->cq_lock)); */
/* Get the consumer index */
/*
* Calculate the wrap around mask. Note: This operation only works
* because all Tavor completion queues have power-of-2 sizes
*/
/* Calculate the pointer to the first CQ entry */
/*
* Loop through the CQ looking for entries owned by software. If an
* entry is owned by software then we increment an 'outstanding_cqes'
* count to know how many entries total we have on our CQ. We use this
* value further down to know how many entries to loop through looking
* for our same QP number.
*/
outstanding_cqes = 0;
while (TAVOR_CQE_OWNER_IS_SW(cqe)) {
/* increment total cqes count */
/* increment the consumer index */
/* update the pointer to the next cq entry */
}
/*
* Using the 'tail_cons_indx' that was just set, we now know how many
* total CQEs possible there are. Set the 'check_indx' and the
* 'new_indx' to the last entry identified by 'tail_cons_indx'
*/
for (i = 0; i < outstanding_cqes; i++) {
/* Grab QP number from CQE */
/*
* If the QP number is the same in the CQE as the QP that we
* have on this SRQ, then we must free up the entry off the
* SRQ. We also make sure that the completion type is of the
* 'TAVOR_COMPLETION_RECV' type. So any send completions on
* this CQ will be left as-is. The handling of returning
* entries back to HW ownership happens further down.
*/
/* Add back to SRQ free list */
(void) dapli_tavor_wrid_find_match_srq(
} else {
/* Do Copy */
if (check_indx != new_indx) {
/*
* Copy the CQE into the "next_cqe"
* pointer.
*/
sizeof (tavor_hw_cqe_t));
}
}
/* Move index to next CQE to check */
}
/* Initialize removed cqes count */
removed_cqes = 0;
/* If an entry was removed */
if (check_indx != new_indx) {
/*
* Set current pointer back to the beginning consumer index.
* At this point, all unclaimed entries have been copied to the
* index specified by 'new_indx'. This 'new_indx' will be used
* as the new consumer index after we mark all freed entries as
* having HW ownership. We do that here.
*/
/* Loop through all entries until we reach our new pointer */
removed_cqes++;
/* Reset entry to hardware ownership */
}
}
/*
* Update consumer index to be the 'new_indx'. This moves it past all
* removed entries. Because 'new_indx' is pointing to the last
* previously valid SW owned entry, we add 1 to point the cons_indx to
* the first HW owned entry.
*/
/*
* Now we only ring the doorbell (to update the consumer index) if
* we've actually consumed a CQ entry. If we found no QP number
* matches above, then we would not have removed anything. So only if
* something was removed do we ring the doorbell.
*/
/*
* Post doorbell to update the consumer index. Doorbell
* value indicates number of entries consumed (minus 1)
*/
} else {
}
}
}
/* ARGSUSED */
static void
{
}
/* ARGSUSED */
static void
{
}
/* ARGSUSED */
static void
{
}
void
{
hca_ptr->hermon_resize_cq = 0;
}