/* tavor_qp.c revision 9e39c5ba00a55fa05777cc94b148296af305e135 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Tavor Queue Pair Processing Routines
*
* Implements all the routines necessary for allocating, freeing, and
* querying the Tavor queue pairs.
*/
#include <sys/sysmacros.h>
tavor_rsrc_t *qpc);
static int tavor_qpn_avl_compare(const void *q, const void *e);
/*
* tavor_qp_alloc()
* Context: Can be called only from user or kernel context.
*/
int
{
char *errormsg;
/*
* Check the "options" flag. Currently this flag tells the driver
* whether or not the QP's work queues should be come from normal
* system memory or whether they should be allocated from DDR memory.
*/
} else {
}
/*
* Extract the necessary info from the tavor_qp_info_t structure
*/
/*
* Determine whether QP is being allocated for userland access or
* whether it is being allocated for kernel access. If the QP is
* being allocated for userland access, then lookup the UAR doorbell
* page number for the current process. Note: If this is not found
* (e.g. if the process has not previously open()'d the Tavor driver),
* then an error is returned.
*/
if (qp_is_umap) {
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail;
}
}
/*
* Determine whether QP is being associated with an SRQ
*/
if (qp_srq_en) {
/*
* Check for valid SRQ handle pointers
*/
/* Set "status" and "errormsg" and goto failure */
"invalid SRQ handle");
goto qpalloc_fail;
}
}
/*
*/
(type != IBT_UC_RQP))) {
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail;
}
/*
* Only RC is supported on an SRQ -- This is a Tavor hardware
* limitation. Arbel native mode will not have this shortcoming.
*/
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail;
}
/*
* Check for valid PD handle pointer
*/
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail;
}
/*
* If on an SRQ, check to make sure the PD is the same
*/
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail;
}
/* Increment the reference count on the protection domain (PD) */
/*
* Check for valid CQ handle pointers
*/
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail1;
}
/*
* Increment the reference count on the CQs. One or both of these
* could return error if we determine that the given CQ is already
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail1;
}
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail2;
}
/*
* Allocate an QP context entry. This will be filled in with all
* the necessary parameters to define the Queue Pair. Unlike
* other Tavor hardware resources, ownership is not immediately
* given to hardware in the final step here. Instead, we must
* wait until the QP is later transitioned to the "Init" state before
* passing the QP to hardware. If we fail here, we must undo all
* the reference count (CQ and PD).
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail3;
}
/*
* Allocate the software structure for tracking the queue pair
* (i.e. the Tavor Queue Pair handle). If we fail here, we must
* undo the reference counts and the previous resource allocation.
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail4;
}
/*
* Calculate the QP number from QPC index. This routine handles
* all of the operations necessary to keep track of used, unused,
* and released QP numbers.
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail5;
}
/*
* If this will be a user-mappable QP, then allocate an entry for
* the "userland resources database". This will later be added to
* the database (after all further QP operations are successful).
* If we fail here, we must undo the reference counts and the
* previous resource allocation.
*/
if (qp_is_umap) {
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail6;
}
}
/*
* If this is an RC QP, then pre-allocate the maximum number of RDB
* entries. This allows us to ensure that we can later cover all
* the resources needed by hardware for handling multiple incoming
* RDMA Reads. Note: These resources are obviously not always
* necessary. They are allocated here anyway. Someday maybe this
* can be modified to allocate these on-the-fly (i.e. only if RDMA
* Read or Atomic operations are enabled) XXX
* If we fail here, we have a bunch of resource and reference count
* cleanup to do.
*/
if (type == IBT_RC_RQP) {
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail7;
}
/* Calculate offset (into DDR memory) of RDB entries */
}
/*
* Calculate the appropriate size for the work queues.
* Note: All Tavor QP work queues must be a power-of-2 in size. Also
* they may not be any smaller than TAVOR_QP_MIN_SIZE. This step is
* to round the requested size up to the next highest power-of-2
*/
}
}
/*
* Next we verify that the rounded-up size is valid (i.e. consistent
* then obviously we have a lot of cleanup to do before returning.
*/
(!qp_srq_en && (log_qp_rq_size >
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail8;
}
/*
* Next we verify that the requested number of SGL is valid (i.e.
* limits). If not, then obviously the same cleanup needs to be done.
*/
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail8;
}
/*
* Determine this QP's WQE sizes (for both the Send and Recv WQEs).
* This will depend on the requested number of SGLs. Note: this
* has the side-effect of also calculating the real number of SGLs
* (for the calculated WQE size).
*
* For QP's on an SRQ, we set these to 0.
*/
if (qp_srq_en) {
qp->qp_rq_log_wqesz = 0;
} else {
}
/*
* Allocate the memory for QP work queues. Note: The location from
* which we will allocate these work queues has been passed in
* through the tavor_qp_options_t structure. Since Tavor work queues
* are not allowed to cross a 32-bit (4GB) boundary, the alignment of
* the work queue memory is very important. We used to allocate
* work queues (the combined receive and send queues) so that they
* would be aligned on their combined size. That alignment guaranteed
* that they would never cross the 4GB boundary (Tavor work queues
* are on the order of MBs at maximum). Now we are able to relax
* this alignment constraint by ensuring that the IB address assigned
* to the queue memory (as a result of the tavor_mr_register() call)
* is offset from zero.
* Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
* guarantee the alignment, but when attempting to use IOMMU bypass
* mode we found that we were not allowed to specify any alignment
* that was more restrictive than the system page size.
* So we avoided this constraint by passing two alignment values,
* one for the memory allocation itself and the other for the DMA
* handle (for later bind). This used to cause more memory than
* necessary to be allocated (in order to guarantee the more
* restrictive alignment contraint). But be guaranteeing the
* zero-based IB virtual address for the queue, we are able to
* conserve this memory.
* Note: If QP is not user-mappable, then it may come from either
* kernel system memory or from HCA-attached local DDR memory.
*/
/* QP on SRQ sets these to 0 */
if (qp_srq_en) {
rq_wqe_size = 0;
rq_size = 0;
} else {
}
if (qp_is_umap) {
} else {
}
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail8;
}
if (sq_wqe_size > rq_wqe_size) {
/*
* If QP's on an SRQ, we set the rq_buf to NULL
*/
if (qp_srq_en)
else
} else {
}
/*
* Register the memory for the QP work queues. The memory for the
* QP must be registered in the Tavor TPT tables. This gives us the
* LKey to specify in the QP context later. Note: The memory for
* Tavor work queues (both Send and Recv) must be contiguous and
* registered as a single memory region. Note also: If the work
* queue is to be allocated from DDR memory, then only a "bypass"
* mapping is appropriate. And if the QP memory is user-mappable,
* then we force DDI_DMA_CONSISTENT mapping.
* Also, in order to meet the alignment restriction, we pass the
* "mro_bind_override_addr" flag in the call to tavor_mr_register().
* This guarantees that the resulting IB vaddr will be zero-based
* (modulo the offset into the first page).
* If we fail here, we still have the bunch of resource and reference
* count cleanup to do.
*/
if (qp_is_umap) {
} else {
if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
if (dma_xfer_mode == DDI_DMA_STREAMING) {
}
} else {
}
}
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto qpalloc_fail9;
}
/*
* Calculate the offset between the kernel virtual address space
* and the IB virtual address space. This will be used when
* posting work requests to properly initialize each WQE.
*/
/*
* Fill in all the return arguments (if necessary). This includes
* real work queue sizes, real SGLs, and QP number
*/
/* QP on an SRQ set these to 0 */
if (qp_srq_en) {
} else {
}
}
}
/*
* Fill in the rest of the Tavor Queue Pair handle. We can update
* the following fields for use in further operations on the QP.
*/
qp->qp_is_special = 0;
/* QP on an SRQ sets this to 0 */
if (qp_srq_en) {
qp->qp_rq_bufsz = 0;
} else {
}
qp->qp_forward_sqd_event = 0;
qp->qp_sqd_still_draining = 0;
qp->qp_mcg_refcnt = 0;
/*
* If this QP is to be associated with an SRQ, then set the SRQ handle
* appropriately.
*/
if (qp_srq_en) {
} else {
}
/* Determine if later ddi_dma_sync will be necessary */
/* Determine the QP service type */
if (type == IBT_RC_RQP) {
} else if (type == IBT_UD_RQP) {
} else {
}
/* Zero out the QP context */
/*
* Put QP handle in Tavor QPNum-to-QPHdl list. Then fill in the
* "qphdl" and return success
*/
/*
* If this is a user-mappable QP, then we need to insert the previously
* allocated entry into the "userland resources database". This will
* allow for later lookup during devmap() (i.e. mmap()) calls.
*/
if (qp_is_umap) {
}
return (DDI_SUCCESS);
/*
* The following is cleanup for all possible failure cases in this routine
*/
if (type == IBT_RC_RQP) {
}
if (qp_is_umap) {
}
/*
* Releasing the QPN will also free up the QPC context. Update
* the QPC context pointer to indicate this.
*/
if (qpc) {
}
return (status);
}
/*
* tavor_special_qp_alloc()
* Context: Can be called only from user or kernel context.
*/
int
{
/*
 * NOTE(review): this extract is truncated -- the parameter list, local
 * declarations, and most executable statements of
 * tavor_special_qp_alloc() are missing.  Only the visible goto-based
 * cleanup chain (spec_qpalloc_fail..spec_qpalloc_fail6) and original
 * comments are annotated; verify against the complete source file.
 */
char *errormsg;
/*
 * Check the "options" flag. Currently this flag tells the driver
 * whether or not the QP's work queues should come from normal
 * system memory or whether they should be allocated from DDR memory.
 */
} else {
}
/*
 * Extract the necessary info from the tavor_qp_info_t structure
 */
/*
 * Check for valid special QP type (only SMI & GSI supported)
 */
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail;
}
/*
 * Check for valid port number
 */
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail;
}
/*
 * Check for valid PD handle pointer
 */
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail;
}
/* Increment the reference count on the PD */
/*
 * Check for valid CQ handle pointers
 */
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail1;
}
/*
 * Increment the reference count on the CQs. One or both of these
 * could return error if we determine that the given CQ is already
 * being used with a non-special QP (i.e. a normal QP).
 */
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail1;
}
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail2;
}
/*
 * Allocate the special QP resources. Essentially, this allocation
 * amounts to checking if the requested special QP has already been
 * allocated. If successful, the QP context return is an actual
 * QP context that has been "aliased" to act as a special QP of the
 * appropriate type (and for the appropriate port). Just as in
 * tavor_qp_alloc() above, ownership for this QP context is not
 * immediately given to hardware in the final step here. Instead, we
 * wait until the QP is later transitioned to the "Init" state before
 * passing the QP to hardware. If we fail here, we must undo all
 * the reference count (CQ and PD).
 */
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail3;
}
/*
 * Allocate the software structure for tracking the special queue
 * pair (i.e. the Tavor Queue Pair handle). If we fail here, we
 * must undo the reference counts and the previous resource allocation.
 */
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail4;
}
/*
 * Actual QP number is a combination of the index of the QPC and
 * the port number. This is because the special QP contexts must
 * be allocated two-at-a-time.
 */
/*
 * Calculate the appropriate size for the work queues.
 * Note: All Tavor QP work queues must be a power-of-2 in size. Also
 * they may not be any smaller than TAVOR_QP_MIN_SIZE. This step is
 * to round the requested size up to the next highest power-of-2
 */
}
}
/*
 * Next we verify that the rounded-up size is valid (i.e. consistent
 * then obviously we have a bit of cleanup to do before returning.
 */
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail5;
}
/*
 * Next we verify that the requested number of SGL is valid (i.e.
 * limits). If not, then obviously the same cleanup needs to be done.
 */
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail5;
}
/*
 * Determine this QP's WQE sizes (for both the Send and Recv WQEs).
 * This will depend on the requested number of SGLs. Note: this
 * has the side-effect of also calculating the real number of SGLs
 * (for the calculated WQE size).
 */
if (type == IBT_SMI_SQP) {
} else {
}
/*
 * Allocate the memory for QP work queues. Note: The location from
 * which we will allocate these work queues has been passed in
 * through the tavor_qp_options_t structure. Since Tavor work queues
 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
 * the work queue memory is very important. We used to allocate
 * work queues (the combined receive and send queues) so that they
 * would be aligned on their combined size. That alignment guaranteed
 * that they would never cross the 4GB boundary (Tavor work queues
 * are on the order of MBs at maximum). Now we are able to relax
 * this alignment constraint by ensuring that the IB address assigned
 * to the queue memory (as a result of the tavor_mr_register() call)
 * is offset from zero.
 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
 * guarantee the alignment, but when attempting to use IOMMU bypass
 * mode we found that we were not allowed to specify any alignment
 * that was more restrictive than the system page size.
 * So we avoided this constraint by passing two alignment values,
 * one for the memory allocation itself and the other for the DMA
 * handle (for later bind). This used to cause more memory than
 * necessary to be allocated (in order to guarantee the more
 * restrictive alignment constraint). But by guaranteeing the
 * zero-based IB virtual address for the queue, we are able to
 * conserve this memory.
 */
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail5;
}
if (sq_wqe_size > rq_wqe_size) {
} else {
}
/*
 * Register the memory for the special QP work queues. The memory for
 * the special QP must be registered in the Tavor TPT tables. This
 * gives us the LKey to specify in the QP context later. Note: The
 * memory for Tavor work queues (both Send and Recv) must be contiguous
 * and registered as a single memory region. Note also: If the work
 * queue is to be allocated from DDR memory, then only a "bypass"
 * mapping is appropriate.
 * Also, in order to meet the alignment restriction, we pass the
 * "mro_bind_override_addr" flag in the call to tavor_mr_register().
 * This guarantees that the resulting IB vaddr will be zero-based
 * (modulo the offset into the first page).
 * If we fail here, we have a bunch of resource and reference count
 * cleanup to do.
 */
if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
if (dma_xfer_mode == DDI_DMA_STREAMING) {
}
} else {
}
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto spec_qpalloc_fail6;
}
/*
 * Calculate the offset between the kernel virtual address space
 * and the IB virtual address space. This will be used when
 * posting work requests to properly initialize each WQE.
 */
/*
 * Fill in all the return arguments (if necessary). This includes
 * real work queue sizes, real SGLs, and QP number (which will be
 * either zero or one, depending on the special QP type)
 */
}
/*
 * Fill in the rest of the Tavor Queue Pair handle. We can update
 * the following fields for use in further operations on the QP.
 */
qp->qp_is_umap = 0;
qp->qp_pkeyindx = 0;
qp->qp_mcg_refcnt = 0;
/* Determine if later ddi_dma_sync will be necessary */
/* All special QPs are UD QP service type */
/* Zero out the QP context */
/*
 * Put QP handle in Tavor QPNum-to-QPHdl list. Then fill in the
 * "qphdl" and return success
 */
return (DDI_SUCCESS);
/*
 * The following is cleanup for all possible failure cases in this routine
 */
/*
 * NOTE(review): the spec_qpalloc_fail..spec_qpalloc_fail6 labels and
 * their cleanup statements are not visible in this extract.
 */
}
return (status);
}
/*
* tavor_qp_free()
* This function frees up the QP resources. Depending on the value
* of the "free_qp_flags", the QP number may not be released until
* a subsequent call to tavor_qp_release_qpn().
*
* Context: Can be called only from user or kernel context.
*/
/* ARGSUSED */
int
{
/*
 * NOTE(review): this extract is truncated -- the parameter list and
 * many statements of tavor_qp_free() are missing, including the
 * qpfree_fail label targeted by the gotos below (the trailing
 * "return (status)" presumably sits under it).  Verify against the
 * complete source file.
 */
int status;
char *errormsg;
/*
 * Pull all the necessary information from the Tavor Queue Pair
 * handle. This is necessary here because the resource for the
 * QP handle is going to be freed up as part of this operation.
 */
/*
 * If the QP is part of an MCG, then we fail the qp_free
 */
if (qp->qp_mcg_refcnt != 0) {
goto qpfree_fail;
}
/*
 * If the QP is not already in "Reset" state, then transition to
 * "Reset". This is necessary because software does not reclaim
 * ownership of the QP context until the QP is in the "Reset" state.
 * If the ownership transfer fails for any reason, then it is an
 * indication that something (either in HW or SW) has gone seriously
 * wrong. So we print a warning message and return.
 */
/* Set "status" and "errormsg" and goto failure */
"reset QP context");
goto qpfree_fail;
}
/*
 * Do any additional handling necessary for the transition
 * to the "Reset" state (e.g. update the WRID lists)
 */
}
/*
 * If this was a user-mappable QP, then we need to remove its entry
 * from the "userland resources database". If it is also currently
 * mmap()'d out to a user process, then we need to call
 * devmap_devmem_remap() to remap the QP memory to an invalid mapping.
 * We also need to invalidate the QP tracking information for the
 * user mapping.
 */
if (qp->qp_is_umap) {
&umapdb);
if (status != DDI_SUCCESS) {
return (ibc_get_ci_failure(0));
}
if (status != DDI_SUCCESS) {
"devmap_devmem_remap()");
return (ibc_get_ci_failure(0));
}
}
}
/*
 * Put NULL into the Tavor QPNum-to-QPHdl list. This will allow any
 * in-progress events to detect that the QP corresponding to this
 * number has been freed. Note: it does depend on whether we are
 * freeing a special QP or not.
 */
if (qp->qp_is_special) {
} else {
}
/*
 * Drop the QP lock
 * At this point the lock is no longer necessary. We cannot
 * protect from multiple simultaneous calls to free the same QP.
 * In addition, since the QP lock is contained in the QP "software
 * handle" resource, which we will free (see below), it is
 * important that we have no further references to that memory.
 */
/*
 * Free the QP resources
 * Start by deregistering and freeing the memory for work queues.
 * Next free any previously allocated context information
 * (depending on QP type)
 * Finally, decrement the necessary reference counts.
 * If this fails for any reason, then it is an indication that
 * something (either in HW or SW) has gone seriously wrong. So we
 * print a warning message and return.
 */
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto qpfree_fail;
}
/* Free the memory for the QP */
/*
 * Free up the remainder of the QP resources. Note: we have a few
 * different resources to free up depending on whether the QP is a
 * special QP or not. As described above, if any of these fail for
 * any reason it is an indication that something (either in HW or SW)
 * has gone seriously wrong. So we print a warning message and
 * return.
 */
if (qp->qp_is_special) {
/* Free up resources for the special QP */
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
"failed special QP rsrc");
goto qpfree_fail;
}
} else {
/* Free up the RDB entries resource */
if (type == TAVOR_QP_RC) {
}
/*
 * Check the flags and determine whether to release the
 * QPN or not, based on their value.
 */
if (free_qp_flags == IBC_FREE_QP_ONLY) {
} else {
}
}
/* Free the Tavor Queue Pair handle */
/* Decrement the reference counts on CQs, PD and SRQ (if needed) */
if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
}
/* Set the qphdl pointer to NULL and return success */
return (DDI_SUCCESS);
return (status);
}
/*
* tavor_qp_query()
* Context: Can be called from interrupt or base context.
*/
int
{
/*
 * NOTE(review): this extract is truncated -- the parameter list, the
 * switch statement that owns the TAVOR_QP_* case labels below, and
 * most assignments are missing.  Verify against the complete source.
 */
int status;
/*
 * Grab the temporary QPC entry from QP software state
 */
/* Convert the current Tavor QP state to IBTF QP state */
case TAVOR_QP_RESET:
break;
case TAVOR_QP_INIT:
break;
case TAVOR_QP_RTR:
break;
case TAVOR_QP_RTS:
break;
case TAVOR_QP_SQERR:
break;
case TAVOR_QP_SQD:
/* SQD state reports "draining" vs. drained based on this flag */
if (qp->qp_sqd_still_draining) {
} else {
}
break;
case TAVOR_QP_ERR:
break;
default:
/* Unrecognized hardware state: report a CI failure to the caller */
return (ibc_get_ci_failure(0));
}
/* SRQ Hook. */
/*
 * The following QP information is always returned, regardless of
 * the current QP state. Note: Some special handling is necessary
 * for calculating the QP number on special QP (QP0 and QP1).
 */
if (qp->qp_is_special) {
} else {
}
/*
 * If QP is currently in the "Reset" state, then only the above are
 * returned
 */
if (qp_state == IBT_STATE_RESET) {
return (DDI_SUCCESS);
}
/*
 * Post QUERY_QP command to firmware
 *
 * We do a TAVOR_NOSLEEP here because we are holding the "qp_lock".
 * Since we may be in the interrupt context (or subsequently raised
 * to interrupt level by priority inversion), we do not want to block
 * in this routine waiting for success.
 */
if (status != TAVOR_CMD_SUCCESS) {
status);
return (ibc_get_ci_failure(0));
}
/*
 * Fill in the additional QP info based on the QP's transport type.
 */
/* Fill in the UD-specific info */
/* Fill in the RC-specific info */
/* Grab the path migration state information */
} else {
}
/* Get the common primary address path fields */
/* Fill in the additional primary address path fields */
/* Get the common alternate address path fields */
/* Fill in the additional alternate address path fields */
/* Get the RNR retry time from primary path */
/* Fill in the UC-specific info */
/* Grab the path migration state information */
} else {
}
/* Get the common primary address path fields */
/* Fill in the additional primary address path fields */
/* Get the common alternate address path fields */
/* Fill in the additional alternate address path fields */
/*
 * Set the enable flags based on RDMA enable bits (by
 * definition UC doesn't support Atomic or RDMA Read)
 */
} else {
return (ibc_get_ci_failure(0));
}
/*
 * Under certain circumstances it is possible for the Tavor hardware
 * to transition to one of the error states without software directly
 * knowing about it. The QueryQP() call is the one place where we
 * have an opportunity to sample and update our view of the QP state.
 */
}
}
return (DDI_SUCCESS);
}
/*
* tavor_qp_create_qpn()
* Context: Can be called from interrupt or base context.
*/
static int
{
/*
 * NOTE(review): this extract is truncated -- the parameter list, the
 * AVL lookup/insert statements, and the allocation-failure test that
 * precedes "return (DDI_FAILURE)" below are missing.  Verify against
 * the complete source file.
 */
/*
 * Build a query (for the AVL tree lookup) and attempt to find
 * a previously added entry that has a matching QPC index. If
 * no matching entry is found, then allocate, initialize, and
 * add an entry to the AVL tree.
 * If a matching entry is found, then increment its QPN counter
 * and reference counter.
 */
/*
 * Allocate and initialize a QPN entry, then insert
 * it into the AVL tree.
 */
sizeof (tavor_qpn_entry_t), KM_NOSLEEP);
return (DDI_FAILURE);
}
/* Fresh entry starts with no references and a zeroed QPN counter */
entry->qpn_refcnt = 0;
entry->qpn_counter = 0;
}
/*
 * Make the AVL tree entry point to the QP context resource that
 * it will be responsible for tracking
 */
/*
 * Setup the QP handle to point to the AVL tree entry. Then
 * generate the new QP number from the entry's QPN counter value
 * and the hardware's QP context table index.
 */
/*
 * Increment the reference counter and QPN counter. The QPN
 * counter always indicates the next available number for use.
 */
entry->qpn_counter++;
entry->qpn_refcnt++;
return (DDI_SUCCESS);
}
/*
* tavor_qp_release_qpn()
* Context: Can be called only from user or kernel context.
*/
void
{
/*
 * NOTE(review): this extract is truncated -- the parameter list
 * (presumably including "entry" and "flags") and the AVL
 * remove/free statements are missing.  Verify against the full source.
 */
/*
 * If we are releasing the QP number here, then we decrement the
 * reference count and check for zero references. If there are
 * zero references, then we free the QPC context (if it hadn't
 * already been freed during a TAVOR_QPN_FREE_ONLY free, i.e. for
 * reuse with another similar QP number) and remove the tracking
 * structure from the QP number AVL tree and free the structure.
 * If we are not releasing the QP number here, then, as long as we
 * have not exhausted the usefulness of the QPC context (that is,
 * re-used it too many times without the reference count having
 * gone to zero), we free up the QPC context for use by another
 * thread (which will use it to construct a different QP number
 * from the same QPC table index).
 */
if (flags == TAVOR_QPN_RELEASE) {
entry->qpn_refcnt--;
/*
 * If the reference count is zero, then we free the QPC
 * context (if it hadn't already been freed in an early
 * tracking structure from the QP number AVL tree.
 */
if (entry->qpn_refcnt == 0) {
}
/*
 * If the current entry has served its useful
 * purpose (i.e. been reused the maximum allowable
 * number of times), then remove it from QP number
 * AVL tree and free it up.
 */
}
}
} else if (flags == TAVOR_QPN_FREE_ONLY) {
/*
 * Even if we are not freeing the QP number, that will not
 * always prevent us from releasing the QPC context. In fact,
 * since the QPC context only forms part of the whole QPN,
 * we want to free it up for use by other consumers. But
 * if the reference count is non-zero (which it will always
 * be when we are doing TAVOR_QPN_FREE_ONLY) and the counter
 * has reached its maximum value, then we cannot reuse the
 * QPC context until the reference count eventually reaches
 * zero (in TAVOR_QPN_RELEASE, above).
 */
}
}
}
/*
* tavor_qpn_avl_compare()
* Context: Can be called from user or kernel context.
*/
static int
tavor_qpn_avl_compare(const void *q, const void *e)
{
/*
 * AVL comparator for the QP number tree: "q" is the query entry and
 * "e" is an existing tree entry; returns -1, +1, or 0 per the AVL
 * library's ordering contract.
 * NOTE(review): the declarations of "entry"/"query" and the
 * comparison conditions guarding the -1/+1 returns are missing from
 * this extract -- the comparison key is presumably the QPC index;
 * confirm against the full source.
 */
entry = (tavor_qpn_entry_t *)e;
query = (tavor_qpn_entry_t *)q;
return (-1);
return (+1);
} else {
return (0);
}
}
/*
* tavor_qpn_avl_init()
* Context: Only called from attach() path context
*/
void
{
/*
 * NOTE(review): truncated extract -- the signature, mutex_init(), and
 * the avl_create() call that the trailing sizeof argument belongs to
 * are missing.  Verify against the full source file.
 */
/* Initialize the lock used for QP number (QPN) AVL tree access */
/* Initialize the AVL tree for the QP number (QPN) storage */
sizeof (tavor_qpn_entry_t),
}
/*
* tavor_qpn_avl_fini()
*/
void
{
/*
 * NOTE(review): truncated extract -- the signature and the
 * avl_destroy_nodes()/avl_destroy()/mutex_destroy() calls are
 * missing; "cookie" is presumably the avl_destroy_nodes() iterator
 * state.  Verify against the full source file.
 */
void *cookie;
/*
 * Empty all entries (if necessary) and destroy the AVL tree
 * that was used for QP number (QPN) tracking.
 */
}
/* Destroy the lock used for QP number (QPN) AVL tree access */
}
/*
* tavor_qphdl_from_qpnum()
* Context: Can be called from interrupt or base context.
*
* This routine is important because changing the unconstrained
* portion of the QP number is critical to the detection of a
* potential race condition in the QP event handler code (i.e. the case
* where a QP is freed and alloc'd again before an event for the
* "old" QP can be handled).
*
* While this is not a perfect solution (not sure that one exists)
* it does help to mitigate the chance that this race condition will
* cause us to deliver a "stale" event to the new QP owner. Note:
* this solution does not scale well because the number of constrained
* bits increases (and, hence, the number of unconstrained bits
* decreases) as the number of supported QPs grows. For small and
* intermediate values, it should hopefully provide sufficient
* protection.
*/
{
/*
 * NOTE(review): the function signature (per the header comment above,
 * tavor_qphdl_from_qpnum) and the lookup statements are missing from
 * this extract.  Only the body skeleton is visible.
 */
/* Calculate the QP table index from the qpnum */
}
/*
* tavor_special_qp_rsrc_alloc
* Context: Can be called from interrupt or base context.
*/
static int
{
/*
 * NOTE(review): truncated extract -- the parameter list, the
 * CONF_SPECIAL_QP command posts, and the conditions surrounding the
 * IBT_QP_IN_USE returns are missing.  Verify against the full source.
 */
int status;
if (type == IBT_SMI_SQP) {
/*
 * Check here to see if the driver has been configured
 * to instruct the Tavor firmware to handle all incoming
 * SMP messages (i.e. messages sent to SMA). If so,
 * then we will treat QP0 as if it has already been
 * allocated (for internal use). Otherwise, if we allow
 * the allocation to happen, it will cause unexpected
 * behaviors (e.g. Tavor SMA becomes unresponsive).
 */
TAVOR_TNF_ERROR, "");
return (IBT_QP_IN_USE);
}
/*
 * If this is the first QP0 allocation, then post
 * a CONF_SPECIAL_QP firmware command
 */
if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) {
if (status != TAVOR_CMD_SUCCESS) {
"command failed: %08x\n", status);
status);
return (IBT_INSUFF_RESOURCE);
}
}
/*
 * Now check (and, if necessary, modify) the flags to indicate
 * whether the allocation was successful
 */
return (IBT_QP_IN_USE);
}
} else {
/*
 * If this is the first QP1 allocation, then post
 * a CONF_SPECIAL_QP firmware command
 */
if ((flags & TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) {
if (status != TAVOR_CMD_SUCCESS) {
"command failed: %08x\n", status);
status);
return (IBT_INSUFF_RESOURCE);
}
}
/*
 * Now check (and, if necessary, modify) the flags to indicate
 * whether the allocation was successful
 */
TAVOR_TNF_ERROR, "");
return (IBT_QP_IN_USE);
}
}
return (DDI_SUCCESS);
}
/*
* tavor_special_qp_rsrc_free
* Context: Can be called from interrupt or base context.
*/
static int
{
/*
 * NOTE(review): truncated extract -- the parameter list and the
 * CONF_SPECIAL_QP command posts that set "status" are missing.
 * Verify against the full source file.
 */
int status;
if (type == IBT_SMI_SQP) {
/*
 * If this is the last QP0 free, then post a CONF_SPECIAL_QP
 * firmware command
 */
if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) {
if (status != TAVOR_CMD_SUCCESS) {
"command failed: %08x\n", status);
status);
return (ibc_get_ci_failure(0));
}
}
} else {
/*
 * If this is the last QP1 free, then post a CONF_SPECIAL_QP
 * firmware command
 */
if ((flags & TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) {
if (status != TAVOR_CMD_SUCCESS) {
"command failed: %08x\n", status);
status);
return (ibc_get_ci_failure(0));
}
}
}
return (DDI_SUCCESS);
}
/*
* tavor_qp_sgl_to_logwqesz()
* Context: Can be called from interrupt or base context.
*/
static void
{
/*
 * NOTE(review): truncated extract -- the parameter list, the size
 * calculations, and the case labels for the two MLX-transport arms
 * (described by the comments before the third and fourth "break"
 * below) are missing.  Verify against the full source file.
 */
switch (wq_type) {
case TAVOR_QP_WQ_TYPE_SENDQ:
/*
 * Use requested maximum SGL to calculate max descriptor size
 * (while guaranteeing that the descriptor size is a
 * power-of-2 cachelines).
 */
}
/* Make sure descriptor is at least the minimum size */
/* Calculate actual number of SGL (given WQE size) */
break;
case TAVOR_QP_WQ_TYPE_RECVQ:
/*
 * Same as above (except for Recv WQEs)
 */
}
/* Make sure descriptor is at least the minimum size */
/* Calculate actual number of SGL (given WQE size) */
break;
/*
 * Same as above (except for MLX transport WQEs). For these
 * WQEs we have to account for the space consumed by the
 * "inline" packet headers. (This is smaller than for QP1
 * below because QP0 is not allowed to send packets with a GRH.
 */
}
/* Make sure descriptor is at least the minimum size */
/* Calculate actual number of SGL (given WQE size) */
break;
/*
 * Same as above. For these WQEs we again have to account for
 * the space consumed by the "inline" packet headers. (This
 * is larger than for QP0 above because we have to account for
 * the possibility of a GRH in each packet - and this
 * introduces an alignment issue that causes us to consume
 * an additional 8 bytes).
 */
}
/* Make sure descriptor is at least the minimum size */
/* Calculate actual number of SGL (given WQE size) */
break;
default:
TAVOR_TNF_ERROR, "");
break;
}
/* Fill in the return values */
}