tavor_mr.c revision c7facc54c4abed9e554ff80225311e6b7048d3c9
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
*
* Implements all the routines necessary to provide the requisite memory
* registration verbs. These include operations like RegisterMemRegion(),
* DeregisterMemRegion(), ReregisterMemRegion(), RegisterSharedMemRegion(),
* etc., that affect Memory Regions. It also includes the verbs that
* affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
* and QueryMemWindow().
*/
/*
* Used by tavor_mr_keycalc() below to fill in the "unconstrained" portion
* of Tavor memory keys (LKeys and RKeys)
*/
/*
* The Tavor umem_lockmemory() callback ops. When userland memory is
* registered, these callback ops are specified. The tavor_umap_umemlock_cb()
* callback will be called whenever the memory for the corresponding
* ddi_umem_cookie_t is being freed.
*/
static struct umem_callback_ops tavor_umem_cbops = {
};
/*
* tavor_mr_register()
* Context: Can be called from interrupt or base context.
*/
int
{
int status;
/*
* Fill in the "bind" struct. This struct provides the majority
* of the information that will be used to distinguish between an
* "addr" binding (as is the case here) and a "buf" binding (see
* below). The "bind" struct is later passed to tavor_mr_mem_bind()
* which does most of the "heavy lifting" for the Tavor memory
* registration routines.
*/
if (status != DDI_SUCCESS) {
TAVOR_TNF_ERROR, "");
return (status);
}
return (DDI_SUCCESS);
}
/*
* tavor_mr_register_buf()
* Context: Can be called from interrupt or base context.
*/
int
{
int status;
/*
* Fill in the "bind" struct. This struct provides the majority
* of the information that will be used to distinguish between an
* "addr" binding (see above) and a "buf" binding (as is the case
* here). The "bind" struct is later passed to tavor_mr_mem_bind()
* which does most of the "heavy lifting" for the Tavor memory
* registration routines. Note: We have chosen to provide
* "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
* not set). It is not critical what value we choose here as it need
* only be unique for the given RKey (which will happen by default),
* so the choice here is somewhat arbitrary.
*/
} else {
}
if (status != DDI_SUCCESS) {
TAVOR_TNF_ERROR, "");
return (status);
}
return (DDI_SUCCESS);
}
/*
* tavor_mr_register_shared()
* Context: Can be called from interrupt or base context.
*/
int
{
int status, umem_flags;
char *errormsg;
/*
* Check the sleep flag. Ensure that it is consistent with the
* current thread context (i.e. if we are currently in the interrupt
* context, then we shouldn't be attempting to sleep).
*/
if ((sleep == TAVOR_SLEEP) &&
(sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
/* Set "status" and "errormsg" and goto failure */
goto mrshared_fail;
}
/* Increment the reference count on the protection domain (PD) */
/*
* Allocate an MPT entry. This will be filled in with all the
* necessary parameters to define the shared memory region.
* Specifically, it will be made to reference the currently existing
* MTT entries and ownership of the MPT will be passed to the hardware
* in the last step below. If we fail here, we must undo the
* protection domain reference count.
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto mrshared_fail1;
}
/*
* Allocate the software structure for tracking the shared memory
* region (i.e. the Tavor Memory Region handle). If we fail here, we
* must undo the protection domain reference count and the previous
* resource allocation.
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto mrshared_fail2;
}
/*
* Setup and validate the memory region access flags. This means
* translating the IBTF's enable flags into the access flags that
* will be used in later operations.
*/
mr->mr_accflag = 0;
/*
* Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
* from a certain number of "constrained" bits (the least significant
* bits) and some number of "unconstrained" bits. The constrained
* bits must be set to the index of the entry in the MPT table, but
* the unconstrained bits can be set to any value we wish. Note:
* if no remote access is required, then the RKey value is not filled
* in. Otherwise both Rkey and LKey are given the same value.
*/
}
/* Grab the MR lock for the current memory region */
/*
* Check here to see if the memory region has already been partially
* deregistered as a result of a tavor_umap_umemlock_cb() callback.
* If so, this is an error, return failure.
*/
/* Set "status" and "errormsg" and goto failure */
goto mrshared_fail3;
}
/*
* Determine if the original memory was from userland and, if so, pin
* the pages (again) with umem_lockmemory(). This will guarantee a
* separate callback for each of this shared region's MR handles.
* If this is userland memory, then allocate an entry in the
* "userland resources database". This will later be added to
* the database (after all further memory registration operations are
* successful). If we fail here, we must undo all the above setup.
*/
if (mr_is_umem) {
~PAGEOFFSET);
if (status != 0) {
/* Set "status" and "errormsg" and goto failure */
goto mrshared_fail3;
}
/* Set "status" and "errormsg" and goto failure */
goto mrshared_fail4;
}
}
/*
* Copy the MTT resource pointer (and additional parameters) from
* the original Tavor Memory Region handle. Note: this is normally
* where the tavor_mr_mem_bind() routine would be called, but because
* we already have bound and filled-in MTT entries it is simply a
* matter here of managing the MTT reference count and grabbing the
* address of the MTT table entries (for filling in the shared region's
* MPT entry).
*/
/*
* Increment the MTT reference count (to reflect the fact that
* the MTT is now shared)
*/
/*
* Update the new "bind" virtual address. Do some extra work here
* to ensure proper alignment. That is, make sure that the page
* offset for the beginning of the old range is the same as the
* offset for this new mapping
*/
/*
* Get the base address for the MTT table. This will be necessary
* in the next step when we are setting up the MPT entry.
*/
/*
* Fill in the MPT entry. This is the final step before passing
* ownership of the MPT entry to the Tavor hardware. We use all of
* the information collected/calculated above to fill in the
* requisite portions of the MPT.
*/
/*
* Write the MPT entry to hardware. Lastly, we pass ownership of
* the entry to the hardware. Note: in general, this operation
* shouldn't fail. But if it does, we have to undo everything we've
* done above before returning error.
*/
if (status != TAVOR_CMD_SUCCESS) {
status);
/* Set "status" and "errormsg" and goto failure */
"tavor SW2HW_MPT command");
goto mrshared_fail5;
}
/*
* Fill in the rest of the Tavor Memory Region handle. Having
* successfully transferred ownership of the MPT, we can update the
* following fields for use in further operations on the MR.
*/
/*
* If this is userland memory, then we need to insert the previously
* allocated entry into the "userland resources database". This will
* allow for later coordination between the tavor_umap_umemlock_cb()
* callback and tavor_mr_deregister().
*/
if (mr_is_umem) {
}
return (DDI_SUCCESS);
/*
* The following is cleanup for all possible failure cases in this routine
*/
if (mr_is_umem) {
}
if (mr_is_umem) {
}
return (status);
}
/*
* tavor_mr_deregister()
* Context: Can be called from interrupt or base context.
*/
/* ARGSUSED */
int
{
int status, shared_mtt;
char *errormsg;
/*
* Check the sleep flag. Ensure that it is consistent with the
* current thread context (i.e. if we are currently in the interrupt
* context, then we shouldn't be attempting to sleep).
*/
if ((sleep == TAVOR_SLEEP) &&
(sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
/* Set "status" and "errormsg" and goto failure */
return (status);
}
/*
* Pull all the necessary information from the Tavor Memory Region
* handle. This is necessary here because the resource for the
* MR handle is going to be freed up as part of this
* deregistration
*/
/*
* Check here to see if the memory region has already been partially
* deregistered as a result of the tavor_umap_umemlock_cb() callback.
* If so, then jump to the end and free the remaining resources.
*/
goto mrdereg_finish_cleanup;
}
/*
* We must drop the "mr_lock" here to ensure that both SLEEP and
* NOSLEEP calls into the firmware work as expected. Also, if two
* threads are attempting to access this MR (via de-register,
* re-register, or otherwise), then we allow the firmware to enforce
* the checking, that only one deregister is valid.
*/
/*
* Reclaim MPT entry from hardware (if necessary). Since the
* tavor_mr_deregister() routine is used in the memory region
* reregistration process as well, it is possible that we will
* not always wish to reclaim ownership of the MPT. Check the
* "level" arg and, if necessary, attempt to reclaim it. If
* the ownership transfer fails for any reason, we check to see
* what command status was returned from the hardware. The only
* "expected" error status is the one that indicates an attempt to
* deregister a memory region that has memory windows bound to it
*/
if (level >= TAVOR_MR_DEREG_ALL) {
if (status != TAVOR_CMD_SUCCESS) {
if (status == TAVOR_CMD_REG_BOUND) {
return (IBT_MR_IN_USE);
} else {
"failed: %08x\n", status);
status);
return (IBT_INVALID_PARAM);
}
}
}
/*
* Re-grab the mr_lock here. Since further access to the protected
* 'mr' structure is needed, and we would have returned previously for
* the multiple deregistration case, we can safely grab the lock here.
*/
/*
* If the memory had come from userland, then we do a lookup in the
* "userland resources database". On success, we free the entry, call
* ddi_umem_unlock(), and continue the cleanup. On failure (which is
* an indication that the umem_lockmemory() callback has called
* tavor_mr_deregister()), we call ddi_umem_unlock() and invalidate
* the "mr_umemcookie" field in the MR handle (this will be used
* later to detect that only partial cleanup still remains to be done
* on the MR handle).
*/
if (mr->mr_is_umem) {
&umapdb);
if (status == DDI_SUCCESS) {
} else {
}
}
/*
* Decrement the MTT reference count. Since the MTT resource
* may be shared between multiple memory regions (as a result
* of a "RegisterSharedMR" verb) it is important that we not
* free up or unbind resources prematurely. If it's not shared (as
* indicated by the return status), then free the resource.
*/
if (!shared_mtt) {
}
/*
* Free up the MTT entries and unbind the memory. Here, as above, we
* attempt to free these resources only if it is appropriate to do so.
*/
if (!shared_mtt) {
if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) {
}
}
/*
* If the MR handle has been invalidated, then drop the
* lock and return success. Note: This only happens because
* the umem_lockmemory() callback has been triggered. The
* cleanup here is partial, and further cleanup (in a
* subsequent tavor_mr_deregister() call) will be necessary.
*/
return (DDI_SUCCESS);
}
/* Free the Tavor Memory Region handle */
/* Free up the MPT entry resource */
/* Decrement the reference count on the protection domain (PD) */
/* Set the mrhdl pointer to NULL and return success */
return (DDI_SUCCESS);
}
/*
* tavor_mr_query()
* Context: Can be called from interrupt or base context.
*/
/* ARGSUSED */
int
{
/*
* Check here to see if the memory region has already been partially
* deregistered as a result of a tavor_umap_umemlock_cb() callback.
* If so, this is an error, return failure.
*/
return (IBT_MR_HDL_INVALID);
}
/* Fill in the queried attributes */
/* Fill in the "local" attributes */
/*
* Fill in the "remote" attributes (if necessary). Note: the
* remote attributes are only valid if the memory region has one
* or more of the remote access flags set.
*/
}
/*
* If the region is mapped for streaming (i.e. noncoherent), then indicate
* that a sync is required
*/
return (DDI_SUCCESS);
}
/*
* tavor_mr_reregister()
* Context: Can be called from interrupt or base context.
*/
int
{
int status;
/*
* Fill in the "bind" struct. This struct provides the majority
* of the information that will be used to distinguish between an
* "addr" binding (as is the case here) and a "buf" binding (see
* below). The "bind" struct is later passed to tavor_mr_mem_bind()
* which does most of the "heavy lifting" for the Tavor memory
* registration (and reregistration) routines.
*/
if (status != DDI_SUCCESS) {
TAVOR_TNF_ERROR, "");
return (status);
}
return (DDI_SUCCESS);
}
/*
* tavor_mr_reregister_buf()
* Context: Can be called from interrupt or base context.
*/
int
{
int status;
/*
* Fill in the "bind" struct. This struct provides the majority
* of the information that will be used to distinguish between an
* "addr" binding (see above) and a "buf" binding (as is the case
* here). The "bind" struct is later passed to tavor_mr_mem_bind()
* which does most of the "heavy lifting" for the Tavor memory
* registration routines. Note: We have chosen to provide
* "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
* not set). It is not critical what value we choose here as it need
* only be unique for the given RKey (which will happen by default),
* so the choice here is somewhat arbitrary.
*/
} else {
}
if (status != DDI_SUCCESS) {
TAVOR_TNF_ERROR, "");
return (status);
}
return (DDI_SUCCESS);
}
/*
* tavor_mr_sync()
* Context: Can be called from interrupt or base context.
*/
/* ARGSUSED */
int
{
int status, i;
char *errormsg;
/* Process each of the ibt_mr_sync_t's */
for (i = 0; i < num_segs; i++) {
/* Check for valid memory region handle */
/* Set "status" and "errormsg" and goto failure */
goto mrsync_fail;
}
/*
* Check here to see if the memory region has already been
* partially deregistered as a result of a
* tavor_umap_umemlock_cb() callback. If so, this is an
* error, return failure.
*/
/* Set "status" and "errormsg" and goto failure */
goto mrsync_fail;
}
/* Check for valid bounds on sync request */
/* Set "status" and "errormsg" and goto failure */
goto mrsync_fail;
}
/* Set "status" and "errormsg" and goto failure */
goto mrsync_fail;
}
/* Determine what type (i.e. direction) for sync */
} else {
/* Set "status" and "errormsg" and goto failure */
goto mrsync_fail;
}
}
return (DDI_SUCCESS);
errormsg);
return (status);
}
/*
* tavor_mw_alloc()
* Context: Can be called from interrupt or base context.
*/
int
{
int status;
char *errormsg;
/*
* Check the sleep flag. Ensure that it is consistent with the
* current thread context (i.e. if we are currently in the interrupt
* context, then we shouldn't be attempting to sleep).
*/
if ((sleep == TAVOR_SLEEP) &&
(sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
/* Set "status" and "errormsg" and goto failure */
goto mwalloc_fail;
}
/* Increment the reference count on the protection domain (PD) */
/*
* Allocate an MPT entry (for use as a memory window). Since the
* Tavor hardware uses the MPT entry for memory regions and for
* memory windows, we will fill in this MPT with all the necessary
* parameters for the memory window. And then (just as we do for
* memory regions) ownership will be passed to the hardware in the
* final step below. If we fail here, we must undo the protection
* domain reference count.
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto mwalloc_fail1;
}
/*
* Allocate the software structure for tracking the memory window (i.e.
* the Tavor Memory Window handle). Note: This is actually the same
* software structure used for tracking memory regions, but since many
* of the same properties are needed, only a single structure is
* necessary. If we fail here, we must undo the protection domain
* reference count and the previous resource allocation.
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto mwalloc_fail2;
}
/*
* Calculate an "unbound" RKey from MPT index. In much the same way
* as we do for memory regions (above), this key is constructed from
* a "constrained" (which depends on the MPT index) and an
* "unconstrained" portion (which may be arbitrarily chosen).
*/
/*
* Fill in the MPT entry. This is the final step before passing
* ownership of the MPT entry to the Tavor hardware. We use all of
* the information collected/calculated above to fill in the
* requisite portions of the MPT. Note: fewer entries in the MPT
* entry are necessary to allocate a memory window.
*/
/*
* Write the MPT entry to hardware. Lastly, we pass ownership of
* the entry to the hardware. Note: in general, this operation
* shouldn't fail. But if it does, we have to undo everything we've
* done above before returning error.
*/
if (status != TAVOR_CMD_SUCCESS) {
status);
/* Set "status" and "errormsg" and goto failure */
"tavor SW2HW_MPT command");
goto mwalloc_fail3;
}
/*
* Fill in the rest of the Tavor Memory Window handle. Having
* successfully transferred ownership of the MPT, we can update the
* following fields for use in further operations on the MW.
*/
return (DDI_SUCCESS);
return (status);
}
/*
* tavor_mw_free()
* Context: Can be called from interrupt or base context.
*/
int
{
int status;
char *errormsg;
/*
* Check the sleep flag. Ensure that it is consistent with the
* current thread context (i.e. if we are currently in the interrupt
* context, then we shouldn't be attempting to sleep).
*/
if ((sleep == TAVOR_SLEEP) &&
(sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
/* Set "status" and "errormsg" and goto failure */
return (status);
}
/*
* Pull all the necessary information from the Tavor Memory Window
* handle. This is necessary here because the resource for the
* MW handle is going to be freed up as part of this operation.
*/
/*
* Reclaim the MPT entry from hardware. Note: in general, it is
* unexpected for this operation to return an error.
*/
if (status != TAVOR_CMD_SUCCESS) {
status);
return (IBT_INVALID_PARAM);
}
/* Free the Tavor Memory Window handle */
/* Free up the MPT entry resource */
/* Decrement the reference count on the protection domain (PD) */
/* Set the mwhdl pointer to NULL and return success */
return (DDI_SUCCESS);
}
/*
* tavor_mr_keycalc()
* Context: Can be called from interrupt or base context.
*/
void
{
/*
* Generate a simple key from counter. Note: We increment this
* static variable _intentionally_ without any kind of mutex around
* it. First, single-threading all operations through a single lock
* would be a bad idea (from a performance point-of-view). Second,
* the upper "unconstrained" bits don't really have to be unique
* because the lower bits are guaranteed to be (although we do make a
* best effort to ensure that they are). Third, the window for the
* race (where both threads read and update the counter at the same
* time) is incredibly small.
* And, lastly, we'd like to make this into a "random" key XXX
*/
}
/*
* tavor_mr_common_reg()
* Context: Can be called from interrupt or base context.
*/
static int
{
char *errormsg;
/*
* Check the "options" flag. Currently this flag tells the driver
* whether or not the region should be bound normally (i.e. with
* entries written into the PCI IOMMU), whether it should be
* registered to bypass the IOMMU, and whether or not the resulting
* address should be "zero-based" (to aid the alignment restrictions
* for QPs).
*/
bind_dmahdl = NULL;
bind_override_addr = 0;
} else {
}
/* Extract the flags field from the tavor_bind_info_t */
/*
* Check for invalid length. Check if the length is zero or if the
* length is larger than the maximum configured value. Return error
* if it is.
*/
/* Set "status" and "errormsg" and goto failure */
goto mrcommon_fail;
}
/*
* Check the sleep flag. Ensure that it is consistent with the
* current thread context (i.e. if we are currently in the interrupt
* context, then we shouldn't be attempting to sleep).
*/
if ((sleep == TAVOR_SLEEP) &&
(sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
/* Set "status" and "errormsg" and goto failure */
goto mrcommon_fail;
}
/*
* Get the base address for the MTT table. This will be necessary
* below when we are setting up the MPT entry.
*/
/* Increment the reference count on the protection domain (PD) */
/*
* Allocate an MPT entry. This will be filled in with all the
* necessary parameters to define the memory region. And then
* ownership will be passed to the hardware in the final step
* below. If we fail here, we must undo the protection domain
* reference count.
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto mrcommon_fail1;
}
/*
* Allocate the software structure for tracking the memory region (i.e.
* the Tavor Memory Region handle). If we fail here, we must undo
* the protection domain reference count and the previous resource
* allocation.
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto mrcommon_fail2;
}
/*
* Setup and validate the memory region access flags. This means
* translating the IBTF's enable flags into the access flags that
* will be used in later operations.
*/
mr->mr_accflag = 0;
if (flags & IBT_MR_ENABLE_WINDOW_BIND)
if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
if (flags & IBT_MR_ENABLE_REMOTE_READ)
if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
/*
* Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
* from a certain number of "constrained" bits (the least significant
* bits) and some number of "unconstrained" bits. The constrained
* bits must be set to the index of the entry in the MPT table, but
* the unconstrained bits can be set to any value we wish. Note:
* if no remote access is required, then the RKey value is not filled
* in. Otherwise both Rkey and LKey are given the same value.
*/
}
/*
* Determine if the memory is from userland and pin the pages
* with umem_lockmemory() if necessary.
* Then, if this is userland memory, allocate an entry in the
* "userland resources database". This will later be added to
* the database (after all further memory registration operations are
* successful). If we fail here, we must undo the reference counts
* and the previous resource allocations.
*/
if (mr_is_umem) {
if (status != 0) {
/* Set "status" and "errormsg" and goto failure */
goto mrcommon_fail3;
}
/* Set "status" and "errormsg" and goto failure */
goto mrcommon_fail3;
}
/* Set "status" and "errormsg" and goto failure */
goto mrcommon_fail4;
}
}
/*
* Setup the bindinfo for the mtt bind call
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
/*
* When mtt_bind fails, freerbuf has already been done,
* so make sure not to call it again.
*/
goto mrcommon_fail5;
}
/*
* Allocate MTT reference count (to track shared memory regions).
* This reference count resource may never be used on the given
* memory region, but if it is ever later registered as "shared"
* memory region then this resource will be necessary. If we fail
* here, we do pretty much the same as above to clean up.
*/
&mtt_refcnt);
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto mrcommon_fail6;
}
/*
* Fill in the MPT entry. This is the final step before passing
* ownership of the MPT entry to the Tavor hardware. We use all of
* the information collected/calculated above to fill in the
* requisite portions of the MPT.
*/
if (bind_override_addr == 0) {
} else {
}
/*
* Write the MPT entry to hardware. Lastly, we pass ownership of
* the entry to the hardware. Note: in general, this operation
* shouldn't fail. But if it does, we have to undo everything we've
* done above before returning error.
*/
if (status != TAVOR_CMD_SUCCESS) {
status);
/* Set "status" and "errormsg" and goto failure */
"tavor SW2HW_MPT command");
goto mrcommon_fail7;
}
/*
* Fill in the rest of the Tavor Memory Region handle. Having
* successfully transferred ownership of the MPT, we can update the
* following fields for use in further operations on the MR.
*/
/*
* If this is userland memory, then we need to insert the previously
* allocated entry into the "userland resources database". This will
* allow for later coordination between the tavor_umap_umemlock_cb()
* callback and tavor_mr_deregister().
*/
if (mr_is_umem) {
}
return (DDI_SUCCESS);
/*
* The following is cleanup for all possible failure cases in this routine
*/
if (mr_is_umem) {
}
if (mr_is_umem) {
/*
* Free up the memory ddi_umem_iosetup() allocates
* internally.
*/
}
}
return (status);
}
/*
* tavor_mr_mtt_bind()
* Context: Can be called from interrupt or base context.
*/
int
{
int status;
char *errormsg;
/*
* Check the sleep flag. Ensure that it is consistent with the
* current thread context (i.e. if we are currently in the interrupt
* context, then we shouldn't be attempting to sleep).
*/
if ((sleep == TAVOR_SLEEP) &&
(sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
/* Set "status" and "errormsg" and goto failure */
goto mrmttbind_fail;
}
/*
* Bind the memory and determine the mapped addresses. This is
* the first of two routines that do all the "heavy lifting" for
* the Tavor memory registration routines. The tavor_mr_mem_bind()
* routine takes the "bind" struct with all its fields filled
* in and returns a list of DMA cookies (for the PCI mapped addresses
* corresponding to the specified address region) which are used by
* the tavor_mr_fast_mtt_write() routine below. If we fail here, we
* must undo all the previous resource allocation (and PD reference
* count).
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto mrmttbind_fail;
}
/*
* Determine number of pages spanned. This routine uses the
* information in the "bind" struct to determine the required
* number of MTT entries needed (and returns the suggested page size -
* as a "power-of-2" - for each MTT entry).
*/
/*
* Allocate the MTT entries. Use the calculations performed above to
* allocate the required number of MTT entries. Note: MTT entries are
* allocated in "MTT segments" which consist of complete cachelines
* (i.e. 8 entries, 16 entries, etc.) So the TAVOR_NUMMTT_TO_MTTSEG()
* macro is used to do the proper conversion. If we fail here, we
* must not only undo all the previous resource allocation (and PD
* reference count), but we must also unbind the memory.
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto mrmttbind_fail2;
}
/*
* Write the mapped addresses into the MTT entries. This is part two
* of the "heavy lifting" routines that we talked about above. Note:
* we pass the suggested page size from the earlier operation here.
* And if we fail here, we again do pretty much the same huge clean up.
*/
if (status != DDI_SUCCESS) {
/* Set "status" and "errormsg" and goto failure */
goto mrmttbind_fail3;
}
return (DDI_SUCCESS);
/*
* The following is cleanup for all possible failure cases in this routine
*/
return (status);
}
/*
* tavor_mr_mtt_unbind()
* Context: Can be called from interrupt or base context.
*/
int
{
/*
* Free up the MTT entries and unbind the memory. Here, as above, we
* attempt to free these resources only if it is appropriate to do so.
*/
return (DDI_SUCCESS);
}
/*
* tavor_mr_common_rereg()
* Context: Can be called from interrupt or base context.
*/
static int
{
int status;
char *errormsg;
/*
* Check here to see if the memory region corresponds to a userland
* mapping. Reregistration of userland memory regions is not
* currently supported. Return failure. XXX
*/
if (mr->mr_is_umem) {
/* Set "status" and "errormsg" and goto failure */
goto mrrereg_fail;
}
/* Pull MPT resource pointer from the Tavor Memory Region handle */
/* Extract the flags field from the tavor_bind_info_t */
/*
* Check the sleep flag. Ensure that it is consistent with the
* current thread context (i.e. if we are currently in the interrupt
* context, then we shouldn't be attempting to sleep).
*/
if ((sleep == TAVOR_SLEEP) &&
(sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
/* Set "status" and "errormsg" and goto failure */
goto mrrereg_fail;
}
/*
* First step is to temporarily invalidate the MPT entry. This
* regains ownership from the hardware, and gives us the opportunity
* to modify the entry. Note: The HW2SW_MPT command returns the
* current MPT entry contents. These are saved away here because
* they will be reused in a later step below. If the region has
* bound memory windows then we fail, returning an "in use" error code.
* Otherwise, this is an unexpected error and we deregister the
* memory region and return error.
*
* We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
* against holding the lock around this rereg call in all contexts.
*/
if (status != TAVOR_CMD_SUCCESS) {
if (status == TAVOR_CMD_REG_BOUND) {
return (IBT_MR_IN_USE);
} else {
"%08x\n", status);
/*
* Call deregister and ensure that all current
* resources get freed up
*/
"memory region");
}
return (ibc_get_ci_failure(0));
}
}
/*
* If we're changing the protection domain, then validate the new one
*/
if (flags & IBT_MR_CHANGE_PD) {
/* Check for valid PD handle pointer */
/*
* Call deregister and ensure that all current
* resources get properly freed up. Unnecessary
* here to attempt to regain software ownership
* of the MPT entry as that has already been
* done above.
*/
DDI_SUCCESS) {
"memory region");
}
/* Set "status" and "errormsg" and goto failure */
goto mrrereg_fail;
}
/* Use the new PD handle in all operations below */
} else {
/* Use the current PD handle in all operations below */
}
/*
* If we're changing access permissions, then validate the new ones
*/
if (flags & IBT_MR_CHANGE_ACCESS) {
/*
* Validate the access flags. Both remote write and remote
* atomic require the local write flag to be set
*/
if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
(flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
!(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
/*
* Call deregister and ensure that all current
* resources get properly freed up. Unnecessary
* here to attempt to regain software ownership
* of the MPT entry as that has already been
* done above.
*/
DDI_SUCCESS) {
"memory region");
}
/* Set "status" and "errormsg" and goto failure */
"invalid access flags");
goto mrrereg_fail;
}
/*
* Setup and validate the memory region access flags. This
* means translating the IBTF's enable flags into the access
* flags that will be used in later operations.
*/
acc_flags_to_use = 0;
if (flags & IBT_MR_ENABLE_WINDOW_BIND)
if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
if (flags & IBT_MR_ENABLE_REMOTE_READ)
if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
} else {
}
/*
* If we're modifying the translation, then figure out whether
* we can reuse the current MTT resources. This means calling
* tavor_mr_rereg_xlat_helper() which does most of the heavy lifting
* for the reregistration. If the current memory region contains
* sufficient MTT entries for the new regions, then it will be
* reused and filled in. Otherwise, new entries will be allocated,
* the old ones will be freed, and the new entries will be filled
* in. Note: If we're not modifying the translation, then we
* should already have all the information we need to update the MPT.
* Also note: If tavor_mr_rereg_xlat_helper() fails, it will return
* a "dereg_level" which is the level of cleanup that needs to be
* passed to tavor_mr_deregister() to finish the cleanup.
*/
if (flags & IBT_MR_CHANGE_TRANSLATION) {
if (status != DDI_SUCCESS) {
/*
* Call deregister and ensure that all resources get
* properly freed up.
*/
sleep) != DDI_SUCCESS) {
"memory region");
}
/* Set "status" and "errormsg" and goto failure */
goto mrrereg_fail;
}
} else {
}
/*
* Calculate new keys (Lkey, Rkey) from MPT index. Just like they were
* when the region was first registered, each key is formed from
* "constrained" bits and "unconstrained" bits. Note: If no remote
* access is required, then the RKey value is not filled in. Otherwise
* both Rkey and LKey are given the same value.
*/
if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
}
/*
* Update the MPT entry with the new information. Some of this
* information is retained from the previous operation, some of
* it is new based on request.
*/
/*
* Write the updated MPT entry to hardware
*
* We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
* against holding the lock around this rereg call in all contexts.
*/
if (status != TAVOR_CMD_SUCCESS) {
status);
/*
* Call deregister and ensure that all current resources get
* properly freed up. Unnecessary here to attempt to regain
* software ownership of the MPT entry as that has already
* been done above.
*/
"region");
}
return (ibc_get_ci_failure(0));
}
/*
* If we're changing PD, then update their reference counts now.
* This means decrementing the reference count on the old PD and
* incrementing the reference count on the new PD.
*/
if (flags & IBT_MR_CHANGE_PD) {
}
/*
* Update the contents of the Tavor Memory Region handle to reflect
* what has been changed.
*/
mr->mr_is_umem = 0;
/* New MR handle is same as the old */
return (DDI_SUCCESS);
return (status);
}
/*
* tavor_mr_rereg_xlat_helper()
* Context: Can be called from interrupt or base context.
* Note: This routine expects the "mr_lock" to be held when it
* is called. Upon returning failure, this routine passes information
* about what "dereg_level" should be passed to tavor_mr_deregister().
*
* Rebinds the memory for a region whose translation is being changed
* and writes the corresponding MTT entries, either reusing the current
* MTT resources or allocating new ones. Failure paths set "status" and
* "errormsg" and jump to "mrrereghelp_fail"; no cleanup of the old
* region is attempted here because the caller is expected to invoke
* deregister on failure.
*/
static int
{
int status;
char *errormsg;
/*
* Check the "options" flag. Currently this flag tells the driver
* whether or not the region should be bound normally (i.e. with
* entries written into the PCI IOMMU) or whether it should be
* registered to bypass the IOMMU.
*/
} else {
}
/*
* Check for invalid length. Check if the length is zero or if the
* length is larger than the maximum configured value. Return error
* if it is.
*/
/*
* Deregister will be called upon returning failure from this
* routine. This will ensure that all current resources get
* properly freed up. Unnecessary to attempt to regain
* software ownership of the MPT entry as that has already
* been done above (in tavor_mr_reregister())
*/
/* Set "status" and "errormsg" and goto failure */
goto mrrereghelp_fail;
}
/*
* Determine the number of pages necessary for new region and the
* number of pages supported by the current MTT resources
*/
/*
* Depending on whether we have enough pages or not, the next step is
* to fill in a set of MTT entries that reflect the new mapping. In
* the first case below, we already have enough entries. This means
* we need to unbind the memory from the previous mapping, bind the
* memory for the new mapping, write the new MTT entries, and update
* the mr to reflect the changes.
* In the second case below, we do not have enough entries in the
* current mapping. So, in this case, we need not only to unbind the
* current mapping, but we need to free up the MTT resources associated
* with that mapping. After we've successfully done that, we continue
* by binding the new memory, allocating new MTT entries, writing the
* new MTT entries, and updating the mr to reflect the changes.
*/
/*
* If this region is being shared (i.e. MTT refcount != 1), then we
* can't reuse the current MTT resources regardless of their size.
* Instead we'll need to alloc new ones (below) just as if there
* hadn't been enough room in the current entries.
*/
if (TAVOR_MTT_IS_NOT_SHARED(swrc_old) &&
(nummtt_needed <= nummtt_in_currrsrc)) {
/*
* Unbind the old mapping for this memory region, but retain
* the ddi_dma_handle_t (if possible) for reuse in the bind
* operation below. Note: If original memory region was
* bound for IOMMU bypass and the new region can not use
* bypass, then a new DMA handle will be necessary.
*/
reuse_dmahdl = 1;
} else {
reuse_dmahdl = 0;
}
/*
* Bind the new memory and determine the mapped addresses.
* As described, this routine and tavor_mr_fast_mtt_write()
* do the majority of the work for the memory registration
* operations. Note: When we successfully finish the binding,
* we will set the "bi_free_dmahdl" flag to indicate that
* even though we may have reused the ddi_dma_handle_t we do
* wish it to be freed up at some later time. Note also that
* if we fail, we may need to cleanup the ddi_dma_handle_t.
*/
if (status != DDI_SUCCESS) {
if (reuse_dmahdl) {
}
/*
* Deregister will be called upon returning failure
* from this routine. This will ensure that all
* current resources get properly freed up.
* Unnecessary to attempt to regain software ownership
* of the MPT entry as that has already been done
* above (in tavor_mr_reregister()). Also unnecessary
* to attempt to unbind the memory.
*/
/* Set "status" and "errormsg" and goto failure */
goto mrrereghelp_fail;
}
if (reuse_dmahdl) {
}
/*
* Using the new mapping, but reusing the current MTT
* resources, write the updated entries to MTT
*/
if (status != DDI_SUCCESS) {
/*
* Deregister will be called upon returning failure
* from this routine. This will ensure that all
* current resources get properly freed up.
* Unnecessary to attempt to regain software ownership
* of the MPT entry as that has already been done
* above (in tavor_mr_reregister()). Also unnecessary
* to attempt to unbind the memory.
*
* But we do need to unbind the newly bound memory
* before returning.
*/
/* Set "status" and "errormsg" and goto failure */
"failed write mtt");
goto mrrereghelp_fail;
}
/* Put the updated information into the Mem Region handle */
} else {
/*
* Check if the memory region MTT is shared by any other MRs.
* Since the resource may be shared between multiple memory
* regions (as a result of a "RegisterSharedMR()" verb) it is
* important that we not unbind any resources prematurely.
*/
if (!TAVOR_MTT_IS_SHARED(swrc_old)) {
/*
* Unbind the old mapping for this memory region, but
* retain the ddi_dma_handle_t for reuse in the bind
* operation below. Note: This can only be done here
* because the region being reregistered is not
* currently shared. Also if original memory region
* was bound for IOMMU bypass and the new region can
* not use bypass, then a new DMA handle will be
* necessary.
*/
reuse_dmahdl = 1;
} else {
reuse_dmahdl = 0;
}
} else {
/* Old MTT is shared: the old DMA handle is never reused */
reuse_dmahdl = 0;
}
/*
* Bind the new memory and determine the mapped addresses.
* As described, this routine and tavor_mr_fast_mtt_write()
* do the majority of the work for the memory registration
* operations. Note: When we successfully finish the binding,
* we will set the "bi_free_dmahdl" flag to indicate that
* even though we may have reused the ddi_dma_handle_t we do
* wish it to be freed up at some later time. Note also that
* if we fail, we may need to cleanup the ddi_dma_handle_t.
*/
if (status != DDI_SUCCESS) {
if (reuse_dmahdl) {
}
/*
* Deregister will be called upon returning failure
* from this routine. This will ensure that all
* current resources get properly freed up.
* Unnecessary to attempt to regain software ownership
* of the MPT entry as that has already been done
* above (in tavor_mr_reregister()). Also unnecessary
* to attempt to unbind the memory.
*/
/* Set "status" and "errormsg" and goto failure */
goto mrrereghelp_fail;
}
if (reuse_dmahdl) {
}
/*
* Allocate the new MTT entries resource
*/
if (status != DDI_SUCCESS) {
/*
* Deregister will be called upon returning failure
* from this routine. This will ensure that all
* current resources get properly freed up.
* Unnecessary to attempt to regain software ownership
* of the MPT entry as that has already been done
* above (in tavor_mr_reregister()). Also unnecessary
* to attempt to unbind the memory.
*
* But we do need to unbind the newly bound memory
* before returning.
*/
/* Set "status" and "errormsg" and goto failure */
goto mrrereghelp_fail;
}
/*
* Allocate MTT reference count (to track shared memory
* regions). As mentioned elsewhere above, this reference
* count resource may never be used on the given memory region,
* but if it is ever later registered as a "shared" memory
* region then this resource will be necessary. Note: This
* is only necessary here if the existing memory region is
* already being shared (because otherwise we already have
* a usable reference count resource).
*/
if (TAVOR_MTT_IS_SHARED(swrc_old)) {
sleep, &mtt_refcnt);
if (status != DDI_SUCCESS) {
/*
* Deregister will be called upon returning
* failure from this routine. This will ensure
* that all current resources get properly
* freed up. Unnecessary to attempt to regain
* software ownership of the MPT entry as that
* has already been done above (in
* tavor_mr_reregister()). Also unnecessary
* to attempt to unbind the memory.
*
* But we need to unbind the newly bound
* memory and free up the newly allocated MTT
* entries before returning.
*/
*dereg_level =
/* Set "status"/"errormsg", goto failure */
"failed reference count");
goto mrrereghelp_fail;
}
} else {
}
/*
* Using the new mapping and the new MTT resources, write the
* updated entries to MTT
*/
if (status != DDI_SUCCESS) {
/*
* Deregister will be called upon returning failure
* from this routine. This will ensure that all
* current resources get properly freed up.
* Unnecessary to attempt to regain software ownership
* of the MPT entry as that has already been done
* above (in tavor_mr_reregister()). Also unnecessary
* to attempt to unbind the memory.
*
* But we need to unbind the newly bound memory,
* free up the newly allocated MTT entries, and
* (possibly) free the new MTT reference count
* resource before returning.
*/
if (TAVOR_MTT_IS_SHARED(swrc_old)) {
}
/* Set "status" and "errormsg" and goto failure */
goto mrrereghelp_fail;
}
/*
* Check if the memory region MTT is shared by any other MRs.
* Since the resource may be shared between multiple memory
* regions (as a result of a "RegisterSharedMR()" verb) it is
* important that we not free up any resources prematurely.
*/
if (TAVOR_MTT_IS_SHARED(swrc_old)) {
/* Decrement MTT reference count for "old" region */
} else {
/* Free up the old MTT entries resource */
}
/* Put the updated information into the mrhdl */
}
/*
* Calculate and return the updated MTT address (in the DDR address
* space). This will be used by the caller (tavor_mr_reregister) in
* the updated MPT entry
*/
return (DDI_SUCCESS);
return (status);
}
/*
* tavor_mr_nummtt_needed()
* Context: Can be called from interrupt or base context.
*
* Returns a uint64_t; presumably the number of MTT entries required
* for a binding, as the routine name suggests - TODO confirm.
*/
/* ARGSUSED */
static uint64_t
{
/*
* For now we specify the page size as 8KB (the default page size for
* the sun4u architecture), or 4KB for x86.
* XXX: Figure out the optimal page size by examining the dmacookies.
*/
}
/*
* tavor_mr_mem_bind()
* Context: Can be called from interrupt or base context.
*
* Obtains a DMA handle for the binding when one is needed, then binds
* the memory described by "bind" to get its PCI mapped addresses.
* Returns DDI_SUCCESS when the bind reports DDI_DMA_MAPPED; otherwise
* returns the failing status from the handle allocation or from the
* bind itself.
*/
static int
{
int status;
/* bi_type must be set to a meaningful value to get a bind handle */
/* Set the callback flag appropriately */
/* Determine whether to map STREAMING or CONSISTENT */
/*
* Initialize many of the default DMA attributes. Then, if we're
* bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
*/
#ifdef __sparc
/*
* First, disable streaming and switch to consistent if
* configured to do so and IOMMU BYPASS is enabled.
*/
}
/*
* Then, if streaming is still specified, then "bypass" is not
* allowed. Note: the test must be against DDI_DMA_STREAMING; a
* CONSISTENT mapping is exactly the case in which bypass is
* permitted to remain in effect.
*/
if ((dma_xfer_mode == DDI_DMA_STREAMING) &&
}
#endif
/* Allocate a DMA handle for the binding */
if (status != DDI_SUCCESS) {
TAVOR_TNF_ERROR, "");
return (status);
}
} else {
/* No handle was allocated on this path, so none should be freed */
bind->bi_free_dmahdl = 0;
}
/*
* Bind the memory to get the PCI mapped addresses. The decision
* to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
* is determined by the "bi_type" flag. Note: if the bind operation
* fails then we have to free up the DMA handle and return error.
*/
} else { /* TAVOR_BINDHDL_BUF || TAVOR_BINDHDL_UBUF */
}
if (status != DDI_DMA_MAPPED) {
/* Only release the DMA handle if it is flagged to be freed */
if (bind->bi_free_dmahdl != 0) {
}
"");
return (status);
}
return (DDI_SUCCESS);
}
/*
* tavor_mr_mem_unbind()
* Context: Can be called from interrupt or base context.
*
* Unbinds the DMA memory for a region and, when "bi_free_dmahdl" is
* set, frees the DMA handle as well. Returns void (see the note on
* ddi_dma_unbind_handle() below).
*/
static void
{
int status;
/*
* In the TAVOR_BINDHDL_UBUF case, the memory that bi_buf points to
* was allocated internally by ddi_umem_iosetup(), so it must be
* freed here. bi_type is then reset to TAVOR_BINDHDL_NONE so that
* the memory is not freed again later.
*/
}
/*
* Unbind the DMA memory for the region
*
* Note: The only way ddi_dma_unbind_handle() currently
* can return an error is if the handle passed in is invalid.
* Since this should never happen, we choose to return void
* from this function! If this does return an error, however,
* then we print a warning message to the console.
*/
if (status != DDI_SUCCESS) {
TAVOR_TNF_ERROR, "");
/* Bail out early: the DMA handle free step below is skipped */
return;
}
/* Free up the DMA handle */
if (bind->bi_free_dmahdl != 0) {
}
}
/*
* tavor_mr_fast_mtt_write()
* Context: Can be called from interrupt or base context.
*
* Walks the DMA cookie list for a binding and fills in the MTT table
* entries with the mapped addresses. The only return visible here is
* DDI_SUCCESS.
*/
static int
{
int i;
/* Calculate page size from the suggested value passed in */
/*
* Walk the "cookie list" and fill in the MTT table entries
*/
/* "i" is advanced once per MTT entry filled in below */
i = 0;
while (cookie_cnt-- > 0) {
/*
* Fill in the mapped addresses (calculated above) and
* set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
*/
i++;
/*
* An address of zero at this point is treated as a probable
* caller error (presumably the address wrapped - TODO confirm).
* Warn only once, using the static "do_once" flag, and break
* out of the enclosing loop.
*/
if (addr == 0) {
static int do_once = 1;
do_once))
if (do_once) {
do_once = 0;
"dma_cookie address from caller\n");
}
break;
}
}
/*
* When we've reached the end of the current DMA cookie,
* jump to the next cookie (if there are more)
*/
if (cookie_cnt != 0) {
}
}
return (DDI_SUCCESS);
}
/*
* tavor_mtt_refcnt_inc()
* Context: Can be called from interrupt or base context.
*
* Increments the MTT's reference count and returns "cnt" (presumably
* the count as observed by this call - TODO confirm whether it is the
* pre- or post-increment value).
*/
static int
{
/* Increment the MTT's reference count */
return (cnt);
}
/*
* tavor_mtt_refcnt_dec()
* Context: Can be called from interrupt or base context.
*
* Decrements the MTT's reference count and returns "cnt" (presumably
* the count as observed by this call - TODO confirm whether it is the
* pre- or post-decrement value).
*/
static int
{
/* Decrement the MTT's reference count */
return (cnt);
}