amd_iommu_page_tables.c revision 94f1124e84b3daba182b39a5df7b044074cb1c71
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/amd_iommu.h>
#include <sys/bootconf.h>
#include <sys/sysmacros.h>
#include <sys/ddidmareq.h>
#include "amd_iommu_impl.h"
#include "amd_iommu_acpi.h"
#include "amd_iommu_page_tables.h"
ddi_dma_attr_t amd_iommu_pgtable_dma_attr = {
DMA_ATTR_V0,
0U, /* dma_attr_addr_lo */
0xffffffffffffffffULL, /* dma_attr_addr_hi */
0xffffffffU, /* dma_attr_count_max */
(uint64_t)4096, /* dma_attr_align */
1, /* dma_attr_burstsizes */
64, /* dma_attr_minxfer */
0xffffffffU, /* dma_attr_maxxfer */
0xffffffffU, /* dma_attr_seg */
1, /* dma_attr_sgllen, variable */
64, /* dma_attr_granular */
0 /* dma_attr_flags */
};
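
/* Hash table of active domains, keyed by domain id */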
static amd_iommu_domain_t **amd_iommu_domain_table;
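
/*
 * Freelist of previously allocated (and now empty) page tables so they can
 * be reused without another DMA allocation. The freelist can be disabled
 * by setting amd_iommu_no_pgtable_freelist.
 */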
static struct {
int f_count;
amd_iommu_page_table_t *f_list;
} amd_iommu_pgtable_freelist;
int amd_iommu_no_pgtable_freelist;
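
/*
 * Map a device's BDF to the source device id that the IOMMU will actually
 * see, using the alias information from the ACPI IVHD entries. If no alias
 * is listed, the device's own BDF is used.
 */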
/*ARGSUSED*/
static int
amd_iommu_get_src_bdf(amd_iommu_t *iommu, int32_t bdf, int32_t *src_bdfp)
{
amd_iommu_acpi_ivhd_t *hinfop;
hinfop = amd_iommu_lookup_ivhd(bdf);
if (hinfop == NULL || hinfop->ach_src_deviceid == -1)
*src_bdfp = bdf;
else
*src_bdfp = hinfop->ach_src_deviceid;
return (DDI_SUCCESS);
}
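
/*
 * Walk up from rdip to the closest ancestor that is a PCI dip. The returned
 * dip is held; the caller is responsible for ndi_rele_devi().
 */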
static dev_info_t *
amd_iommu_pci_dip(dev_info_t *rdip, const char *path)
{
dev_info_t *pdip;
const char *driver = ddi_driver_name(rdip);
int instance = ddi_get_instance(rdip);
const char *f = "amd_iommu_pci_dip";
/* Hold rdip so it and its parents don't go away */
ndi_hold_devi(rdip);
if (ddi_is_pci_dip(rdip))
return (rdip);
pdip = rdip;
	while ((pdip = ddi_get_parent(pdip)) != NULL) {
if (ddi_is_pci_dip(pdip)) {
ndi_hold_devi(pdip);
ndi_rele_devi(rdip);
return (pdip);
}
}
cmn_err(CE_WARN, "%s: %s%d dip = %p has no PCI parent, path = %s",
f, driver, instance, (void *)rdip, path);
ndi_rele_devi(rdip);
ASSERT(0);
return (NULL);
}
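
/*
 * Derive the domain id used for a device. Reserved device ids map to the
 * system domain; every other device currently gets a domain id equal to
 * its device id.
 */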
/*ARGSUSED*/
static int
amd_iommu_get_domain(amd_iommu_t *iommu, dev_info_t *rdip, int alias,
uint16_t deviceid, domain_id_t *domainid, const char *path)
{
const char *f = "amd_iommu_get_domain";
*domainid = AMD_IOMMU_INVALID_DOMAIN;
ASSERT(strcmp(ddi_driver_name(rdip), "agpgart") != 0);
switch (deviceid) {
case AMD_IOMMU_INVALID_DOMAIN:
case AMD_IOMMU_IDENTITY_DOMAIN:
case AMD_IOMMU_PASSTHRU_DOMAIN:
case AMD_IOMMU_SYS_DOMAIN:
*domainid = AMD_IOMMU_SYS_DOMAIN;
break;
default:
*domainid = deviceid;
break;
}
if (amd_iommu_debug & AMD_IOMMU_DEBUG_DEVTBL) {
cmn_err(CE_NOTE, "%s: domainid for %s = %d",
f, path, *domainid);
}
return (DDI_SUCCESS);
}
static uint16_t
hash_domain(domain_id_t domainid)
{
return (domainid % AMD_IOMMU_DOMAIN_HASH_SZ);
}
/*ARGSUSED*/
void
amd_iommu_init_page_tables(amd_iommu_t *iommu)
{
amd_iommu_domain_table = kmem_zalloc(
sizeof (amd_iommu_domain_t *) * AMD_IOMMU_DOMAIN_HASH_SZ, KM_SLEEP);
}
/*ARGSUSED*/
void
amd_iommu_fini_page_tables(amd_iommu_t *iommu)
{
if (amd_iommu_domain_table) {
kmem_free(amd_iommu_domain_table,
sizeof (amd_iommu_domain_t *) * AMD_IOMMU_DOMAIN_HASH_SZ);
amd_iommu_domain_table = NULL;
}
}
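
/*
 * Look up a domain by id, creating it if necessary. For DVMA (vmem) map
 * types a per-domain vmem arena is created covering [MMU_PAGESIZE, 4G).
 * Callers passing AMD_IOMMU_INVALID_MAP expect the domain to exist already.
 */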
static amd_iommu_domain_t *
amd_iommu_lookup_domain(amd_iommu_t *iommu, domain_id_t domainid,
map_type_t type, int km_flags)
{
uint16_t idx;
amd_iommu_domain_t *dp;
char name[AMD_IOMMU_VMEM_NAMELEN+1];
ASSERT(amd_iommu_domain_table);
idx = hash_domain(domainid);
for (dp = amd_iommu_domain_table[idx]; dp; dp = dp->d_next) {
if (dp->d_domainid == domainid)
return (dp);
}
ASSERT(type != AMD_IOMMU_INVALID_MAP);
dp = kmem_zalloc(sizeof (*dp), km_flags);
if (dp == NULL)
return (NULL);
dp->d_domainid = domainid;
dp->d_pgtable_root_4K = 0; /* make this explicit */
if (type == AMD_IOMMU_VMEM_MAP) {
uint64_t base;
uint64_t size;
(void) snprintf(name, sizeof (name), "dvma_idx%d_domain%d",
iommu->aiomt_idx, domainid);
base = MMU_PAGESIZE;
size = AMD_IOMMU_SIZE_4G - MMU_PAGESIZE;
dp->d_vmem = vmem_create(name, (void *)(uintptr_t)base, size,
MMU_PAGESIZE, NULL, NULL, NULL, 0,
km_flags == KM_SLEEP ? VM_SLEEP : VM_NOSLEEP);
if (dp->d_vmem == NULL) {
kmem_free(dp, sizeof (*dp));
return (NULL);
}
} else {
dp->d_vmem = NULL;
}
dp->d_next = amd_iommu_domain_table[idx];
dp->d_prev = NULL;
amd_iommu_domain_table[idx] = dp;
if (dp->d_next)
dp->d_next->d_prev = dp;
dp->d_ref = 0;
return (dp);
}
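
/*
 * Unlink a no-longer-referenced domain from the hash, destroy its vmem
 * arena and invalidate any of its pages cached in the IOMMU.
 */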
static void
amd_iommu_teardown_domain(amd_iommu_t *iommu, amd_iommu_domain_t *dp)
{
uint16_t idx;
int flags;
amd_iommu_cmdargs_t cmdargs = {0};
domain_id_t domainid = dp->d_domainid;
const char *f = "amd_iommu_teardown_domain";
ASSERT(dp->d_ref == 0);
idx = hash_domain(dp->d_domainid);
if (dp->d_prev == NULL)
amd_iommu_domain_table[idx] = dp->d_next;
else
dp->d_prev->d_next = dp->d_next;
if (dp->d_next)
dp->d_next->d_prev = dp->d_prev;
if (dp->d_vmem != NULL) {
vmem_destroy(dp->d_vmem);
dp->d_vmem = NULL;
}
kmem_free(dp, sizeof (*dp));
cmdargs.ca_domainid = (uint16_t)domainid;
cmdargs.ca_addr = (uintptr_t)0x7FFFFFFFFFFFF000;
flags = AMD_IOMMU_CMD_FLAGS_PAGE_PDE_INVAL |
AMD_IOMMU_CMD_FLAGS_PAGE_INVAL_S;
if (amd_iommu_cmd(iommu, AMD_IOMMU_CMD_INVAL_IOMMU_PAGES,
&cmdargs, flags, 0) != DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: idx=%d: domainid=%d"
"Failed to invalidate domain in IOMMU HW cache",
f, iommu->aiomt_idx, cmdargs.ca_domainid);
}
}
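
/*
 * Compute the device id (source BDF) for rdip. The BDF of the closest PCI
 * ancestor is obtained via ACPI as (bus << 8) | (device << 3) | func and
 * then translated through any IVHD source-id alias. *aliasp is set if the
 * resulting id differs from the device's own BDF. rdip == NULL denotes the
 * "special device" entry.
 */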
static int
amd_iommu_get_deviceid(amd_iommu_t *iommu, dev_info_t *rdip, int32_t *deviceid,
int *aliasp, const char *path)
{
int bus = -1;
int device = -1;
int func = -1;
uint16_t bdf;
int32_t src_bdf;
dev_info_t *idip = iommu->aiomt_dip;
const char *driver = ddi_driver_name(idip);
int instance = ddi_get_instance(idip);
dev_info_t *pci_dip;
const char *f = "amd_iommu_get_deviceid";
/* be conservative. Always assume an alias */
*aliasp = 1;
*deviceid = 0;
	/* Check for special devices (rdip == NULL) */
if (rdip == NULL) {
if (amd_iommu_get_src_bdf(iommu, -1, &src_bdf) != DDI_SUCCESS) {
cmn_err(CE_WARN,
"%s: %s%d: idx=%d, failed to get SRC BDF "
"for special-device",
f, driver, instance, iommu->aiomt_idx);
return (DDI_DMA_NOMAPPING);
}
*deviceid = src_bdf;
*aliasp = 1;
return (DDI_SUCCESS);
}
if (amd_iommu_debug & AMD_IOMMU_DEBUG_DEVTBL) {
cmn_err(CE_NOTE, "%s: attempting to get deviceid for %s",
f, path);
}
pci_dip = amd_iommu_pci_dip(rdip, path);
if (pci_dip == NULL) {
cmn_err(CE_WARN, "%s: %s%d: idx = %d, failed to get PCI dip "
"for rdip=%p, path = %s",
f, driver, instance, iommu->aiomt_idx, (void *)rdip,
path);
return (DDI_DMA_NOMAPPING);
}
if (acpica_get_bdf(pci_dip, &bus, &device, &func) != DDI_SUCCESS) {
ndi_rele_devi(pci_dip);
cmn_err(CE_WARN, "%s: %s%d: idx=%d, failed to get BDF for "
"PCI dip (%p). rdip path = %s",
f, driver, instance, iommu->aiomt_idx,
(void *)pci_dip, path);
return (DDI_DMA_NOMAPPING);
}
ndi_rele_devi(pci_dip);
if (bus > UINT8_MAX || bus < 0 ||
device > UINT8_MAX || device < 0 ||
func > UINT8_MAX || func < 0) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d, invalid BDF(%d,%d,%d) "
"for PCI dip (%p). rdip path = %s", f, driver, instance,
iommu->aiomt_idx,
bus, device, func,
(void *)pci_dip, path);
return (DDI_DMA_NOMAPPING);
}
bdf = ((uint8_t)bus << 8) | ((uint8_t)device << 3) | (uint8_t)func;
if (amd_iommu_get_src_bdf(iommu, bdf, &src_bdf) != DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d, failed to get SRC BDF "
"for PCI dip (%p) rdip path = %s.",
f, driver, instance, iommu->aiomt_idx, (void *)pci_dip,
path);
return (DDI_DMA_NOMAPPING);
}
if (amd_iommu_debug & AMD_IOMMU_DEBUG_DEVTBL) {
cmn_err(CE_NOTE, "%s: Deviceid = %u for path = %s",
f, src_bdf, path);
}
*deviceid = src_bdf;
*aliasp = (src_bdf != bdf);
return (DDI_SUCCESS);
}
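
/*
 * Initialize a device table entry for (deviceid, domainid). Entries that
 * are already in passthru mode (V == 0, TV == 0) or already point at this
 * domain's page table root are left alone. Returns 1 if the entry was
 * (re)written and the caller must invalidate it in the IOMMU, 0 otherwise.
 */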
/*ARGSUSED*/
static int
init_devtbl(amd_iommu_t *iommu, uint64_t *devtbl_entry, domain_id_t domainid,
amd_iommu_domain_t *dp)
{
uint64_t entry[4] = {0};
int i;
/* If already passthru, don't touch */
if (AMD_IOMMU_REG_GET64(&(devtbl_entry[0]), AMD_IOMMU_DEVTBL_V) == 0 &&
AMD_IOMMU_REG_GET64(&(devtbl_entry[0]), AMD_IOMMU_DEVTBL_TV) == 0) {
return (0);
}
if (AMD_IOMMU_REG_GET64(&(devtbl_entry[0]), AMD_IOMMU_DEVTBL_V) == 1 &&
AMD_IOMMU_REG_GET64(&(devtbl_entry[0]), AMD_IOMMU_DEVTBL_TV) == 1) {
ASSERT(dp->d_pgtable_root_4K ==
AMD_IOMMU_REG_GET64(&(devtbl_entry[0]),
AMD_IOMMU_DEVTBL_ROOT_PGTBL));
ASSERT(dp->d_domainid == AMD_IOMMU_REG_GET64(&(devtbl_entry[1]),
AMD_IOMMU_DEVTBL_DOMAINID));
return (0);
}
/* New devtbl entry for this domain. Bump up the domain ref-count */
dp->d_ref++;
entry[3] = 0;
entry[2] = 0;
AMD_IOMMU_REG_SET64(&(entry[1]), AMD_IOMMU_DEVTBL_SYSMGT, 1);
AMD_IOMMU_REG_SET64(&(entry[1]), AMD_IOMMU_DEVTBL_EX, 1);
AMD_IOMMU_REG_SET64(&(entry[1]), AMD_IOMMU_DEVTBL_SD, 0);
AMD_IOMMU_REG_SET64(&(entry[1]), AMD_IOMMU_DEVTBL_CACHE, 0);
AMD_IOMMU_REG_SET64(&(entry[1]), AMD_IOMMU_DEVTBL_IOCTL, 1);
AMD_IOMMU_REG_SET64(&(entry[1]), AMD_IOMMU_DEVTBL_SA, 0);
AMD_IOMMU_REG_SET64(&(entry[1]), AMD_IOMMU_DEVTBL_SE, 1);
AMD_IOMMU_REG_SET64(&(entry[1]), AMD_IOMMU_DEVTBL_IOTLB, 1);
AMD_IOMMU_REG_SET64(&(entry[1]), AMD_IOMMU_DEVTBL_DOMAINID,
(uint16_t)domainid);
AMD_IOMMU_REG_SET64(&(entry[0]), AMD_IOMMU_DEVTBL_IW, 1);
AMD_IOMMU_REG_SET64(&(entry[0]), AMD_IOMMU_DEVTBL_IR, 1);
AMD_IOMMU_REG_SET64(&(entry[0]), AMD_IOMMU_DEVTBL_ROOT_PGTBL,
dp->d_pgtable_root_4K);
AMD_IOMMU_REG_SET64(&(entry[0]), AMD_IOMMU_DEVTBL_PG_MODE,
AMD_IOMMU_PGTABLE_MAXLEVEL);
AMD_IOMMU_REG_SET64(&(entry[0]), AMD_IOMMU_DEVTBL_TV,
domainid == AMD_IOMMU_PASSTHRU_DOMAIN ? 0 : 1);
AMD_IOMMU_REG_SET64(&(entry[0]), AMD_IOMMU_DEVTBL_V,
domainid == AMD_IOMMU_PASSTHRU_DOMAIN ? 0 : 1);
for (i = 1; i < 4; i++) {
devtbl_entry[i] = entry[i];
}
devtbl_entry[0] = entry[0];
/* we did an actual init */
return (1);
}
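
/*
 * Force a device into passthru mode by clearing the V bit in its device
 * table entry and invalidating the entry in the IOMMU. Entries that are
 * already passthru, have live translations, or are in an invalid state are
 * left untouched.
 */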
void
amd_iommu_set_passthru(amd_iommu_t *iommu, dev_info_t *rdip)
{
int32_t deviceid;
int alias;
uint64_t *devtbl_entry;
amd_iommu_cmdargs_t cmdargs = {0};
char *path;
int pathfree;
int V;
int TV;
int instance;
const char *driver;
const char *f = "amd_iommu_set_passthru";
if (rdip) {
driver = ddi_driver_name(rdip);
instance = ddi_get_instance(rdip);
} else {
driver = "special-device";
instance = 0;
}
path = kmem_alloc(MAXPATHLEN, KM_NOSLEEP);
if (path) {
if (rdip)
(void) ddi_pathname(rdip, path);
else
(void) strcpy(path, "special-device");
pathfree = 1;
} else {
pathfree = 0;
path = "<path-mem-alloc-failed>";
}
if (amd_iommu_get_deviceid(iommu, rdip, &deviceid, &alias, path)
!= DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d: rdip=%p. "
"Failed to get device ID for device %s.", f, driver,
instance,
iommu->aiomt_idx, (void *)rdip, path);
goto out;
}
/* No deviceid */
if (deviceid == -1) {
goto out;
}
if ((deviceid + 1) * AMD_IOMMU_DEVTBL_ENTRY_SZ >
iommu->aiomt_devtbl_sz) {
cmn_err(CE_WARN, "%s: %s%d: IOMMU idx=%d, deviceid (%u) "
"for rdip (%p) exceeds device table size (%u), path=%s",
f, driver,
instance, iommu->aiomt_idx, deviceid, (void *)rdip,
iommu->aiomt_devtbl_sz, path);
goto out;
}
/*LINTED*/
devtbl_entry = (uint64_t *)&iommu->aiomt_devtbl
[deviceid * AMD_IOMMU_DEVTBL_ENTRY_SZ];
V = AMD_IOMMU_REG_GET64(&(devtbl_entry[0]), AMD_IOMMU_DEVTBL_V);
TV = AMD_IOMMU_REG_GET64(&(devtbl_entry[0]), AMD_IOMMU_DEVTBL_TV);
/* Already passthru */
if (V == 0 && TV == 0) {
goto out;
}
/* Existing translations */
if (V == 1 && TV == 1) {
goto out;
}
/* Invalid setting */
if (V == 0 && TV == 1) {
goto out;
}
AMD_IOMMU_REG_SET64(&(devtbl_entry[0]), AMD_IOMMU_DEVTBL_V, 0);
cmdargs.ca_deviceid = (uint16_t)deviceid;
(void) amd_iommu_cmd(iommu, AMD_IOMMU_CMD_INVAL_DEVTAB_ENTRY,
&cmdargs, 0, 0);
out:
if (pathfree)
kmem_free(path, MAXPATHLEN);
}
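
/*
 * Point a device's device table entry at its domain's page table root,
 * invalidating the entry in the IOMMU if it was actually rewritten.
 */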
static int
amd_iommu_set_devtbl_entry(amd_iommu_t *iommu, dev_info_t *rdip,
domain_id_t domainid, uint16_t deviceid, amd_iommu_domain_t *dp,
const char *path)
{
uint64_t *devtbl_entry;
amd_iommu_cmdargs_t cmdargs = {0};
	int error = DDI_SUCCESS;
dev_info_t *idip = iommu->aiomt_dip;
const char *driver = ddi_driver_name(idip);
int instance = ddi_get_instance(idip);
const char *f = "amd_iommu_set_devtbl_entry";
if (amd_iommu_debug & AMD_IOMMU_DEBUG_DEVTBL) {
cmn_err(CE_WARN, "%s: attempting to set devtbl entry for %s",
f, path);
}
if ((deviceid + 1) * AMD_IOMMU_DEVTBL_ENTRY_SZ >
iommu->aiomt_devtbl_sz) {
cmn_err(CE_WARN, "%s: %s%d: IOMMU idx=%d, deviceid (%u) "
"for rdip (%p) exceeds device table size (%u), path=%s",
f, driver,
instance, iommu->aiomt_idx, deviceid, (void *)rdip,
iommu->aiomt_devtbl_sz, path);
return (DDI_DMA_NOMAPPING);
}
/*LINTED*/
devtbl_entry = (uint64_t *)&iommu->aiomt_devtbl
[deviceid * AMD_IOMMU_DEVTBL_ENTRY_SZ];
if (amd_iommu_debug & AMD_IOMMU_DEBUG_DEVTBL) {
cmn_err(CE_WARN, "%s: deviceid=%u devtbl entry (%p) for %s",
f, deviceid, (void *)(uintptr_t)(*devtbl_entry), path);
}
if (init_devtbl(iommu, devtbl_entry, domainid, dp)) {
cmdargs.ca_deviceid = deviceid;
error = amd_iommu_cmd(iommu, AMD_IOMMU_CMD_INVAL_DEVTAB_ENTRY,
&cmdargs, 0, 0);
}
return (error);
}
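
/*
 * Tear down the translation in a device table entry: clear TV and the root
 * page table pointer while leaving the entry valid (V == 1), drop the
 * domain reference and invalidate the entry in the IOMMU. Sets
 * *domain_freed when the domain's reference count drops to zero.
 */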
int
amd_iommu_clear_devtbl_entry(amd_iommu_t *iommu, dev_info_t *rdip,
domain_id_t domainid, uint16_t deviceid, amd_iommu_domain_t *dp,
int *domain_freed, char *path)
{
uint64_t *devtbl_entry;
int error = DDI_SUCCESS;
amd_iommu_cmdargs_t cmdargs = {0};
const char *driver = ddi_driver_name(iommu->aiomt_dip);
int instance = ddi_get_instance(iommu->aiomt_dip);
const char *f = "amd_iommu_clear_devtbl_entry";
if (amd_iommu_debug & AMD_IOMMU_DEBUG_DEVTBL) {
cmn_err(CE_NOTE, "%s: attempting to clear devtbl entry for "
"domainid = %d, deviceid = %u, path = %s",
f, domainid, deviceid, path);
}
if ((deviceid + 1) * AMD_IOMMU_DEVTBL_ENTRY_SZ >
iommu->aiomt_devtbl_sz) {
cmn_err(CE_WARN, "%s: %s%d: IOMMU idx=%d, deviceid (%u) "
"for rdip (%p) exceeds device table size (%u), path = %s",
f, driver, instance,
iommu->aiomt_idx, deviceid, (void *)rdip,
iommu->aiomt_devtbl_sz, path);
return (DDI_FAILURE);
}
/*LINTED*/
devtbl_entry = (uint64_t *)&iommu->aiomt_devtbl
[deviceid * AMD_IOMMU_DEVTBL_ENTRY_SZ];
if (amd_iommu_debug & AMD_IOMMU_DEBUG_DEVTBL) {
cmn_err(CE_WARN, "%s: deviceid=%u devtbl entry (%p) for %s",
f, deviceid, (void *)(uintptr_t)(*devtbl_entry), path);
}
if (AMD_IOMMU_REG_GET64(&(devtbl_entry[0]), AMD_IOMMU_DEVTBL_TV) == 0) {
/* Nothing to do */
return (DDI_SUCCESS);
}
ASSERT(dp->d_pgtable_root_4K == AMD_IOMMU_REG_GET64(&(devtbl_entry[0]),
AMD_IOMMU_DEVTBL_ROOT_PGTBL));
ASSERT(domainid == AMD_IOMMU_REG_GET64(&(devtbl_entry[1]),
AMD_IOMMU_DEVTBL_DOMAINID));
AMD_IOMMU_REG_SET64(&(devtbl_entry[0]), AMD_IOMMU_DEVTBL_TV, 0);
AMD_IOMMU_REG_SET64(&(devtbl_entry[0]), AMD_IOMMU_DEVTBL_ROOT_PGTBL, 0);
AMD_IOMMU_REG_SET64(&(devtbl_entry[0]), AMD_IOMMU_DEVTBL_V, 1);
SYNC_FORDEV(iommu->aiomt_dmahdl);
dp->d_ref--;
ASSERT(dp->d_ref >= 0);
if (dp->d_ref == 0) {
*domain_freed = 1;
}
cmdargs.ca_deviceid = deviceid;
error = amd_iommu_cmd(iommu, AMD_IOMMU_CMD_INVAL_DEVTAB_ENTRY,
&cmdargs, 0, 0);
if (error != DDI_SUCCESS)
error = DDI_FAILURE;
return (error);
}
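
/*
 * Global hash of allocated IO page tables, keyed by the 4K-aligned physical
 * address of the table, so a PDE's address field can be mapped back to the
 * amd_iommu_page_table_t that describes it.
 */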
int
amd_iommu_page_table_hash_init(amd_iommu_page_table_hash_t *ampt)
{
ampt->ampt_hash = kmem_zalloc(sizeof (amd_iommu_page_table_t *) *
AMD_IOMMU_PGTABLE_HASH_SZ, KM_SLEEP);
return (DDI_SUCCESS);
}
void
amd_iommu_page_table_hash_fini(amd_iommu_page_table_hash_t *ampt)
{
kmem_free(ampt->ampt_hash,
sizeof (amd_iommu_page_table_t *) * AMD_IOMMU_PGTABLE_HASH_SZ);
ampt->ampt_hash = NULL;
}
static uint32_t
pt_hashfn(uint64_t pa_4K)
{
return (pa_4K % AMD_IOMMU_PGTABLE_HASH_SZ);
}
static void
amd_iommu_insert_pgtable_hash(amd_iommu_page_table_t *pt)
{
uint64_t pa_4K = ((uint64_t)pt->pt_cookie.dmac_cookie_addr) >> 12;
uint32_t idx = pt_hashfn(pa_4K);
ASSERT((pt->pt_cookie.dmac_cookie_addr & AMD_IOMMU_PGTABLE_ALIGN) == 0);
mutex_enter(&amd_iommu_page_table_hash.ampt_lock);
pt->pt_next = amd_iommu_page_table_hash.ampt_hash[idx];
pt->pt_prev = NULL;
amd_iommu_page_table_hash.ampt_hash[idx] = pt;
if (pt->pt_next)
pt->pt_next->pt_prev = pt;
mutex_exit(&amd_iommu_page_table_hash.ampt_lock);
}
static void
amd_iommu_remove_pgtable_hash(amd_iommu_page_table_t *pt)
{
uint64_t pa_4K = (pt->pt_cookie.dmac_cookie_addr >> 12);
uint32_t idx = pt_hashfn(pa_4K);
ASSERT((pt->pt_cookie.dmac_cookie_addr & AMD_IOMMU_PGTABLE_ALIGN) == 0);
mutex_enter(&amd_iommu_page_table_hash.ampt_lock);
if (pt->pt_next)
pt->pt_next->pt_prev = pt->pt_prev;
if (pt->pt_prev)
pt->pt_prev->pt_next = pt->pt_next;
else
amd_iommu_page_table_hash.ampt_hash[idx] = pt->pt_next;
pt->pt_next = NULL;
pt->pt_prev = NULL;
mutex_exit(&amd_iommu_page_table_hash.ampt_lock);
}
static amd_iommu_page_table_t *
amd_iommu_lookup_pgtable_hash(domain_id_t domainid, uint64_t pgtable_pa_4K)
{
amd_iommu_page_table_t *pt;
uint32_t idx = pt_hashfn(pgtable_pa_4K);
mutex_enter(&amd_iommu_page_table_hash.ampt_lock);
pt = amd_iommu_page_table_hash.ampt_hash[idx];
for (; pt; pt = pt->pt_next) {
if (domainid != pt->pt_domainid)
continue;
ASSERT((pt->pt_cookie.dmac_cookie_addr &
AMD_IOMMU_PGTABLE_ALIGN) == 0);
if ((pt->pt_cookie.dmac_cookie_addr >> 12) == pgtable_pa_4K) {
break;
}
}
mutex_exit(&amd_iommu_page_table_hash.ampt_lock);
return (pt);
}
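
/*
 * Find the page table at the given level of the hierarchy: the domain's
 * root table at the top level, otherwise the child table referenced by
 * entry "index" of the parent table ppt.
 */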
/*ARGSUSED*/
static amd_iommu_page_table_t *
amd_iommu_lookup_pgtable(amd_iommu_t *iommu, amd_iommu_page_table_t *ppt,
amd_iommu_domain_t *dp, int level, uint16_t index)
{
uint64_t *pdtep;
uint64_t pgtable_pa_4K;
ASSERT(level > 0 && level <= AMD_IOMMU_PGTABLE_MAXLEVEL);
ASSERT(dp);
if (level == AMD_IOMMU_PGTABLE_MAXLEVEL) {
ASSERT(ppt == NULL);
ASSERT(index == 0);
pgtable_pa_4K = dp->d_pgtable_root_4K;
} else {
ASSERT(ppt);
pdtep = &(ppt->pt_pgtblva[index]);
if (AMD_IOMMU_REG_GET64(pdtep, AMD_IOMMU_PTDE_PR) == 0) {
if (amd_iommu_debug & AMD_IOMMU_DEBUG_PAGE_TABLES) {
#ifdef _LP64
cmn_err(CE_NOTE, "Skipping PR=0 pdte: %p",
(void *)(*pdtep));
#else
cmn_err(CE_NOTE, "Skipping PR=0 pdte: %llx",
*pdtep);
#endif
}
return (NULL);
}
pgtable_pa_4K = AMD_IOMMU_REG_GET64(pdtep, AMD_IOMMU_PTDE_ADDR);
}
return (amd_iommu_lookup_pgtable_hash(dp->d_domainid, pgtable_pa_4K));
}
static amd_iommu_page_table_t *
amd_iommu_alloc_from_freelist(void)
{
int i;
uint64_t *pte_array;
amd_iommu_page_table_t *pt;
if (amd_iommu_no_pgtable_freelist == 1)
return (NULL);
if (amd_iommu_pgtable_freelist.f_count == 0)
return (NULL);
pt = amd_iommu_pgtable_freelist.f_list;
amd_iommu_pgtable_freelist.f_list = pt->pt_next;
amd_iommu_pgtable_freelist.f_count--;
pte_array = pt->pt_pgtblva;
for (i = 0; i < AMD_IOMMU_PGTABLE_SZ / (sizeof (*pte_array)); i++) {
ASSERT(pt->pt_pte_ref[i] == 0);
ASSERT(AMD_IOMMU_REG_GET64(&(pte_array[i]),
AMD_IOMMU_PTDE_PR) == 0);
}
return (pt);
}
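
/*
 * Allocate a 4K IO page table, reusing one from the freelist when possible.
 * Fresh tables are allocated as DMA-able memory bound through the IOMMU's
 * own dip (so the tables themselves are not translated) and must fit in a
 * single DMA cookie.
 */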
static int
amd_iommu_alloc_pgtable(amd_iommu_t *iommu, domain_id_t domainid,
const char *path, amd_iommu_page_table_t **ptp, int km_flags)
{
int err;
uint_t ncookies;
amd_iommu_page_table_t *pt;
dev_info_t *idip = iommu->aiomt_dip;
const char *driver = ddi_driver_name(idip);
int instance = ddi_get_instance(idip);
const char *f = "amd_iommu_alloc_pgtable";
*ptp = NULL;
pt = amd_iommu_alloc_from_freelist();
if (pt)
goto init_pgtable;
pt = kmem_zalloc(sizeof (amd_iommu_page_table_t), km_flags);
if (pt == NULL)
return (DDI_DMA_NORESOURCES);
/*
* Each page table is 4K in size
*/
pt->pt_mem_reqsz = AMD_IOMMU_PGTABLE_SZ;
/*
* Alloc a DMA handle. Use the IOMMU dip as we want this DMA
* to *not* enter the IOMMU - no recursive entrance.
*/
err = ddi_dma_alloc_handle(idip, &amd_iommu_pgtable_dma_attr,
km_flags == KM_SLEEP ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
NULL, &pt->pt_dma_hdl);
if (err != DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: domainid = %d, path = %s. "
"Cannot alloc DMA handle for IO Page Table",
f, driver, instance, domainid, path);
kmem_free(pt, sizeof (amd_iommu_page_table_t));
return (err == DDI_DMA_NORESOURCES ? err : DDI_DMA_NOMAPPING);
}
/*
* Alloc memory for IO Page Table.
* XXX remove size_t cast kludge
*/
err = ddi_dma_mem_alloc(pt->pt_dma_hdl, pt->pt_mem_reqsz,
&amd_iommu_devacc, DDI_DMA_CONSISTENT|IOMEM_DATA_UNCACHED,
km_flags == KM_SLEEP ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
NULL, (caddr_t *)&pt->pt_pgtblva,
(size_t *)&pt->pt_mem_realsz, &pt->pt_mem_hdl);
if (err != DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: domainid=%d, path = %s. "
"Cannot allocate DMA memory for IO Page table",
f, driver, instance, domainid, path);
ddi_dma_free_handle(&pt->pt_dma_hdl);
kmem_free(pt, sizeof (amd_iommu_page_table_t));
return (DDI_DMA_NORESOURCES);
}
	/*
	 * The page table DMA VA must be 4K aligned and the allocated
	 * size must be at least the requested size.
	 */
ASSERT(((uint64_t)(uintptr_t)pt->pt_pgtblva & AMD_IOMMU_PGTABLE_ALIGN)
== 0);
ASSERT(pt->pt_mem_realsz >= pt->pt_mem_reqsz);
/*
* Now bind the handle
*/
err = ddi_dma_addr_bind_handle(pt->pt_dma_hdl, NULL,
(caddr_t)pt->pt_pgtblva, pt->pt_mem_realsz,
DDI_DMA_READ | DDI_DMA_CONSISTENT,
km_flags == KM_SLEEP ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
NULL, &pt->pt_cookie, &ncookies);
if (err != DDI_DMA_MAPPED) {
cmn_err(CE_WARN, "%s: %s%d: domainid=%d, path = %s. "
"Cannot bind memory for DMA to IO Page Tables. "
"bufrealsz=%p",
f, driver, instance, domainid, path,
(void *)(uintptr_t)pt->pt_mem_realsz);
ddi_dma_mem_free(&pt->pt_mem_hdl);
ddi_dma_free_handle(&pt->pt_dma_hdl);
kmem_free(pt, sizeof (amd_iommu_page_table_t));
return (err == DDI_DMA_PARTIAL_MAP ? DDI_DMA_NOMAPPING :
err);
}
/*
* We assume the DMA engine on the IOMMU is capable of handling the
* whole page table in a single cookie. If not and multiple cookies
* are needed we fail.
*/
if (ncookies != 1) {
cmn_err(CE_WARN, "%s: %s%d: domainid = %d, path=%s "
"Cannot handle multiple "
"cookies for DMA to IO page Table, #cookies=%u",
f, driver, instance, domainid, path, ncookies);
(void) ddi_dma_unbind_handle(pt->pt_dma_hdl);
ddi_dma_mem_free(&pt->pt_mem_hdl);
ddi_dma_free_handle(&pt->pt_dma_hdl);
kmem_free(pt, sizeof (amd_iommu_page_table_t));
return (DDI_DMA_NOMAPPING);
}
init_pgtable:
	/*
	 * The address in the cookie must be 4K aligned and the cookie
	 * must cover at least the table size.
	 */
ASSERT(pt->pt_cookie.dmac_cookie_addr != NULL);
ASSERT((pt->pt_cookie.dmac_cookie_addr & AMD_IOMMU_PGTABLE_ALIGN) == 0);
ASSERT(pt->pt_cookie.dmac_size >= pt->pt_mem_realsz);
ASSERT(pt->pt_cookie.dmac_size >= pt->pt_mem_reqsz);
ASSERT(pt->pt_mem_reqsz >= AMD_IOMMU_PGTABLE_SIZE);
ASSERT(pt->pt_mem_realsz >= pt->pt_mem_reqsz);
ASSERT(pt->pt_pgtblva);
pt->pt_domainid = AMD_IOMMU_INVALID_DOMAIN;
pt->pt_level = 0x7;
pt->pt_index = 0;
pt->pt_ref = 0;
pt->pt_next = NULL;
pt->pt_prev = NULL;
pt->pt_parent = NULL;
bzero(pt->pt_pgtblva, pt->pt_mem_realsz);
SYNC_FORDEV(pt->pt_dma_hdl);
amd_iommu_insert_pgtable_hash(pt);
*ptp = pt;
return (DDI_SUCCESS);
}
static int
amd_iommu_move_to_freelist(amd_iommu_page_table_t *pt)
{
if (amd_iommu_no_pgtable_freelist == 1)
return (DDI_FAILURE);
if (amd_iommu_pgtable_freelist.f_count ==
AMD_IOMMU_PGTABLE_FREELIST_MAX)
return (DDI_FAILURE);
pt->pt_next = amd_iommu_pgtable_freelist.f_list;
amd_iommu_pgtable_freelist.f_list = pt;
amd_iommu_pgtable_freelist.f_count++;
return (DDI_SUCCESS);
}
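
/*
 * Release an empty page table: stash it on the freelist if there is room,
 * otherwise unbind and free its DMA resources.
 */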
static void
amd_iommu_free_pgtable(amd_iommu_t *iommu, amd_iommu_page_table_t *pt)
{
int i;
uint64_t *pte_array;
dev_info_t *dip = iommu->aiomt_dip;
int instance = ddi_get_instance(dip);
const char *driver = ddi_driver_name(dip);
const char *f = "amd_iommu_free_pgtable";
ASSERT(pt->pt_ref == 0);
amd_iommu_remove_pgtable_hash(pt);
pte_array = pt->pt_pgtblva;
for (i = 0; i < AMD_IOMMU_PGTABLE_SZ / (sizeof (*pte_array)); i++) {
ASSERT(pt->pt_pte_ref[i] == 0);
ASSERT(AMD_IOMMU_REG_GET64(&(pte_array[i]),
AMD_IOMMU_PTDE_PR) == 0);
}
if (amd_iommu_move_to_freelist(pt) == DDI_SUCCESS)
return;
/* Unbind the handle */
if (ddi_dma_unbind_handle(pt->pt_dma_hdl) != DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d, domainid=%d. "
"Failed to unbind handle: %p for IOMMU Page Table",
f, driver, instance, iommu->aiomt_idx, pt->pt_domainid,
(void *)pt->pt_dma_hdl);
}
/* Free the table memory allocated for DMA */
ddi_dma_mem_free(&pt->pt_mem_hdl);
/* Free the DMA handle */
ddi_dma_free_handle(&pt->pt_dma_hdl);
kmem_free(pt, sizeof (amd_iommu_page_table_t));
}
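
/*
 * Point a page directory entry in the parent table ppt at the child table
 * pt. Directory entries are always installed with read and write
 * permission; the present (PR) bit is set last, after the rest of the
 * entry has been filled in.
 */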
static int
init_pde(amd_iommu_page_table_t *ppt, amd_iommu_page_table_t *pt)
{
uint64_t *pdep = &(ppt->pt_pgtblva[pt->pt_index]);
uint64_t next_pgtable_pa_4K = (pt->pt_cookie.dmac_cookie_addr) >> 12;
/* nothing to set. PDE is already set */
if (AMD_IOMMU_REG_GET64(pdep, AMD_IOMMU_PTDE_PR) == 1) {
ASSERT(PT_REF_VALID(ppt));
ASSERT(PT_REF_VALID(pt));
ASSERT(ppt->pt_pte_ref[pt->pt_index] == 0);
ASSERT(AMD_IOMMU_REG_GET64(pdep, AMD_IOMMU_PTDE_ADDR)
== next_pgtable_pa_4K);
return (DDI_SUCCESS);
}
ppt->pt_ref++;
ASSERT(PT_REF_VALID(ppt));
/* Page Directories are always RW */
AMD_IOMMU_REG_SET64(pdep, AMD_IOMMU_PTDE_IW, 1);
AMD_IOMMU_REG_SET64(pdep, AMD_IOMMU_PTDE_IR, 1);
AMD_IOMMU_REG_SET64(pdep, AMD_IOMMU_PTDE_ADDR,
next_pgtable_pa_4K);
pt->pt_parent = ppt;
AMD_IOMMU_REG_SET64(pdep, AMD_IOMMU_PTDE_NXT_LVL,
pt->pt_level);
ppt->pt_pte_ref[pt->pt_index] = 0;
AMD_IOMMU_REG_SET64(pdep, AMD_IOMMU_PTDE_PR, 1);
SYNC_FORDEV(ppt->pt_dma_hdl);
ASSERT(AMD_IOMMU_REG_GET64(pdep, AMD_IOMMU_PTDE_PR) == 1);
return (DDI_SUCCESS);
}
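
/*
 * Install (or adjust) the leaf PTE that maps one 4K page at the given
 * index. The IOMMU read/write permission bits track the DMA direction
 * flags with inverted sense: DDI_DMA_WRITE (memory -> device) needs memory
 * read permission and DDI_DMA_READ needs memory write permission. A
 * per-entry reference count tracks how many bindings share the PTE.
 */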
static int
init_pte(amd_iommu_page_table_t *pt, uint64_t pa, uint16_t index,
struct ddi_dma_req *dmareq)
{
uint64_t *ptep = &(pt->pt_pgtblva[index]);
uint64_t pa_4K = pa >> 12;
int R;
int W;
/* nothing to set if PTE is already set */
if (AMD_IOMMU_REG_GET64(ptep, AMD_IOMMU_PTDE_PR) == 1) {
/*
* Adjust current permissions
* DDI_DMA_WRITE means direction of DMA is MEM -> I/O
* so that requires Memory READ permissions i.e. sense
* is inverted.
		 * Note: either or both of DDI_DMA_READ/WRITE may be set
*/
if (amd_iommu_no_RW_perms == 0) {
R = AMD_IOMMU_REG_GET64(ptep, AMD_IOMMU_PTDE_IR);
W = AMD_IOMMU_REG_GET64(ptep, AMD_IOMMU_PTDE_IW);
if (R == 0 && ((dmareq->dmar_flags & DDI_DMA_WRITE) ||
(dmareq->dmar_flags & DDI_DMA_RDWR))) {
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_IR, 1);
}
if (W == 0 && ((dmareq->dmar_flags & DDI_DMA_READ) ||
(dmareq->dmar_flags & DDI_DMA_RDWR))) {
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_IW, 1);
}
}
ASSERT(PT_REF_VALID(pt));
pt->pt_pte_ref[index]++;
ASSERT(AMD_IOMMU_REG_GET64(ptep, AMD_IOMMU_PTDE_ADDR)
== pa_4K);
return (DDI_SUCCESS);
}
pt->pt_ref++;
ASSERT(PT_REF_VALID(pt));
/* see comment above about inverting sense of RD/WR */
if (amd_iommu_no_RW_perms == 0) {
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_IR, 0);
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_IW, 0);
if (dmareq->dmar_flags & DDI_DMA_RDWR) {
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_IW, 1);
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_IR, 1);
} else {
if (dmareq->dmar_flags & DDI_DMA_WRITE) {
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_IR, 1);
}
if (dmareq->dmar_flags & DDI_DMA_READ) {
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_IW, 1);
}
}
} else {
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_IR, 1);
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_IW, 1);
}
/* TODO what is correct for FC and U */
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTE_FC, 0);
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTE_U, 0);
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_ADDR, pa_4K);
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_NXT_LVL, 0);
ASSERT(pt->pt_pte_ref[index] == 0);
pt->pt_pte_ref[index] = 1;
AMD_IOMMU_REG_SET64(ptep, AMD_IOMMU_PTDE_PR, 1);
SYNC_FORDEV(pt->pt_dma_hdl);
ASSERT(AMD_IOMMU_REG_GET64(ptep, AMD_IOMMU_PTDE_PR) == 1);
return (DDI_SUCCESS);
}
static void
init_pt(amd_iommu_page_table_t *pt, amd_iommu_domain_t *dp,
int level, uint16_t index)
{
ASSERT(dp);
if (level == AMD_IOMMU_PGTABLE_MAXLEVEL) {
dp->d_pgtable_root_4K = (pt->pt_cookie.dmac_cookie_addr) >> 12;
} else {
ASSERT(level >= 1 && level < AMD_IOMMU_PGTABLE_MAXLEVEL);
}
pt->pt_domainid = dp->d_domainid;
pt->pt_level = level;
pt->pt_index = index;
}
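
/*
 * Set up a single level of the page table hierarchy for va: reuse the
 * existing table if one is present, otherwise allocate and initialize a
 * new one and link it into the parent's PDE. At level 1 the PTE for pa is
 * written; at higher levels the table and the index to use at the next
 * lower level are returned through ptp/next_idxp.
 */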
static int
amd_iommu_setup_1_pgtable(amd_iommu_t *iommu, dev_info_t *rdip,
struct ddi_dma_req *dmareq,
domain_id_t domainid, amd_iommu_domain_t *dp,
amd_iommu_page_table_t *ppt,
uint16_t index, int level, uint64_t va, uint64_t pa,
amd_iommu_page_table_t **ptp, uint16_t *next_idxp, const char *path,
int km_flags)
{
int error;
amd_iommu_page_table_t *pt;
const char *driver = ddi_driver_name(rdip);
int instance = ddi_get_instance(rdip);
const char *f = "amd_iommu_setup_1_pgtable";
*ptp = NULL;
*next_idxp = 0;
error = DDI_SUCCESS;
ASSERT(level > 0 && level <= AMD_IOMMU_PGTABLE_MAXLEVEL);
ASSERT(dp);
if (level == AMD_IOMMU_PGTABLE_MAXLEVEL) {
ASSERT(ppt == NULL);
ASSERT(index == 0);
} else {
ASSERT(ppt);
}
/* Check if page table is already allocated */
	pt = amd_iommu_lookup_pgtable(iommu, ppt, dp, level, index);
	if (pt != NULL) {
ASSERT(pt->pt_domainid == domainid);
ASSERT(pt->pt_level == level);
ASSERT(pt->pt_index == index);
goto out;
}
if ((error = amd_iommu_alloc_pgtable(iommu, domainid, path, &pt,
km_flags)) != DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: idx = %u, domainid = %d, va = %p "
"path = %s", f, driver, instance, iommu->aiomt_idx,
domainid, (void *)(uintptr_t)va, path);
return (error);
}
ASSERT(dp->d_domainid == domainid);
init_pt(pt, dp, level, index);
out:
if (level != AMD_IOMMU_PGTABLE_MAXLEVEL) {
error = init_pde(ppt, pt);
}
if (level == 1) {
ASSERT(error == DDI_SUCCESS);
error = init_pte(pt, pa, AMD_IOMMU_VA_BITS(va, level), dmareq);
} else {
*next_idxp = AMD_IOMMU_VA_BITS(va, level);
*ptp = pt;
}
return (error);
}
typedef enum {
PDTE_NOT_TORN = 0x1,
PDTE_TORN_DOWN = 0x2,
PGTABLE_TORN_DOWN = 0x4
} pdte_tear_t;
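
/*
 * Tear down a single PDE/PTE. Leaf PTEs are only cleared once their
 * per-entry reference count drops to zero. When the containing table's
 * reference count reaches zero the table itself is freed and
 * PGTABLE_TORN_DOWN is returned so the caller can continue up the tree.
 */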
static pdte_tear_t
amd_iommu_teardown_pdte(amd_iommu_t *iommu,
amd_iommu_page_table_t *pt, int index)
{
uint8_t next_level;
pdte_tear_t retval;
uint64_t *ptdep = &(pt->pt_pgtblva[index]);
next_level = AMD_IOMMU_REG_GET64(ptdep,
AMD_IOMMU_PTDE_NXT_LVL);
if (AMD_IOMMU_REG_GET64(ptdep, AMD_IOMMU_PTDE_PR) == 1) {
if (pt->pt_level == 1) {
ASSERT(next_level == 0);
/* PTE */
pt->pt_pte_ref[index]--;
if (pt->pt_pte_ref[index] != 0) {
return (PDTE_NOT_TORN);
}
} else {
ASSERT(next_level != 0 && next_level != 7);
}
ASSERT(pt->pt_pte_ref[index] == 0);
ASSERT(PT_REF_VALID(pt));
AMD_IOMMU_REG_SET64(ptdep, AMD_IOMMU_PTDE_PR, 0);
SYNC_FORDEV(pt->pt_dma_hdl);
ASSERT(AMD_IOMMU_REG_GET64(ptdep,
AMD_IOMMU_PTDE_PR) == 0);
pt->pt_ref--;
ASSERT(PT_REF_VALID(pt));
retval = PDTE_TORN_DOWN;
} else {
ASSERT(0);
ASSERT(pt->pt_pte_ref[index] == 0);
ASSERT(PT_REF_VALID(pt));
retval = PDTE_NOT_TORN;
}
if (pt->pt_ref == 0) {
amd_iommu_free_pgtable(iommu, pt);
return (PGTABLE_TORN_DOWN);
}
return (retval);
}
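
/*
 * Map a single 4K page (va -> pa) for a device: walk from the root level
 * down to level 1, creating page tables as needed, then install the device
 * table entry pointing at the domain's root. Passthru domains need no page
 * tables and only get the device table entry.
 */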
static int
amd_iommu_create_pgtables(amd_iommu_t *iommu, dev_info_t *rdip,
struct ddi_dma_req *dmareq, uint64_t va,
uint64_t pa, uint16_t deviceid, domain_id_t domainid,
amd_iommu_domain_t *dp, const char *path, int km_flags)
{
int level;
uint16_t index;
uint16_t next_idx;
amd_iommu_page_table_t *pt;
amd_iommu_page_table_t *ppt;
int error;
const char *driver = ddi_driver_name(rdip);
int instance = ddi_get_instance(rdip);
const char *f = "amd_iommu_create_pgtables";
if (amd_iommu_debug & AMD_IOMMU_DEBUG_PAGE_TABLES) {
cmn_err(CE_NOTE, "%s: %s%d: idx = %u, domainid = %d, "
"deviceid = %u, va = %p, pa = %p, path = %s",
f, driver, instance,
iommu->aiomt_idx, domainid, deviceid,
(void *)(uintptr_t)va,
(void *)(uintptr_t)pa, path);
}
if (domainid == AMD_IOMMU_PASSTHRU_DOMAIN) {
/* No need for pagetables. Just set up device table entry */
goto passthru;
}
index = 0;
ppt = NULL;
for (level = AMD_IOMMU_PGTABLE_MAXLEVEL; level > 0;
level--, pt = NULL, next_idx = 0) {
if ((error = amd_iommu_setup_1_pgtable(iommu, rdip, dmareq,
domainid, dp, ppt, index, level, va, pa, &pt,
&next_idx, path, km_flags)) != DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d: domainid=%d, "
"deviceid=%u, va= %p, pa = %p, Failed to setup "
"page table(s) at level = %d, path = %s.",
f, driver, instance, iommu->aiomt_idx,
domainid, deviceid, (void *)(uintptr_t)va,
(void *)(uintptr_t)pa, level, path);
return (error);
}
if (level > 1) {
ASSERT(pt);
ASSERT(pt->pt_domainid == domainid);
ppt = pt;
index = next_idx;
} else {
ASSERT(level == 1);
ASSERT(pt == NULL);
ASSERT(next_idx == 0);
ppt = NULL;
index = 0;
}
}
passthru:
if ((error = amd_iommu_set_devtbl_entry(iommu, rdip, domainid, deviceid,
dp, path)) != DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d: rdip=%p, deviceid=%u, "
"domainid=%d."
"Failed to set device table entry for path %s.",
f, driver, instance,
iommu->aiomt_idx, (void *)rdip, deviceid, domainid, path);
return (error);
}
SYNC_FORDEV(iommu->aiomt_dmahdl);
return (DDI_SUCCESS);
}
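
/*
 * Unmap a single 4K page: walk down to the leaf for pageva, tear the
 * PTE/PDEs back up towards the root, freeing page tables that become
 * empty, then invalidate the affected IOMMU cache entries. If the root
 * table itself was torn down, the device table entry is cleared as well.
 */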
static int
amd_iommu_destroy_pgtables(amd_iommu_t *iommu, dev_info_t *rdip,
uint64_t pageva, uint16_t deviceid, domain_id_t domainid,
amd_iommu_domain_t *dp, map_type_t type, int *domain_freed, char *path)
{
int level;
int flags;
amd_iommu_cmdargs_t cmdargs = {0};
uint16_t index;
uint16_t prev_index;
amd_iommu_page_table_t *pt;
amd_iommu_page_table_t *ppt;
pdte_tear_t retval;
int tear_level;
int invalidate_pte;
int invalidate_pde;
int error = DDI_FAILURE;
const char *driver = ddi_driver_name(iommu->aiomt_dip);
int instance = ddi_get_instance(iommu->aiomt_dip);
const char *f = "amd_iommu_destroy_pgtables";
if (amd_iommu_debug & AMD_IOMMU_DEBUG_PAGE_TABLES) {
cmn_err(CE_NOTE, "%s: %s%d: idx = %u, domainid = %d, "
"deviceid = %u, va = %p, path = %s",
f, driver, instance,
iommu->aiomt_idx, domainid, deviceid,
(void *)(uintptr_t)pageva, path);
}
if (domainid == AMD_IOMMU_PASSTHRU_DOMAIN) {
/*
* there are no pagetables for the passthru domain.
* Just the device table entry
*/
error = DDI_SUCCESS;
goto passthru;
}
ppt = NULL;
index = 0;
for (level = AMD_IOMMU_PGTABLE_MAXLEVEL; level > 0; level--) {
pt = amd_iommu_lookup_pgtable(iommu, ppt, dp, level, index);
if (pt) {
ppt = pt;
index = AMD_IOMMU_VA_BITS(pageva, level);
continue;
}
break;
}
if (level == 0) {
uint64_t *ptep;
uint64_t pa_4K;
ASSERT(pt);
ASSERT(pt == ppt);
ASSERT(pt->pt_domainid == dp->d_domainid);
ptep = &(pt->pt_pgtblva[index]);
pa_4K = AMD_IOMMU_REG_GET64(ptep, AMD_IOMMU_PTDE_ADDR);
if (amd_iommu_unity_map || type == AMD_IOMMU_UNITY_MAP) {
ASSERT(pageva == (pa_4K << MMU_PAGESHIFT));
}
}
tear_level = -1;
invalidate_pde = 0;
invalidate_pte = 0;
for (++level; level <= AMD_IOMMU_PGTABLE_MAXLEVEL; level++) {
prev_index = pt->pt_index;
ppt = pt->pt_parent;
retval = amd_iommu_teardown_pdte(iommu, pt, index);
switch (retval) {
case PDTE_NOT_TORN:
goto invalidate;
case PDTE_TORN_DOWN:
invalidate_pte = 1;
goto invalidate;
case PGTABLE_TORN_DOWN:
invalidate_pte = 1;
invalidate_pde = 1;
tear_level = level;
break;
}
index = prev_index;
pt = ppt;
}
invalidate:
/*
* Now teardown the IOMMU HW caches if applicable
*/
if (invalidate_pte) {
cmdargs.ca_domainid = (uint16_t)domainid;
if (amd_iommu_pageva_inval_all) {
cmdargs.ca_addr = (uintptr_t)0x7FFFFFFFFFFFF000;
flags = AMD_IOMMU_CMD_FLAGS_PAGE_PDE_INVAL |
AMD_IOMMU_CMD_FLAGS_PAGE_INVAL_S;
} else if (invalidate_pde) {
cmdargs.ca_addr =
(uintptr_t)AMD_IOMMU_VA_INVAL(pageva, tear_level);
flags = AMD_IOMMU_CMD_FLAGS_PAGE_PDE_INVAL |
AMD_IOMMU_CMD_FLAGS_PAGE_INVAL_S;
} else {
cmdargs.ca_addr = (uintptr_t)pageva;
flags = 0;
}
if (amd_iommu_cmd(iommu, AMD_IOMMU_CMD_INVAL_IOMMU_PAGES,
&cmdargs, flags, 0) != DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d: domainid=%d, "
"rdip=%p. Failed to invalidate IOMMU HW cache "
"for %s", f, driver, instance,
iommu->aiomt_idx, domainid, (void *)rdip, path);
error = DDI_FAILURE;
goto out;
}
}
passthru:
if (tear_level == AMD_IOMMU_PGTABLE_MAXLEVEL) {
error = amd_iommu_clear_devtbl_entry(iommu, rdip, domainid,
deviceid, dp, domain_freed, path);
} else {
error = DDI_SUCCESS;
}
out:
SYNC_FORDEV(iommu->aiomt_dmahdl);
return (error);
}
static int
cvt_bind_error(int error)
{
switch (error) {
case DDI_DMA_MAPPED:
case DDI_DMA_PARTIAL_MAP:
case DDI_DMA_NORESOURCES:
case DDI_DMA_NOMAPPING:
break;
default:
cmn_err(CE_PANIC, "Unsupported error code: %d", error);
/*NOTREACHED*/
}
return (error);
}
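
/*
 * Map the physical range [start_pa, start_pa + pa_sz) into the IOMMU
 * domain of rdip and return the starting DVMA address through start_vap.
 * Unity mappings use va == pa; otherwise DVMA space is allocated from the
 * domain's vmem arena, constrained by the device's DMA attributes.
 * Returns a DDI DMA bind-style error code.
 */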
int
amd_iommu_map_pa2va(amd_iommu_t *iommu, dev_info_t *rdip, ddi_dma_attr_t *attrp,
struct ddi_dma_req *dmareq, uint64_t start_pa, uint64_t pa_sz,
map_type_t type, uint64_t *start_vap, int km_flags)
{
pfn_t pfn_start;
pfn_t pfn_end;
pfn_t pfn;
int alias;
int32_t deviceid;
domain_id_t domainid;
amd_iommu_domain_t *dp;
uint64_t end_pa;
uint64_t start_va;
uint64_t end_va;
uint64_t pg_start;
uint64_t pg_end;
uint64_t pg;
uint64_t va_sz;
char *path;
int error = DDI_DMA_NOMAPPING;
const char *driver = ddi_driver_name(iommu->aiomt_dip);
int instance = ddi_get_instance(iommu->aiomt_dip);
const char *f = "amd_iommu_map_pa2va";
ASSERT(pa_sz != 0);
*start_vap = 0;
ASSERT(rdip);
path = kmem_alloc(MAXPATHLEN, km_flags);
if (path == NULL) {
error = DDI_DMA_NORESOURCES;
goto out;
}
(void) ddi_pathname(rdip, path);
/*
* First get deviceid
*/
if (amd_iommu_get_deviceid(iommu, rdip, &deviceid, &alias, path)
!= DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d: rdip=%p. "
"Failed to get device ID for %s.", f, driver, instance,
iommu->aiomt_idx, (void *)rdip, path);
error = DDI_DMA_NOMAPPING;
goto out;
}
/*
* Next get the domain for this rdip
*/
if (amd_iommu_get_domain(iommu, rdip, alias, deviceid, &domainid, path)
!= DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d: rdip=%p, path=%s. "
"Failed to get domain.", f, driver, instance,
iommu->aiomt_idx, (void *)rdip, path);
error = DDI_DMA_NOMAPPING;
goto out;
}
dp = amd_iommu_lookup_domain(iommu, domainid, type, km_flags);
if (dp == NULL) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d: domainid=%d, rdip=%p. "
"Failed to get device ID for %s.", f, driver, instance,
iommu->aiomt_idx, domainid, (void *)rdip, path);
error = DDI_DMA_NORESOURCES;
goto out;
}
ASSERT(dp->d_domainid == domainid);
pfn_start = start_pa >> MMU_PAGESHIFT;
if (amd_iommu_debug & AMD_IOMMU_DEBUG_PAGE_TABLES) {
cmn_err(CE_NOTE, "pa = %p, pfn_new = %p, pfn_start = %p, "
"pgshift = %d",
(void *)(uintptr_t)start_pa,
(void *)(uintptr_t)(start_pa >> MMU_PAGESHIFT),
(void *)(uintptr_t)pfn_start, MMU_PAGESHIFT);
}
end_pa = start_pa + pa_sz - 1;
pfn_end = end_pa >> MMU_PAGESHIFT;
if (amd_iommu_unity_map || type == AMD_IOMMU_UNITY_MAP) {
start_va = start_pa;
end_va = end_pa;
va_sz = pa_sz;
*start_vap = start_va;
} else {
va_sz = mmu_ptob(pfn_end - pfn_start + 1);
start_va = (uintptr_t)vmem_xalloc(dp->d_vmem, va_sz,
MAX(attrp->dma_attr_align, MMU_PAGESIZE),
0,
attrp->dma_attr_seg + 1,
(void *)(uintptr_t)attrp->dma_attr_addr_lo,
(void *)(uintptr_t)MIN((attrp->dma_attr_addr_hi + 1),
AMD_IOMMU_SIZE_4G), /* XXX rollover */
km_flags == KM_SLEEP ? VM_SLEEP : VM_NOSLEEP);
if (start_va == 0) {
cmn_err(CE_WARN, "%s: No VA resources",
amd_iommu_modname);
error = DDI_DMA_NORESOURCES;
goto out;
}
ASSERT((start_va & MMU_PAGEOFFSET) == 0);
end_va = start_va + va_sz - 1;
*start_vap = start_va + (start_pa & MMU_PAGEOFFSET);
}
pg_start = start_va >> MMU_PAGESHIFT;
pg_end = end_va >> MMU_PAGESHIFT;
pg = pg_start;
for (pfn = pfn_start; pfn <= pfn_end; pfn++, pg++) {
if (amd_iommu_debug & AMD_IOMMU_DEBUG_PAGE_TABLES) {
cmn_err(CE_WARN, "%s: attempting to create page tables "
"for pfn = %p, va = %p, path = %s",
f, (void *)(uintptr_t)(pfn << MMU_PAGESHIFT),
(void *)(uintptr_t)(pg << MMU_PAGESHIFT), path);
}
if (amd_iommu_unity_map || type == AMD_IOMMU_UNITY_MAP) {
ASSERT(pfn == pg);
}
if ((error = amd_iommu_create_pgtables(iommu, rdip, dmareq,
pg << MMU_PAGESHIFT,
pfn << MMU_PAGESHIFT, deviceid, domainid, dp, path,
km_flags)) != DDI_SUCCESS) {
cmn_err(CE_WARN, "Failed to create_pgtables");
goto out;
}
if (amd_iommu_debug & AMD_IOMMU_DEBUG_PAGE_TABLES) {
cmn_err(CE_WARN, "%s: successfuly created page tables "
"for pfn = %p, vapg = %p, path = %s",
f, (void *)(uintptr_t)pfn,
(void *)(uintptr_t)pg, path);
}
}
ASSERT(pg == pg_end + 1);
if (amd_iommu_debug & AMD_IOMMU_DEBUG_PA2VA) {
cmn_err(CE_NOTE, "pa=%p, va=%p",
(void *)(uintptr_t)start_pa,
(void *)(uintptr_t)(*start_vap));
}
error = DDI_DMA_MAPPED;
out:
kmem_free(path, MAXPATHLEN);
return (cvt_bind_error(error));
}
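
/*
 * Undo a mapping set up by amd_iommu_map_pa2va: destroy the page table
 * entries covering [start_va, start_va + va_sz), free the DVMA space for
 * vmem-mapped domains and tear down the domain once its last device table
 * reference is gone.
 */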
int
amd_iommu_unmap_va(amd_iommu_t *iommu, dev_info_t *rdip, uint64_t start_va,
uint64_t va_sz, map_type_t type)
{
uint64_t end_va;
uint64_t pg_start;
uint64_t pg_end;
uint64_t pg;
uint64_t actual_sz;
char *path;
int pathfree;
int alias;
int32_t deviceid;
domain_id_t domainid;
amd_iommu_domain_t *dp;
int error;
int domain_freed;
const char *driver = ddi_driver_name(iommu->aiomt_dip);
int instance = ddi_get_instance(iommu->aiomt_dip);
const char *f = "amd_iommu_unmap_va";
if (amd_iommu_no_unmap)
return (DDI_SUCCESS);
path = kmem_alloc(MAXPATHLEN, KM_NOSLEEP);
if (path) {
(void) ddi_pathname(rdip, path);
pathfree = 1;
} else {
pathfree = 0;
path = "<path-mem-alloc-failed>";
}
/*
* First get deviceid
*/
if (amd_iommu_get_deviceid(iommu, rdip, &deviceid, &alias, path)
!= DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d: rdip=%p. "
"Failed to get device ID for %s.", f, driver, instance,
iommu->aiomt_idx, (void *)rdip, path);
error = DDI_FAILURE;
goto out;
}
/*
* Next get the domain for this rdip
*/
if (amd_iommu_get_domain(iommu, rdip, alias, deviceid, &domainid, path)
!= DDI_SUCCESS) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d: rdip=%p, path=%s. "
"Failed to get domain.", f, driver, instance,
iommu->aiomt_idx, (void *)rdip, path);
error = DDI_FAILURE;
goto out;
}
/* should never result in domain allocation/vmem_create */
dp = amd_iommu_lookup_domain(iommu, domainid, AMD_IOMMU_INVALID_MAP,
KM_NOSLEEP);
if (dp == NULL) {
cmn_err(CE_WARN, "%s: %s%d: idx=%d: domainid=%d, rdip=%p. "
"Failed to get device ID for %s.", f, driver, instance,
iommu->aiomt_idx, domainid, (void *)rdip, path);
error = DDI_FAILURE;
goto out;
}
ASSERT(dp->d_domainid == domainid);
pg_start = start_va >> MMU_PAGESHIFT;
end_va = start_va + va_sz - 1;
pg_end = end_va >> MMU_PAGESHIFT;
actual_sz = (pg_end - pg_start + 1) << MMU_PAGESHIFT;
domain_freed = 0;
for (pg = pg_start; pg <= pg_end; pg++) {
domain_freed = 0;
if (amd_iommu_destroy_pgtables(iommu, rdip,
pg << MMU_PAGESHIFT, deviceid, domainid, dp, type,
&domain_freed, path) != DDI_SUCCESS) {
error = DDI_FAILURE;
goto out;
}
if (domain_freed) {
ASSERT(pg == pg_end);
break;
}
}
/*
* vmem_xalloc() must be paired with vmem_xfree
*/
if (type == AMD_IOMMU_VMEM_MAP && !amd_iommu_unity_map) {
vmem_xfree(dp->d_vmem,
(void *)(uintptr_t)(pg_start << MMU_PAGESHIFT), actual_sz);
}
if (domain_freed)
amd_iommu_teardown_domain(iommu, dp);
error = DDI_SUCCESS;
out:
if (pathfree)
kmem_free(path, MAXPATHLEN);
return (error);
}