/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Portions Copyright (c) 2010, Oracle and/or its affiliates.
* All rights reserved.
*/
/*
* Copyright (c) 2009, Intel Corporation.
* All rights reserved.
*/
/*
* DVMA code
* This file contains Intel IOMMU code that deals with DVMA
* i.e. DMA remapping.
*/
#include <sys/sysmacros.h>
#include <sys/pcie.h>
#include <sys/pci_cfgspace.h>
#include <vm/hat_i86.h>
#include <sys/memlist.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/modhash.h>
#include <sys/immu.h>
#undef TEST
/*
* Macros based on PCI spec
*/
#define IMMU_PCI_REV2CLASS(r) ((r) >> 8) /* classcode from revid */
#define IMMU_PCI_CLASS2BASE(c) ((c) >> 16) /* baseclass from classcode */
#define IMMU_PCI_CLASS2SUB(c) (((c) >> 8) & 0xff) /* subclass from classcode */
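/*
 * Worked example (illustrative values only): if PCI_CONF_REVID reads back
 * as 0x06040011, IMMU_PCI_REV2CLASS() yields classcode 0x060400,
 * IMMU_PCI_CLASS2BASE() yields baseclass 0x06 (bridge) and
 * IMMU_PCI_CLASS2SUB() yields subclass 0x04 (PCI-PCI bridge).
 */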
#define IMMU_CONTIG_PADDR(d, p) \
((d).dck_paddr && ((d).dck_paddr + IMMU_PAGESIZE) == (p))
typedef struct dvma_arg {
immu_t *dva_immu;
dev_info_t *dva_rdip;
dev_info_t *dva_ddip;
domain_t *dva_domain;
int dva_level;
immu_flags_t dva_flags;
list_t *dva_list;
int dva_error;
} dvma_arg_t;
static domain_t *domain_create(immu_t *immu, dev_info_t *ddip,
dev_info_t *rdip, immu_flags_t immu_flags);
static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus,
int dev, int func, immu_flags_t immu_flags);
static void destroy_immu_devi(immu_devi_t *immu_devi);
static boolean_t dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma,
uint64_t nvpages, dcookie_t *dcookies, int dcount, dev_info_t *rdip,
immu_flags_t immu_flags);
/* Extern globals */
extern struct memlist *phys_install;
/* static Globals */
/*
* Used to setup DMA objects (memory regions)
* for DMA reads by IOMMU units
*/
static ddi_dma_attr_t immu_dma_attr = {
	DMA_ATTR_V0,
	0U,			/* dma_attr_addr_lo */
	0xffffffffU,		/* dma_attr_addr_hi */
	0xffffffffU,		/* dma_attr_count_max */
	MMU_PAGESIZE,		/* dma_attr_align: MMU page aligned */
	0x1,			/* dma_attr_burstsizes */
	0x1,			/* dma_attr_minxfer */
	0xffffffffU,		/* dma_attr_maxxfer */
	0xffffffffU,		/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	4,			/* dma_attr_granular */
	0			/* dma_attr_flags */
};
static ddi_device_acc_attr_t immu_acc_attr = {
DDI_DEVICE_ATTR_V0,
DDI_NEVERSWAP_ACC,
DDI_STRICTORDER_ACC
};
/* globals private to this file */
static kmutex_t immu_domain_lock;
static list_t immu_unity_domain_list;
static list_t immu_xlate_domain_list;
/* structure used to store idx into each level of the page tables */
typedef struct xlate {
int xlt_level;
uint_t xlt_idx;
pgtable_t *xlt_pgtable;
} xlate_t;
/* 0 is reserved by Vt-d spec. Solaris reserves 1 */
#define IMMU_UNITY_DID 1
static mod_hash_t *bdf_domain_hash;
static domain_t *
bdf_domain_lookup(immu_devi_t *immu_devi)
{
domain_t *domain;
int16_t seg = immu_devi->imd_seg;
int16_t bus = immu_devi->imd_bus;
int16_t devfunc = immu_devi->imd_devfunc;
uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
if (seg < 0 || bus < 0 || devfunc < 0) {
return (NULL);
}
domain = NULL;
if (mod_hash_find(bdf_domain_hash,
(void *)bdf, (void *)&domain) == 0) {
ASSERT(domain);
ASSERT(domain->dom_did > 0);
return (domain);
} else {
return (NULL);
}
}
static void
bdf_domain_insert(immu_devi_t *immu_devi, domain_t *domain)
{
int16_t seg = immu_devi->imd_seg;
int16_t bus = immu_devi->imd_bus;
int16_t devfunc = immu_devi->imd_devfunc;
uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
int r;
if (seg < 0 || bus < 0 || devfunc < 0) {
return;
}
r = mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain);
ASSERT(r != MH_ERR_DUPLICATE);
ASSERT(r == 0);
}
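/*
 * Illustrative example (hypothetical device): for segment 0, bus 0x3 and
 * devfunc 0xfa, both routines above compute the hash key
 * (0 << 16 | 0x3 << 8 | 0xfa) == 0x3fa. Lookup and insert must use the
 * same composition or the cached per-BDF domain is never found.
 */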
static int
match_lpc(dev_info_t *pdip, void *arg)
{
immu_devi_t *immu_devi;
dvma_arg_t *dvap = (dvma_arg_t *)arg;
ASSERT(dvap->dva_error == DDI_FAILURE);
ASSERT(dvap->dva_ddip == NULL);
ASSERT(dvap->dva_list);
if (list_is_empty(dvap->dva_list)) {
return (DDI_WALK_TERMINATE);
}
immu_devi = list_head(dvap->dva_list);
for (; immu_devi; immu_devi = list_next(dvap->dva_list,
immu_devi)) {
ASSERT(immu_devi->imd_dip);
if (immu_devi->imd_dip == pdip) {
dvap->dva_ddip = pdip;
dvap->dva_error = DDI_SUCCESS;
return (DDI_WALK_TERMINATE);
}
}
return (DDI_WALK_CONTINUE);
}
static void
immu_devi_set_spclist(dev_info_t *dip, immu_t *immu)
{
list_t *spclist = NULL;
immu_devi_t *immu_devi;
ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_lock)));
immu_devi = IMMU_DEVI(dip);
if (immu_devi->imd_display == B_TRUE) {
spclist = &(immu->immu_dvma_gfx_list);
} else if (immu_devi->imd_lpc == B_TRUE) {
spclist = &(immu->immu_dvma_lpc_list);
}
if (spclist) {
mutex_enter(&(immu->immu_lock));
list_insert_head(spclist, immu_devi);
mutex_exit(&(immu->immu_lock));
}
}
/*
* Set the immu_devi struct in the immu_devi field of a devinfo node
*/
int
immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags)
{
int bus, dev, func;
immu_devi_t *new_imd;
immu_devi_t *immu_devi;
ASSERT(root_devinfo);
ASSERT(dip);
ASSERT(dip != root_devinfo);
immu_devi = immu_devi_get(dip);
if (immu_devi != NULL) {
return (DDI_SUCCESS);
}
bus = dev = func = -1;
/*
* Assume a new immu_devi struct is needed
*/
if (!DEVI_IS_PCI(dip) || acpica_get_bdf(dip, &bus, &dev, &func) != 0) {
/*
* No BDF. Set bus = -1 to indicate this.
* We still need to create a immu_devi struct
* though
*/
bus = -1;
dev = 0;
func = 0;
}
new_imd = create_immu_devi(dip, bus, dev, func, immu_flags);
if (new_imd == NULL) {
ddi_err(DER_WARN, dip, "Failed to create immu_devi "
"structure");
return (DDI_FAILURE);
}
	/*
	 * Check if some other thread allocated an immu_devi while we
	 * didn't own the lock.
	 */
mutex_enter(&(DEVI(dip)->devi_lock));
if (IMMU_DEVI(dip) == NULL) {
IMMU_DEVI_SET(dip, new_imd);
} else {
destroy_immu_devi(new_imd);
}
mutex_exit(&(DEVI(dip)->devi_lock));
return (DDI_SUCCESS);
}
static dev_info_t *
get_lpc_devinfo(immu_t *immu, dev_info_t *rdip, immu_flags_t immu_flags)
{
dvma_arg_t dvarg = {0};
dvarg.dva_list = &(immu->immu_dvma_lpc_list);
dvarg.dva_rdip = rdip;
dvarg.dva_error = DDI_FAILURE;
if (immu_walk_ancestor(rdip, NULL, match_lpc,
&dvarg, NULL, immu_flags) != DDI_SUCCESS) {
ddi_err(DER_MODE, rdip, "Could not walk ancestors to "
"find lpc_devinfo for ISA device");
return (NULL);
}
if (dvarg.dva_error != DDI_SUCCESS || dvarg.dva_ddip == NULL) {
ddi_err(DER_MODE, rdip, "Could not find lpc_devinfo for "
"ISA device");
return (NULL);
}
return (dvarg.dva_ddip);
}
static dev_info_t *
get_gfx_devinfo(dev_info_t *rdip)
{
immu_t *immu;
immu_devi_t *immu_devi;
list_t *list_gfx;
/*
* The GFX device may not be on the same IMMU unit as "agpgart"
* so search globally
*/
immu_devi = NULL;
immu = list_head(&immu_list);
for (; immu; immu = list_next(&immu_list, immu)) {
list_gfx = &(immu->immu_dvma_gfx_list);
if (!list_is_empty(list_gfx)) {
immu_devi = list_head(list_gfx);
break;
}
}
if (immu_devi == NULL) {
ddi_err(DER_WARN, rdip, "IMMU: No GFX device. "
"Cannot redirect agpgart");
return (NULL);
}
	/* list is not empty; we checked above */
ASSERT(immu_devi);
ASSERT(immu_devi->imd_dip);
ddi_err(DER_LOG, rdip, "IMMU: GFX redirect to %s",
ddi_node_name(immu_devi->imd_dip));
return (immu_devi->imd_dip);
}
static immu_flags_t
dma_to_immu_flags(struct ddi_dma_req *dmareq)
{
immu_flags_t flags = 0;
if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
flags |= IMMU_FLAGS_SLEEP;
} else {
flags |= IMMU_FLAGS_NOSLEEP;
}
#ifdef BUGGY_DRIVERS
flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
#else
/*
* Read and write flags need to be reversed.
* DMA_READ means read from device and write
* to memory. So DMA read means DVMA write.
*/
if (dmareq->dmar_flags & DDI_DMA_READ)
flags |= IMMU_FLAGS_WRITE;
if (dmareq->dmar_flags & DDI_DMA_WRITE)
flags |= IMMU_FLAGS_READ;
	/*
	 * Some buggy drivers specify neither READ nor WRITE.
	 * For such drivers set both read and write permissions.
	 */
if ((dmareq->dmar_flags & (DDI_DMA_READ | DDI_DMA_WRITE)) == 0) {
flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
}
#endif
return (flags);
}
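/*
 * Example of the flag reversal above (hypothetical driver): a driver
 * posting a receive buffer binds it with DDI_DMA_READ because the device
 * writes into memory, so the IOMMU PTEs covering that buffer must be
 * given IMMU_FLAGS_WRITE permission.
 */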
int
pgtable_ctor(void *buf, void *arg, int kmflag)
{
size_t actual_size = 0;
pgtable_t *pgtable;
int (*dmafp)(caddr_t);
caddr_t vaddr;
void *next;
ASSERT(buf);
ASSERT(arg == NULL);
pgtable = (pgtable_t *)buf;
dmafp = (kmflag & KM_NOSLEEP) ? DDI_DMA_DONTWAIT : DDI_DMA_SLEEP;
next = kmem_zalloc(IMMU_PAGESIZE, kmflag);
if (next == NULL) {
return (-1);
}
ASSERT(root_devinfo);
if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr,
dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) {
kmem_free(next, IMMU_PAGESIZE);
return (-1);
}
if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE,
&immu_acc_attr, DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED,
dmafp, NULL, &vaddr, &actual_size,
&pgtable->hwpg_memhdl) != DDI_SUCCESS) {
ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
kmem_free(next, IMMU_PAGESIZE);
return (-1);
}
	/*
	 * Short allocation: treat it as a memory allocation failure.
	 * It may be a temporary condition, so return an error rather
	 * than panic so we can try again.
	 */
if (actual_size < IMMU_PAGESIZE) {
ddi_dma_mem_free(&pgtable->hwpg_memhdl);
ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
kmem_free(next, IMMU_PAGESIZE);
return (-1);
}
pgtable->hwpg_paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
pgtable->hwpg_vaddr = vaddr;
pgtable->swpg_next_array = next;
rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL);
return (0);
}
void
pgtable_dtor(void *buf, void *arg)
{
pgtable_t *pgtable;
ASSERT(buf);
ASSERT(arg == NULL);
pgtable = (pgtable_t *)buf;
ASSERT(pgtable->swpg_next_array);
/* destroy will panic if lock is held. */
rw_destroy(&(pgtable->swpg_rwlock));
ddi_dma_mem_free(&pgtable->hwpg_memhdl);
ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE);
/* don't zero out hwpg_vaddr and swpg_next_array for debugging */
}
/*
 * pgtable_alloc()
 *	alloc an IOMMU pgtable structure.
 *	This same struct is used for root and context tables as well.
 *	This routine allocs the following:
 *	- a pgtable_t struct
 *	- a HW page which holds PTEs/entries which is accessed by HW
 *	  so we set up DMA for this page
 *	- a SW page which is only for our bookkeeping
 *	  (for example to hold pointers to the next level pgtable).
 *	  So a simple kmem_alloc suffices
 */
static pgtable_t *
pgtable_alloc(immu_t *immu, immu_flags_t immu_flags)
{
pgtable_t *pgtable;
int kmflags;
ASSERT(immu);
kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
pgtable = kmem_cache_alloc(immu_pgtable_cache, kmflags);
if (pgtable == NULL) {
return (NULL);
}
return (pgtable);
}
static void
pgtable_zero(immu_t *immu, pgtable_t *pgtable)
{
bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE);
bzero(pgtable->swpg_next_array, IMMU_PAGESIZE);
	/* Don't need to flush the write; we will flush when we use the entry */
immu_regs_cpu_flush(immu, pgtable->hwpg_vaddr, IMMU_PAGESIZE);
}
static void
pgtable_free(immu_t *immu, pgtable_t *pgtable)
{
ASSERT(immu);
ASSERT(pgtable);
kmem_cache_free(immu_pgtable_cache, pgtable);
}
/*
* Function to identify a display device from the PCI class code
*/
static boolean_t
device_is_display(uint_t classcode)
{
	static uint_t disp_classes[] = {
		0x000100,	/* pre-classcode VGA-compatible device */
		0x030000,	/* VGA-compatible display controller */
		0x030001	/* 8514-compatible display controller */
	};
int i, nclasses = sizeof (disp_classes) / sizeof (uint_t);
for (i = 0; i < nclasses; i++) {
if (classcode == disp_classes[i])
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Function that determines if device is PCIEX and/or PCIEX bridge
*/
static boolean_t
device_is_pciex(
uchar_t bus, uchar_t dev, uchar_t func, boolean_t *is_pcib)
{
ushort_t cap;
ushort_t capsp;
ushort_t cap_count = PCI_CAP_MAX_PTR;
ushort_t status;
boolean_t is_pciex = B_FALSE;
*is_pcib = B_FALSE;
status = pci_getw_func(bus, dev, func, PCI_CONF_STAT);
if (!(status & PCI_STAT_CAP))
return (B_FALSE);
capsp = pci_getb_func(bus, dev, func, PCI_CONF_CAP_PTR);
while (cap_count-- && capsp >= PCI_CAP_PTR_OFF) {
capsp &= PCI_CAP_PTR_MASK;
cap = pci_getb_func(bus, dev, func, capsp);
if (cap == PCI_CAP_ID_PCI_E) {
status = pci_getw_func(bus, dev, func, capsp + 2);
/*
* See section 7.8.2 of PCI-Express Base Spec v1.0a
* for Device/Port Type.
* PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the
* device is a PCIE2PCI bridge
*/
*is_pcib =
((status & PCIE_PCIECAP_DEV_TYPE_MASK) ==
PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? B_TRUE : B_FALSE;
is_pciex = B_TRUE;
}
capsp = (*pci_getb_func)(bus, dev, func,
capsp + PCI_CAP_NEXT_PTR);
}
return (is_pciex);
}
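/*
 * Illustrative walk (hypothetical config space layout): a device whose
 * capability list chains 0x40 (power management) -> 0x50 (MSI) ->
 * 0x60 (PCI Express) is reported as PCIe on the third iteration above;
 * *is_pcib is additionally set only if the Device/Port Type field of its
 * PCIe capabilities register reads PCIE_PCIECAP_DEV_TYPE_PCIE2PCI.
 */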
/*
* immu_dvma_get_immu()
* get the immu unit structure for a dev_info node
*/
immu_t *
immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags)
{
immu_devi_t *immu_devi;
immu_t *immu;
/*
* check if immu unit was already found earlier.
* If yes, then it will be stashed in immu_devi struct.
*/
immu_devi = immu_devi_get(dip);
if (immu_devi == NULL) {
if (immu_devi_set(dip, immu_flags) != DDI_SUCCESS) {
			/*
			 * May fail because of low memory. Return error rather
			 * than panic as we want driver to retry again later
			 */
ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
"No immu_devi structure");
/*NOTREACHED*/
}
immu_devi = immu_devi_get(dip);
ASSERT(immu_devi);
}
mutex_enter(&(DEVI(dip)->devi_lock));
if (immu_devi->imd_immu) {
immu = immu_devi->imd_immu;
mutex_exit(&(DEVI(dip)->devi_lock));
return (immu);
}
mutex_exit(&(DEVI(dip)->devi_lock));
immu = immu_dmar_get_immu(dip);
if (immu == NULL) {
ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
"Cannot find immu_t for device");
/*NOTREACHED*/
}
/*
* Check if some other thread found immu
* while lock was not held
*/
immu_devi = immu_devi_get(dip);
/* immu_devi should be present as we found it earlier */
if (immu_devi == NULL) {
ddi_err(DER_PANIC, dip,
"immu_dvma_get_immu: No immu_devi structure");
/*NOTREACHED*/
}
mutex_enter(&(DEVI(dip)->devi_lock));
if (immu_devi->imd_immu == NULL) {
/* nobody else set it, so we should do it */
immu_devi->imd_immu = immu;
immu_devi_set_spclist(dip, immu);
} else {
/*
* if some other thread got immu before
* us, it should get the same results
*/
if (immu_devi->imd_immu != immu) {
ddi_err(DER_PANIC, dip, "Multiple "
"immu units found for device. Expected (%p), "
"actual (%p)", (void *)immu,
(void *)immu_devi->imd_immu);
mutex_exit(&(DEVI(dip)->devi_lock));
/*NOTREACHED*/
}
}
mutex_exit(&(DEVI(dip)->devi_lock));
return (immu);
}
/* ############################# IMMU_DEVI code ############################ */
/*
* Allocate a immu_devi structure and initialize it
*/
static immu_devi_t *
create_immu_devi(dev_info_t *rdip, int bus, int dev, int func,
immu_flags_t immu_flags)
{
uchar_t baseclass, subclass;
uint_t classcode, revclass;
immu_devi_t *immu_devi;
boolean_t pciex = B_FALSE;
int kmflags;
boolean_t is_pcib = B_FALSE;
	/* bus == -1 indicates a non-PCI device (no BDF) */
ASSERT(bus == -1 || bus >= 0);
ASSERT(dev >= 0);
ASSERT(func >= 0);
kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
immu_devi = kmem_zalloc(sizeof (immu_devi_t), kmflags);
if (immu_devi == NULL) {
ddi_err(DER_WARN, rdip, "Failed to allocate memory for "
"Intel IOMMU immu_devi structure");
return (NULL);
}
immu_devi->imd_dip = rdip;
immu_devi->imd_seg = 0; /* Currently seg can only be 0 */
immu_devi->imd_bus = bus;
immu_devi->imd_pcib_type = IMMU_PCIB_BAD;
if (bus == -1) {
immu_devi->imd_pcib_type = IMMU_PCIB_NOBDF;
return (immu_devi);
}
immu_devi->imd_devfunc = IMMU_PCI_DEVFUNC(dev, func);
immu_devi->imd_sec = 0;
immu_devi->imd_sub = 0;
revclass = pci_getl_func(bus, dev, func, PCI_CONF_REVID);
classcode = IMMU_PCI_REV2CLASS(revclass);
baseclass = IMMU_PCI_CLASS2BASE(classcode);
subclass = IMMU_PCI_CLASS2SUB(classcode);
if (baseclass == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {
immu_devi->imd_sec = pci_getb_func(bus, dev, func,
PCI_BCNF_SECBUS);
immu_devi->imd_sub = pci_getb_func(bus, dev, func,
PCI_BCNF_SUBBUS);
pciex = device_is_pciex(bus, dev, func, &is_pcib);
if (pciex == B_TRUE && is_pcib == B_TRUE) {
immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCI;
} else if (pciex == B_TRUE) {
immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCIE;
} else {
immu_devi->imd_pcib_type = IMMU_PCIB_PCI_PCI;
}
} else {
immu_devi->imd_pcib_type = IMMU_PCIB_ENDPOINT;
}
/* check for certain special devices */
immu_devi->imd_display = device_is_display(classcode);
immu_devi->imd_lpc = ((baseclass == PCI_CLASS_BRIDGE) &&
(subclass == PCI_BRIDGE_ISA)) ? B_TRUE : B_FALSE;
immu_devi->imd_domain = NULL;
immu_devi->imd_dvma_flags = immu_global_dvma_flags;
return (immu_devi);
}
static void
destroy_immu_devi(immu_devi_t *immu_devi)
{
kmem_free(immu_devi, sizeof (immu_devi_t));
}
static domain_t *
immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp)
{
immu_devi_t *immu_devi;
domain_t *domain;
dev_info_t *ddip;
ASSERT(rdip);
ASSERT(ddipp);
*ddipp = NULL;
immu_devi = immu_devi_get(rdip);
if (immu_devi == NULL) {
return (NULL);
}
mutex_enter(&(DEVI(rdip)->devi_lock));
domain = immu_devi->imd_domain;
ddip = immu_devi->imd_ddip;
mutex_exit(&(DEVI(rdip)->devi_lock));
if (domain) {
ASSERT(domain->dom_did > 0);
ASSERT(ddip);
*ddipp = ddip;
}
return (domain);
}
/* ############################# END IMMU_DEVI code ######################## */
/* ############################# DOMAIN code ############################### */
/*
* This routine always succeeds
*/
static int
did_alloc(immu_t *immu, dev_info_t *rdip,
dev_info_t *ddip, immu_flags_t immu_flags)
{
int did;
ASSERT(immu);
ASSERT(rdip);
ASSERT(rdip != root_devinfo);
did = (uintptr_t)vmem_alloc(immu->immu_did_arena, 1,
(immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP);
if (did == 0) {
ASSERT(immu->immu_unity_domain);
ASSERT(immu->immu_unity_domain->dom_did > 0);
ddi_err(DER_WARN, rdip, "device domain-id alloc error"
" domain-device: %s%d. immu unit is %s. Using "
"unity domain with domain-id (%d)",
ddi_driver_name(ddip), ddi_get_instance(ddip),
immu->immu_name, immu->immu_unity_domain->dom_did);
did = immu->immu_unity_domain->dom_did;
}
return (did);
}
static int
get_branch_domain(dev_info_t *pdip, void *arg)
{
immu_devi_t *immu_devi;
domain_t *domain;
dev_info_t *ddip;
immu_t *immu;
dvma_arg_t *dvp = (dvma_arg_t *)arg;
ASSERT(pdip);
ASSERT(dvp);
ASSERT(dvp->dva_rdip);
	/*
	 * The field dvp->dva_ddip is a work-in-progress
	 * and gets updated as we walk up the ancestor
	 * tree. The final ddip is set only when we reach
	 * the top of the tree, so the dvp->dva_ddip field
	 * cannot be relied on until then.
	 */
/* immu_devi may not be set. */
immu_devi = immu_devi_get(pdip);
if (immu_devi == NULL) {
if (immu_devi_set(pdip, dvp->dva_flags) != DDI_SUCCESS) {
dvp->dva_error = DDI_FAILURE;
return (DDI_WALK_TERMINATE);
}
}
immu_devi = immu_devi_get(pdip);
ASSERT(immu_devi);
immu = immu_devi->imd_immu;
if (immu == NULL) {
immu = immu_dvma_get_immu(pdip, dvp->dva_flags);
ASSERT(immu);
}
/*
* If we encounter a PCIE_PCIE bridge *ANCESTOR* we need to
* terminate the walk (since the device under the PCIE bridge
* is a PCIE device and has an independent entry in the
* root/context table)
*/
if (dvp->dva_rdip != pdip &&
immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCIE) {
return (DDI_WALK_TERMINATE);
}
	/*
	 * In order to be a domain-dip, it must be a PCI device i.e.
	 * must have a valid BDF. This also eliminates the root complex.
	 */
if (immu_devi->imd_pcib_type != IMMU_PCIB_BAD &&
immu_devi->imd_pcib_type != IMMU_PCIB_NOBDF) {
ASSERT(immu_devi->imd_bus >= 0);
ASSERT(immu_devi->imd_devfunc >= 0);
dvp->dva_ddip = pdip;
}
if (immu_devi->imd_display == B_TRUE ||
(dvp->dva_flags & IMMU_FLAGS_UNITY)) {
dvp->dva_domain = immu->immu_unity_domain;
/* continue walking to find ddip */
return (DDI_WALK_CONTINUE);
}
mutex_enter(&(DEVI(pdip)->devi_lock));
domain = immu_devi->imd_domain;
ddip = immu_devi->imd_ddip;
mutex_exit(&(DEVI(pdip)->devi_lock));
if (domain && ddip) {
/* if domain is set, it must be the same */
if (dvp->dva_domain) {
ASSERT(domain == dvp->dva_domain);
}
dvp->dva_domain = domain;
dvp->dva_ddip = ddip;
return (DDI_WALK_TERMINATE);
}
/* immu_devi either has both set or both clear */
ASSERT(domain == NULL);
ASSERT(ddip == NULL);
/* Domain may already be set, continue walking so that ddip gets set */
if (dvp->dva_domain) {
return (DDI_WALK_CONTINUE);
}
/* domain is not set in either immu_devi or dvp */
domain = bdf_domain_lookup(immu_devi);
if (domain == NULL) {
return (DDI_WALK_CONTINUE);
}
/* ok, the BDF hash had a domain for this BDF. */
/* Grab lock again to check if something else set immu_devi fields */
mutex_enter(&(DEVI(pdip)->devi_lock));
if (immu_devi->imd_domain != NULL) {
ASSERT(immu_devi->imd_domain == domain);
dvp->dva_domain = domain;
} else {
dvp->dva_domain = domain;
}
mutex_exit(&(DEVI(pdip)->devi_lock));
/*
* walk upwards until the topmost PCI bridge is found
*/
return (DDI_WALK_CONTINUE);
}
static void
map_unity_domain(domain_t *domain)
{
struct memlist *mp;
uint64_t start;
uint64_t npages;
dcookie_t dcookies[1] = {0};
int dcount = 0;
ASSERT(domain);
ASSERT(domain->dom_did == IMMU_UNITY_DID);
	/*
	 * We call into routines that grab the lock so we should
	 * not be called with the lock held. This does not matter
	 * much since no one else has a reference to this domain
	 */
ASSERT(!rw_lock_held(&(domain->dom_pgtable_rwlock)));
/*
* UNITY arenas are a mirror of the physical memory
* installed on the system.
*/
#ifdef BUGGY_DRIVERS
	/*
	 * Don't skip page0. Some broken HW/FW accesses it.
	 */
dcookies[0].dck_paddr = 0;
dcookies[0].dck_npages = 1;
dcount = 1;
(void) dvma_map(domain->dom_immu, domain, 0, 1, dcookies, dcount, NULL,
IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
#endif
memlist_read_lock();
mp = phys_install;
if (mp->ml_address == 0) {
		/* skip the first page; it is only mapped in the BUGGY_DRIVERS case above */
start = IMMU_PAGESIZE;
} else {
start = mp->ml_address;
}
npages = mp->ml_size/IMMU_PAGESIZE + 1;
dcookies[0].dck_paddr = start;
dcookies[0].dck_npages = npages;
dcount = 1;
(void) dvma_map(domain->dom_immu, domain, start, npages, dcookies,
dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64
" - 0x%" PRIx64 "]", start, start + mp->ml_size);
mp = mp->ml_next;
while (mp) {
ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64
" - 0x%" PRIx64 "]", mp->ml_address,
mp->ml_address + mp->ml_size);
start = mp->ml_address;
npages = mp->ml_size/IMMU_PAGESIZE + 1;
dcookies[0].dck_paddr = start;
dcookies[0].dck_npages = npages;
dcount = 1;
(void) dvma_map(domain->dom_immu, domain, start, npages,
dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
mp = mp->ml_next;
}
mp = bios_rsvd;
while (mp) {
ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64
" - 0x%" PRIx64 "]", mp->ml_address,
mp->ml_address + mp->ml_size);
start = mp->ml_address;
npages = mp->ml_size/IMMU_PAGESIZE + 1;
dcookies[0].dck_paddr = start;
dcookies[0].dck_npages = npages;
dcount = 1;
(void) dvma_map(domain->dom_immu, domain, start, npages,
dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
mp = mp->ml_next;
}
memlist_read_unlock();
}
/*
* create_xlate_arena()
* Create the dvma arena for a domain with translation
* mapping
*/
static void
create_xlate_arena(immu_t *immu, domain_t *domain,
dev_info_t *rdip, immu_flags_t immu_flags)
{
char *arena_name;
struct memlist *mp;
int vmem_flags;
uint64_t start;
uint_t mgaw;
uint64_t size;
uint64_t maxaddr;
void *vmem_ret;
arena_name = domain->dom_dvma_arena_name;
/* Note, don't do sizeof (arena_name) - it is just a pointer */
(void) snprintf(arena_name,
sizeof (domain->dom_dvma_arena_name),
"%s-domain-%d-xlate-DVMA-arena", immu->immu_name,
domain->dom_did);
vmem_flags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP;
/*
* No one else has access to this domain.
* So no domain locks needed
*/
ASSERT(!rw_lock_held(&(domain->dom_pgtable_rwlock)));
/* Restrict mgaddr (max guest addr) to MGAW */
mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap);
	/*
	 * To ensure we avoid ioapic and PCI MMIO ranges, we just
	 * use the physical memory address range of the system as
	 * the arena's range.
	 */
maxaddr = ((uint64_t)1 << mgaw);
memlist_read_lock();
mp = phys_install;
if (mp->ml_address == 0)
start = MMU_PAGESIZE;
else
start = mp->ml_address;
if (start + mp->ml_size > maxaddr)
size = maxaddr - start;
else
size = mp->ml_size;
ddi_err(DER_VERB, rdip,
"%s: Creating dvma vmem arena [0x%" PRIx64
" - 0x%" PRIx64 "]", arena_name, start, start + size);
ASSERT(domain->dom_dvma_arena == NULL);
/*
* We always allocate in quanta of IMMU_PAGESIZE
*/
domain->dom_dvma_arena = vmem_create(arena_name,
(void *)(uintptr_t)start, /* start addr */
size, /* size */
IMMU_PAGESIZE, /* quantum */
NULL, /* afunc */
NULL, /* ffunc */
NULL, /* source */
0, /* qcache_max */
vmem_flags);
if (domain->dom_dvma_arena == NULL) {
ddi_err(DER_PANIC, rdip,
"Failed to allocate DVMA arena(%s) "
"for domain ID (%d)", arena_name, domain->dom_did);
/*NOTREACHED*/
}
mp = mp->ml_next;
while (mp) {
if (mp->ml_address == 0)
start = MMU_PAGESIZE;
else
start = mp->ml_address;
if (start + mp->ml_size > maxaddr)
size = maxaddr - start;
else
size = mp->ml_size;
ddi_err(DER_VERB, rdip,
"%s: Adding dvma vmem span [0x%" PRIx64
" - 0x%" PRIx64 "]", arena_name, start,
start + size);
vmem_ret = vmem_add(domain->dom_dvma_arena,
(void *)(uintptr_t)start, size, vmem_flags);
if (vmem_ret == NULL) {
ddi_err(DER_PANIC, rdip,
"Failed to allocate DVMA arena(%s) "
"for domain ID (%d)",
arena_name, domain->dom_did);
/*NOTREACHED*/
}
mp = mp->ml_next;
}
memlist_read_unlock();
}
/* ################################### DOMAIN CODE ######################### */
/*
* Set the domain and domain-dip for a dip
*/
static void
set_domain(
dev_info_t *dip,
dev_info_t *ddip,
domain_t *domain)
{
immu_devi_t *immu_devi;
domain_t *fdomain;
dev_info_t *fddip;
ASSERT(dip);
ASSERT(ddip);
ASSERT(domain);
ASSERT(domain->dom_did > 0); /* must be an initialized domain */
immu_devi = immu_devi_get(dip);
ASSERT(immu_devi);
mutex_enter(&(DEVI(dip)->devi_lock));
fddip = immu_devi->imd_ddip;
fdomain = immu_devi->imd_domain;
if (fddip) {
ASSERT(fddip == ddip);
} else {
immu_devi->imd_ddip = ddip;
}
if (fdomain) {
ASSERT(fdomain == domain);
} else {
immu_devi->imd_domain = domain;
}
mutex_exit(&(DEVI(dip)->devi_lock));
}
/*
 * device_domain()
 *	Get domain for a device. The domain may be global, in which case it
 *	is shared between all IOMMU units. Due to potential AGAW differences
 *	between IOMMU units, such global domains *have to be* UNITY mapping
 *	domains. Alternatively, the domain may be local to an IOMMU unit.
 *	Local domains may be shared or per-device (immu_devi specific),
 *	although the scope of sharing is restricted to devices controlled
 *	by the IOMMU unit to which the domain belongs. If shared, they
 *	(currently) have to be UNITY domains. If per-device, a domain may
 *	be either a UNITY or a translation (XLATE) domain.
 */
static domain_t *
device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
{
dev_info_t *ddip; /* topmost dip in domain i.e. domain owner */
immu_t *immu;
domain_t *domain;
dvma_arg_t dvarg = {0};
int level;
ASSERT(rdip);
*ddipp = NULL;
/*
* Check if the domain is already set. This is usually true
* if this is not the first DVMA transaction.
*/
ddip = NULL;
domain = immu_devi_domain(rdip, &ddip);
if (domain) {
ASSERT(domain->dom_did > 0);
ASSERT(ddip);
*ddipp = ddip;
return (domain);
}
immu = immu_dvma_get_immu(rdip, immu_flags);
if (immu == NULL) {
/*
* possible that there is no IOMMU unit for this device
* - BIOS bugs are one example.
*/
ddi_err(DER_WARN, rdip, "No IMMU unit found for device");
return (NULL);
}
immu_flags |= immu_devi_get(rdip)->imd_dvma_flags;
dvarg.dva_rdip = rdip;
dvarg.dva_ddip = NULL;
dvarg.dva_domain = NULL;
dvarg.dva_flags = immu_flags;
level = 0;
if (immu_walk_ancestor(rdip, NULL, get_branch_domain,
&dvarg, &level, immu_flags) != DDI_SUCCESS) {
/*
* maybe low memory. return error,
* so driver tries again later
*/
return (NULL);
}
	/* should have walked at least 1 dip (i.e. rdip) */
ASSERT(level > 0);
ddip = dvarg.dva_ddip; /* must be present */
domain = dvarg.dva_domain; /* may be NULL */
	/*
	 * We may find the domain during our ancestor walk on any one of our
	 * ancestor dips. If the domain is found then the domain-dip
	 * (i.e. ddip) will also be found in the same immu_devi struct.
	 * The domain-dip is the highest ancestor dip which shares the
	 * same domain with rdip.
	 * The domain may or may not be found, but the domain dip must
	 * be found.
	 */
if (ddip == NULL) {
ddi_err(DER_MODE, rdip, "Cannot find domain dip for device.");
return (NULL);
}
/*
* Did we find a domain ?
*/
if (domain) {
goto found;
}
/* nope, so allocate */
domain = domain_create(immu, ddip, rdip, immu_flags);
if (domain == NULL) {
return (NULL);
}
ASSERT(domain->dom_did > 0);
/*FALLTHROUGH*/
found:
	/*
	 * We know this domain *is* the right domain, so panic if
	 * another domain is already set for either the request-dip or
	 * effective dip.
	 */
set_domain(ddip, ddip, domain);
set_domain(rdip, ddip, domain);
*ddipp = ddip;
return (domain);
}
static void
create_unity_domain(immu_t *immu)
{
domain_t *domain;
/* 0 is reserved by Vt-d */
/*LINTED*/
ASSERT(IMMU_UNITY_DID > 0);
/* domain created during boot and always use sleep flag */
domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP);
rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
domain->dom_did = IMMU_UNITY_DID;
domain->dom_maptype = IMMU_MAPTYPE_UNITY;
domain->dom_immu = immu;
immu->immu_unity_domain = domain;
/*
* Setup the domain's initial page table
* should never fail.
*/
domain->dom_pgtable_root = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
ASSERT(domain->dom_pgtable_root);
pgtable_zero(immu, domain->dom_pgtable_root);
map_unity_domain(domain);
/*
* put it on the system-wide UNITY domain list
*/
mutex_enter(&(immu_domain_lock));
list_insert_tail(&immu_unity_domain_list, domain);
mutex_exit(&(immu_domain_lock));
}
/*
 * ddip is the domain-dip - the topmost dip in a domain.
 * rdip is the requesting-dip - the device which is
 * requesting DVMA setup.
 * If the domain is a non-shared domain, rdip == ddip.
 * For example, a conventional PCI device behind a PCIe-to-PCI bridge
 * shares the bridge's domain: the bridge is the ddip, the device the rdip.
 */
static domain_t *
domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
immu_flags_t immu_flags)
{
int kmflags;
domain_t *domain;
char mod_hash_name[128];
immu_devi_t *immu_devi;
int did;
dcookie_t dcookies[1] = {0};
int dcount = 0;
ASSERT(immu);
ASSERT(ddip);
immu_devi = immu_devi_get(rdip);
ASSERT(immu_devi);
/*
* First allocate a domainid.
* This routine will never fail, since if we run out
* of domains the unity domain will be allocated.
*/
did = did_alloc(immu, rdip, ddip, immu_flags);
ASSERT(did > 0);
if (did == IMMU_UNITY_DID) {
/* domain overflow */
ASSERT(immu->immu_unity_domain);
return (immu->immu_unity_domain);
}
kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
domain = kmem_zalloc(sizeof (domain_t), kmflags);
if (domain == NULL) {
ddi_err(DER_PANIC, rdip, "Failed to alloc DVMA domain "
"structure for device. IOMMU unit: %s", immu->immu_name);
/*NOTREACHED*/
}
rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
(void) snprintf(mod_hash_name, sizeof (mod_hash_name),
"immu%s-domain%d-pava-hash", immu->immu_name, did);
domain->dom_did = did;
domain->dom_immu = immu;
domain->dom_maptype = IMMU_MAPTYPE_XLATE;
/*
* Create xlate DVMA arena for this domain.
*/
create_xlate_arena(immu, domain, rdip, immu_flags);
/*
* Setup the domain's initial page table
*/
domain->dom_pgtable_root = pgtable_alloc(immu, immu_flags);
if (domain->dom_pgtable_root == NULL) {
ddi_err(DER_PANIC, rdip, "Failed to alloc root "
"pgtable for domain (%d). IOMMU unit: %s",
domain->dom_did, immu->immu_name);
/*NOTREACHED*/
}
pgtable_zero(immu, domain->dom_pgtable_root);
/*
* Since this is a immu unit-specific domain, put it on
* the per-immu domain list.
*/
mutex_enter(&(immu->immu_lock));
list_insert_head(&immu->immu_domain_list, domain);
mutex_exit(&(immu->immu_lock));
/*
* Also put it on the system-wide xlate domain list
*/
mutex_enter(&(immu_domain_lock));
list_insert_head(&immu_xlate_domain_list, domain);
mutex_exit(&(immu_domain_lock));
bdf_domain_insert(immu_devi, domain);
#ifdef BUGGY_DRIVERS
/*
* Map page0. Some broken HW/FW access it.
*/
dcookies[0].dck_paddr = 0;
dcookies[0].dck_npages = 1;
dcount = 1;
(void) dvma_map(domain->dom_immu, domain, 0, 1, dcookies, dcount, NULL,
IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
#endif
return (domain);
}
/*
 * Create domainid arena.
 * Domainid 0 is reserved by the Vt-d spec and cannot be used by
 * system software.
 * Domainid 1 is reserved by Solaris and used for *all* of the following:
 *	as the "uninitialized" domain - For devices not yet controlled
 *		by Solaris
 *	as the "unity" domain - For devices that will always belong
 *		to the unity domain
 *	as the "overflow" domain - Used for any new device after we
 *		run out of domains
 * All of the above domains map into a single domain with
 * domainid 1 and UNITY DVMA mapping.
 * Each IMMU unit has its own unity/uninit/overflow domain.
 */
static void
did_init(immu_t *immu)
{
(void) snprintf(immu->immu_did_arena_name,
sizeof (immu->immu_did_arena_name),
"%s_domainid_arena", immu->immu_name);
ddi_err(DER_VERB, NULL, "%s: Creating domainid arena %s",
immu->immu_name, immu->immu_did_arena_name);
immu->immu_did_arena = vmem_create(
immu->immu_did_arena_name,
(void *)(uintptr_t)(IMMU_UNITY_DID + 1), /* start addr */
immu->immu_max_domains - IMMU_UNITY_DID,
1, /* quantum */
NULL, /* afunc */
NULL, /* ffunc */
NULL, /* source */
0, /* qcache_max */
VM_SLEEP);
/* Even with SLEEP flag, vmem_create() can fail */
if (immu->immu_did_arena == NULL) {
ddi_err(DER_PANIC, NULL, "%s: Failed to create Intel "
"IOMMU domainid allocator: %s", immu->immu_name,
immu->immu_did_arena_name);
}
}
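/*
 * Hypothetical example: on an IMMU unit advertising 256 domain-ids, the
 * arena created above hands out ids starting at IMMU_UNITY_DID + 1
 * (i.e. 2), since id 0 is reserved by the Vt-d spec and id 1 is the
 * shared unity/uninit/overflow domain described above.
 */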
/* ######################### CONTEXT CODE ################################# */
static void
context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
int bus, int devfunc)
{
pgtable_t *context;
pgtable_t *pgtable_root;
pgtable_t *unity_pgtable_root;
hw_rce_t *hw_rent;
hw_rce_t *hw_cent;
hw_rce_t *ctxp;
int sid;
krw_t rwtype;
boolean_t fill_root;
boolean_t fill_ctx;
ASSERT(immu);
ASSERT(domain);
ASSERT(root_table);
ASSERT(bus >= 0);
ASSERT(devfunc >= 0);
ASSERT(domain->dom_pgtable_root);
pgtable_root = domain->dom_pgtable_root;
ctxp = (hw_rce_t *)(root_table->swpg_next_array);
context = *(pgtable_t **)(ctxp + bus);
hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr) + bus;
fill_root = B_FALSE;
fill_ctx = B_FALSE;
/* Check the most common case first with reader lock */
rw_enter(&(immu->immu_ctx_rwlock), RW_READER);
rwtype = RW_READER;
again:
if (ROOT_GET_P(hw_rent)) {
ASSERT(ROOT_GET_CONT(hw_rent) == context->hwpg_paddr);
hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED) {
ASSERT(CONT_GET_P(hw_cent));
ASSERT(CONT_GET_DID(hw_cent) == domain->dom_did);
ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw);
ASSERT(CONT_GET_TTYPE(hw_cent) == TTYPE_XLATE_ONLY);
ASSERT(CONT_GET_ASR(hw_cent) ==
pgtable_root->hwpg_paddr);
rw_exit(&(immu->immu_ctx_rwlock));
return;
} else {
fill_ctx = B_TRUE;
}
} else {
fill_root = B_TRUE;
fill_ctx = B_TRUE;
}
if (rwtype == RW_READER &&
rw_tryupgrade(&(immu->immu_ctx_rwlock)) == 0) {
rw_exit(&(immu->immu_ctx_rwlock));
rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
rwtype = RW_WRITER;
goto again;
}
rwtype = RW_WRITER;
if (fill_root == B_TRUE) {
ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
ROOT_SET_P(hw_rent);
immu_regs_cpu_flush(immu, (caddr_t)hw_rent, sizeof (hw_rce_t));
}
if (fill_ctx == B_TRUE) {
hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
unity_pgtable_root = immu->immu_unity_domain->dom_pgtable_root;
ASSERT(CONT_GET_AVAIL(hw_cent) == IMMU_CONT_UNINITED);
ASSERT(CONT_GET_P(hw_cent));
ASSERT(CONT_GET_DID(hw_cent) ==
immu->immu_unity_domain->dom_did);
ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw);
ASSERT(CONT_GET_TTYPE(hw_cent) == TTYPE_XLATE_ONLY);
ASSERT(CONT_GET_ASR(hw_cent) ==
unity_pgtable_root->hwpg_paddr);
/* need to disable context entry before reprogramming it */
bzero(hw_cent, sizeof (hw_rce_t));
/* flush caches */
immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
ASSERT(rw_write_held(&(immu->immu_ctx_rwlock)));
sid = ((bus << 8) | devfunc);
immu_regs_context_flush(immu, 0, sid, domain->dom_did,
CONTEXT_FSI);
immu_regs_wbf_flush(immu);
CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED);
CONT_SET_DID(hw_cent, domain->dom_did);
CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
/*LINTED*/
CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
CONT_SET_P(hw_cent);
immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
}
rw_exit(&(immu->immu_ctx_rwlock));
}
static pgtable_t *
context_create(immu_t *immu)
{
int bus;
int devfunc;
pgtable_t *root_table;
pgtable_t *context;
pgtable_t *pgtable_root;
hw_rce_t *ctxp;
hw_rce_t *hw_rent;
hw_rce_t *hw_cent;
	/* Allocate a zeroed root table (a 4K page of 256 128-bit entries) */
root_table = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
pgtable_zero(immu, root_table);
/*
* Setup context tables for all possible root table entries.
* Start out with unity domains for all entries.
*/
ctxp = (hw_rce_t *)(root_table->swpg_next_array);
hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr);
for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) {
context = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
pgtable_zero(immu, context);
ASSERT(ROOT_GET_P(hw_rent) == 0);
ROOT_SET_P(hw_rent);
ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
hw_cent = (hw_rce_t *)(context->hwpg_vaddr);
for (devfunc = 0; devfunc < IMMU_CONT_NUM;
devfunc++, hw_cent++) {
ASSERT(CONT_GET_P(hw_cent) == 0);
pgtable_root =
immu->immu_unity_domain->dom_pgtable_root;
CONT_SET_DID(hw_cent,
immu->immu_unity_domain->dom_did);
CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
/*LINTED*/
CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
CONT_SET_AVAIL(hw_cent, IMMU_CONT_UNINITED);
CONT_SET_P(hw_cent);
}
immu_regs_cpu_flush(immu, context->hwpg_vaddr, IMMU_PAGESIZE);
*((pgtable_t **)ctxp) = context;
}
immu_regs_cpu_flush(immu, root_table->hwpg_vaddr, IMMU_PAGESIZE);
return (root_table);
}
/*
* Called during rootnex attach, so no locks needed
*/
static void
context_init(immu_t *immu)
{
ASSERT(immu);
ASSERT(immu->immu_ctx_root == NULL);
rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
immu_regs_wbf_flush(immu);
immu->immu_ctx_root = context_create(immu);
immu_regs_set_root_table(immu);
rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
immu_regs_context_flush(immu, 0, 0, 0, CONTEXT_GLOBAL);
rw_exit(&(immu->immu_ctx_rwlock));
immu_regs_iotlb_flush(immu, 0, 0, 0, 0, IOTLB_GLOBAL);
immu_regs_wbf_flush(immu);
}
/*
* Find top pcib
*/
static int
find_top_pcib(dev_info_t *dip, void *arg)
{
immu_devi_t *immu_devi;
dev_info_t **pcibdipp = (dev_info_t **)arg;
ASSERT(dip);
immu_devi = immu_devi_get(dip);
ASSERT(immu_devi);
if (immu_devi->imd_pcib_type == IMMU_PCIB_PCI_PCI) {
*pcibdipp = dip;
}
return (DDI_WALK_CONTINUE);
}
static int
immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
dev_info_t *rdip, immu_flags_t immu_flags)
{
immu_devi_t *r_immu_devi;
immu_devi_t *d_immu_devi;
int r_bus;
int d_bus;
int r_devfunc;
int d_devfunc;
immu_pcib_t d_pcib_type;
immu_pcib_t r_pcib_type;
dev_info_t *pcibdip;
if (ddip == NULL || rdip == NULL ||
ddip == root_devinfo || rdip == root_devinfo) {
ddi_err(DER_MODE, rdip, "immu_contexts_update: domain-dip or "
"request-dip are NULL or are root devinfo");
return (DDI_FAILURE);
}
/*
* We need to set the context fields
* based on what type of device rdip and ddip are.
* To do that we need the immu_devi field.
* Set the immu_devi field (if not already set)
*/
if (immu_devi_set(ddip, immu_flags) == DDI_FAILURE) {
ddi_err(DER_MODE, rdip,
"immu_context_update: failed to set immu_devi for ddip");
return (DDI_FAILURE);
}
if (immu_devi_set(rdip, immu_flags) == DDI_FAILURE) {
ddi_err(DER_MODE, rdip,
"immu_context_update: failed to set immu_devi for rdip");
return (DDI_FAILURE);
}
d_immu_devi = immu_devi_get(ddip);
r_immu_devi = immu_devi_get(rdip);
ASSERT(r_immu_devi);
ASSERT(d_immu_devi);
d_bus = d_immu_devi->imd_bus;
d_devfunc = d_immu_devi->imd_devfunc;
d_pcib_type = d_immu_devi->imd_pcib_type;
r_bus = r_immu_devi->imd_bus;
r_devfunc = r_immu_devi->imd_devfunc;
r_pcib_type = r_immu_devi->imd_pcib_type;
ASSERT(d_bus >= 0);
if (rdip == ddip) {
ASSERT(d_pcib_type == IMMU_PCIB_ENDPOINT ||
d_pcib_type == IMMU_PCIB_PCIE_PCIE);
ASSERT(r_bus >= 0);
ASSERT(r_devfunc >= 0);
/* rdip is a PCIE device. set context for it only */
context_set(immu, domain, immu->immu_ctx_root, r_bus,
r_devfunc);
#ifdef BUGGY_DRIVERS
} else if (r_immu_devi == d_immu_devi) {
#ifdef TEST
ddi_err(DER_WARN, rdip, "Driver bug: Devices 0x%lx and "
"0x%lx are identical", rdip, ddip);
#endif
ASSERT(d_pcib_type == IMMU_PCIB_ENDPOINT);
ASSERT(r_bus >= 0);
ASSERT(r_devfunc >= 0);
/* rdip is a PCIE device. set context for it only */
context_set(immu, domain, immu->immu_ctx_root, r_bus,
r_devfunc);
#endif
} else if (d_pcib_type == IMMU_PCIB_PCIE_PCI) {
/*
* ddip is a PCIE_PCI bridge. Set context for ddip's
* secondary bus. If rdip is on ddip's secondary
* bus, set context for rdip. Else, set context
* for rdip's PCI bridge on ddip's secondary bus.
*/
context_set(immu, domain, immu->immu_ctx_root,
d_immu_devi->imd_sec, 0);
if (d_immu_devi->imd_sec == r_bus) {
context_set(immu, domain, immu->immu_ctx_root,
r_bus, r_devfunc);
} else {
pcibdip = NULL;
if (immu_walk_ancestor(rdip, ddip, find_top_pcib,
&pcibdip, NULL, immu_flags) == DDI_SUCCESS &&
pcibdip != NULL) {
ASSERT(pcibdip);
r_immu_devi = immu_devi_get(pcibdip);
				ASSERT(r_immu_devi);
				ASSERT(r_immu_devi->imd_pcib_type ==
				    IMMU_PCIB_PCI_PCI);
r_bus = r_immu_devi->imd_bus;
r_devfunc = r_immu_devi->imd_devfunc;
context_set(immu, domain, immu->immu_ctx_root,
r_bus, r_devfunc);
} else {
				ddi_err(DER_PANIC, rdip, "Failed to find PCI "
				    "bridge for PCI device");
/*NOTREACHED*/
}
}
} else if (d_pcib_type == IMMU_PCIB_PCI_PCI) {
context_set(immu, domain, immu->immu_ctx_root, d_bus,
d_devfunc);
} else if (d_pcib_type == IMMU_PCIB_ENDPOINT) {
ASSERT(r_pcib_type == IMMU_PCIB_NOBDF);
/*
* ddip is a PCIE device which has a non-PCI device under it
		 * i.e. it is a PCI-nonPCI bridge. Example: pci-ide (ata)
*/
context_set(immu, domain, immu->immu_ctx_root, d_bus,
d_devfunc);
} else {
ddi_err(DER_PANIC, rdip, "unknown device type. Cannot "
"set IMMU context.");
/*NOTREACHED*/
}
/* XXX do we need a membar_producer() here */
return (DDI_SUCCESS);
}
/* ##################### END CONTEXT CODE ################################## */
/* ##################### MAPPING CODE ################################## */
static boolean_t
PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
dev_info_t *rdip, immu_flags_t immu_flags)
{
if (immu_flags & IMMU_FLAGS_PAGE1) {
ASSERT(paddr == 0);
} else {
ASSERT((next == NULL) ^ (paddr == 0));
}
/* The PDTE must be set i.e. present bit is set */
if (!PDTE_P(pdte)) {
ddi_err(DER_MODE, rdip, "No present flag");
return (B_FALSE);
}
	/*
	 * Just assert to check the most significant system software field
	 * (PDTE_SW4) as it is the same as the present bit and we
	 * checked that above
	 */
ASSERT(PDTE_SW4(pdte));
/*
* TM field should be clear if not reserved.
* non-leaf is always reserved
*/
if (next == NULL && immu->immu_TM_reserved == B_FALSE) {
if (PDTE_TM(pdte)) {
ddi_err(DER_MODE, rdip, "TM flag set");
return (B_FALSE);
}
}
/*
* The SW3 field is not used and must be clear
*/
if (PDTE_SW3(pdte)) {
ddi_err(DER_MODE, rdip, "SW3 set");
return (B_FALSE);
}
/*
* PFN (for PTE) or next level pgtable-paddr (for PDE) must be set
*/
if (next == NULL) {
ASSERT(paddr % IMMU_PAGESIZE == 0);
if (PDTE_PADDR(pdte) != paddr) {
ddi_err(DER_MODE, rdip,
"PTE paddr mismatch: %lx != %lx",
PDTE_PADDR(pdte), paddr);
return (B_FALSE);
}
} else {
if (PDTE_PADDR(pdte) != next->hwpg_paddr) {
ddi_err(DER_MODE, rdip,
"PDE paddr mismatch: %lx != %lx",
PDTE_PADDR(pdte), next->hwpg_paddr);
return (B_FALSE);
}
}
/*
* SNP field should be clear if not reserved.
* non-leaf is always reserved
*/
if (next == NULL && immu->immu_SNP_reserved == B_FALSE) {
if (PDTE_SNP(pdte)) {
ddi_err(DER_MODE, rdip, "SNP set");
return (B_FALSE);
}
}
/* second field available for system software should be clear */
if (PDTE_SW2(pdte)) {
ddi_err(DER_MODE, rdip, "SW2 set");
return (B_FALSE);
}
/* Super pages field should be clear */
if (PDTE_SP(pdte)) {
ddi_err(DER_MODE, rdip, "SP set");
return (B_FALSE);
}
/*
* least significant field available for
* system software should be clear
*/
if (PDTE_SW1(pdte)) {
ddi_err(DER_MODE, rdip, "SW1 set");
return (B_FALSE);
}
if ((immu_flags & IMMU_FLAGS_READ) && !PDTE_READ(pdte)) {
ddi_err(DER_MODE, rdip, "READ not set");
return (B_FALSE);
}
if ((immu_flags & IMMU_FLAGS_WRITE) && !PDTE_WRITE(pdte)) {
ddi_err(DER_MODE, rdip, "WRITE not set");
return (B_FALSE);
}
return (B_TRUE);
}
/*ARGSUSED*/
static void
PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
uint64_t *dvma_ptr, uint64_t *npages_ptr, dev_info_t *rdip)
{
uint64_t npages;
uint64_t dvma;
pgtable_t *pgtable;
hw_pdte_t *hwp;
hw_pdte_t *shwp;
int idx;
hw_pdte_t pte;
ASSERT(xlate->xlt_level == 1);
pgtable = xlate->xlt_pgtable;
idx = xlate->xlt_idx;
ASSERT(pgtable);
ASSERT(idx <= IMMU_PGTABLE_MAXIDX);
dvma = *dvma_ptr;
npages = *npages_ptr;
ASSERT(dvma);
ASSERT(dvma % IMMU_PAGESIZE == 0);
ASSERT(npages);
/*
* since a caller gets a unique dvma for a physical address,
* no other concurrent thread will be writing to the same
* PTE even if it has the same paddr. So no locks needed.
*/
shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
hwp = shwp;
for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
pte = *hwp;
		/* Cannot clear a HW PTE that is already clear */
ASSERT(PDTE_P(pte));
PDTE_CLEAR_P(pte);
*hwp = pte;
dvma += IMMU_PAGESIZE;
npages--;
}
#ifdef TEST
	/* don't need to flush the write during unmap */
immu_regs_cpu_flush(immu, (caddr_t)shwp,
(hwp - shwp) * sizeof (hw_pdte_t));
#endif
*dvma_ptr = dvma;
*npages_ptr = npages;
xlate->xlt_idx = idx;
}
/*ARGSUSED*/
static void
xlate_setup(immu_t *immu, uint64_t dvma, xlate_t *xlate,
int nlevels, dev_info_t *rdip)
{
int level;
uint64_t offbits;
/* level 0 is never used. Sanity check */
ASSERT(xlate->xlt_level == 0);
ASSERT(xlate->xlt_idx == 0);
ASSERT(xlate->xlt_pgtable == NULL);
ASSERT(dvma % IMMU_PAGESIZE == 0);
/*
* Skip the first 12 bits which is the offset into
* 4K PFN (phys page frame based on IMMU_PAGESIZE)
*/
offbits = dvma >> IMMU_PAGESHIFT;
/* skip to level 1 i.e. leaf PTE */
for (level = 1, xlate++; level <= nlevels; level++, xlate++) {
xlate->xlt_level = level;
xlate->xlt_idx = (offbits & IMMU_PGTABLE_LEVEL_MASK);
ASSERT(xlate->xlt_idx <= IMMU_PGTABLE_MAXIDX);
xlate->xlt_pgtable = NULL;
offbits >>= IMMU_PGTABLE_LEVEL_STRIDE;
}
}
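/*
 * Worked example (assuming 4K IMMU pages and the usual 512-entry, 9-bit
 * pgtable levels): for dvma 0x200000, offbits is 0x200, so the level-1
 * (leaf) index is 0, the level-2 index is 1 and all higher-level indexes
 * are 0 - i.e. the 2MB boundary starts a new leaf pgtable.
 */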
/*
* Read the pgtables
*/
static void
PDE_lookup(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
dev_info_t *rdip)
{
pgtable_t *pgtable;
pgtable_t *next;
hw_pdte_t pde;
uint_t idx;
/* xlate should be at level 0 */
ASSERT(xlate->xlt_level == 0);
ASSERT(xlate->xlt_idx == 0);
/* start with highest level pgtable i.e. root */
xlate += nlevels;
ASSERT(xlate->xlt_level == nlevels);
if (xlate->xlt_pgtable == NULL) {
xlate->xlt_pgtable = domain->dom_pgtable_root;
}
for (; xlate->xlt_level > 1; xlate--) {
idx = xlate->xlt_idx;
pgtable = xlate->xlt_pgtable;
ASSERT(pgtable);
ASSERT(idx <= IMMU_PGTABLE_MAXIDX);
if ((xlate - 1)->xlt_pgtable) {
continue;
}
/* xlate's leafier level is not set, set it now */
/* Lock the pgtable in read mode */
rw_enter(&(pgtable->swpg_rwlock), RW_READER);
/*
* since we are unmapping, the pgtable should
* already point to a leafier pgtable.
*/
next = *(pgtable->swpg_next_array + idx);
ASSERT(next);
pde = *((hw_pdte_t *)(pgtable->hwpg_vaddr) + idx);
ASSERT(PDTE_check(immu, pde, next, 0, rdip, 0) == B_TRUE);
(xlate - 1)->xlt_pgtable = next;
rw_exit(&(pgtable->swpg_rwlock));
}
}
/*ARGSUSED*/
static void
PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
dev_info_t *rdip, immu_flags_t immu_flags)
{
hw_pdte_t pte;
pte = *hwp;
#ifndef DEBUG
/* Set paddr */
ASSERT(paddr % IMMU_PAGESIZE == 0);
pte = 0;
PDTE_SET_PADDR(pte, paddr);
PDTE_SET_READ(pte);
PDTE_SET_WRITE(pte);
*hwp = pte;
#else
if (PDTE_P(pte)) {
if (PDTE_PADDR(pte) != paddr) {
ddi_err(DER_MODE, rdip, "PTE paddr %lx != paddr %lx",
PDTE_PADDR(pte), paddr);
}
#ifdef BUGGY_DRIVERS
return;
#else
goto out;
#endif
}
/* Don't touch SW4. It is the present field */
/* clear TM field if not reserved */
if (immu->immu_TM_reserved == B_FALSE) {
PDTE_CLEAR_TM(pte);
}
#ifdef DEBUG
/* Clear 3rd field for system software - not used */
PDTE_CLEAR_SW3(pte);
#endif
/* Set paddr */
ASSERT(paddr % IMMU_PAGESIZE == 0);
PDTE_CLEAR_PADDR(pte);
PDTE_SET_PADDR(pte, paddr);
/* clear SNP field if not reserved. */
if (immu->immu_SNP_reserved == B_FALSE) {
PDTE_CLEAR_SNP(pte);
}
#ifdef DEBUG
/* Clear SW2 field available for software */
PDTE_CLEAR_SW2(pte);
#endif
#ifdef DEBUG
/* SP is don't care for PTEs. Clear it for cleanliness */
PDTE_CLEAR_SP(pte);
#endif
#ifdef DEBUG
/* Clear SW1 field available for software */
PDTE_CLEAR_SW1(pte);
#endif
/*
* Now that we are done writing the PTE
* set the "present" flag. Note this present
* flag is a bit in the PDE/PTE that the
* spec says is available for system software.
* This is an implementation detail of Solaris
* bare-metal Intel IOMMU.
* The present field in a PDE/PTE is not defined
* by the Vt-d spec
*/
PDTE_SET_P(pte);
out:
#ifdef BUGGY_DRIVERS
PDTE_SET_READ(pte);
PDTE_SET_WRITE(pte);
#else
if (immu_flags & IMMU_FLAGS_READ)
PDTE_SET_READ(pte);
if (immu_flags & IMMU_FLAGS_WRITE)
PDTE_SET_WRITE(pte);
#endif
*hwp = pte;
#endif
}
/*ARGSUSED*/
static void
PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
uint64_t *dvma_ptr, uint64_t *nvpages_ptr, dcookie_t *dcookies,
int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
{
paddr_t paddr;
uint64_t nvpages;
uint64_t nppages;
uint64_t dvma;
pgtable_t *pgtable;
hw_pdte_t *hwp;
hw_pdte_t *shwp;
int idx;
int j;
ASSERT(xlate->xlt_level == 1);
pgtable = xlate->xlt_pgtable;
idx = xlate->xlt_idx;
ASSERT(idx <= IMMU_PGTABLE_MAXIDX);
ASSERT(pgtable);
dvma = *dvma_ptr;
nvpages = *nvpages_ptr;
ASSERT(dvma || (immu_flags & IMMU_FLAGS_PAGE1));
ASSERT(nvpages);
/*
* since a caller gets a unique dvma for a physical address,
* no other concurrent thread will be writing to the same
* PTE even if it has the same paddr. So no locks needed.
*/
shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
hwp = shwp;
for (j = dcount - 1; j >= 0; j--) {
if (nvpages <= dcookies[j].dck_npages)
break;
nvpages -= dcookies[j].dck_npages;
}
ASSERT(j >= 0);
ASSERT(nvpages);
ASSERT(nvpages <= dcookies[j].dck_npages);
nppages = nvpages;
paddr = dcookies[j].dck_paddr +
(dcookies[j].dck_npages - nppages) * IMMU_PAGESIZE;
nvpages = *nvpages_ptr;
for (; nvpages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
ASSERT(paddr || (immu_flags & IMMU_FLAGS_PAGE1));
PTE_set_one(immu, hwp, paddr, rdip, immu_flags);
ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags)
== B_TRUE);
nppages--;
nvpages--;
paddr += IMMU_PAGESIZE;
dvma += IMMU_PAGESIZE;
if (nppages == 0) {
j++;
}
if (j == dcount) {
ASSERT(nvpages == 0);
break;
}
ASSERT(nvpages);
if (nppages == 0) {
nppages = dcookies[j].dck_npages;
paddr = dcookies[j].dck_paddr;
}
}
/* flush writes to HW PTE table */
immu_regs_cpu_flush(immu, (caddr_t)shwp, (hwp - shwp) *
sizeof (hw_pdte_t));
if (nvpages) {
*dvma_ptr = dvma;
*nvpages_ptr = nvpages;
} else {
*dvma_ptr = 0;
*nvpages_ptr = 0;
}
xlate->xlt_idx = idx;
}
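/*
 * Worked example for the dcookie scan in PTE_set_all() above (values are
 * hypothetical): with dcookies[] = { {paddr A, 3 pages}, {paddr B, 5 pages} }
 * and *nvpages_ptr == 6 (6 of the 8 pages still to be mapped), the backward
 * scan stops at j == 0 with nppages == 1, so mapping resumes at
 * A + 2*IMMU_PAGESIZE (the last page of the first cookie) and then runs
 * through all 5 pages of the second cookie.
 */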
/*ARGSUSED*/
static void
PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next,
dev_info_t *rdip, immu_flags_t immu_flags)
{
hw_pdte_t pde;
pde = *hwp;
/* if PDE is already set, make sure it is correct */
if (PDTE_P(pde)) {
ASSERT(PDTE_PADDR(pde) == next->hwpg_paddr);
#ifdef BUGGY_DRIVERS
return;
#else
goto out;
#endif
}
/* Dont touch SW4, it is the present bit */
/* don't touch TM field it is reserved for PDEs */
/* 3rd field available for system software is not used */
PDTE_CLEAR_SW3(pde);
/* Set next level pgtable-paddr for PDE */
ASSERT(next->hwpg_paddr % IMMU_PAGESIZE == 0);
PDTE_CLEAR_PADDR(pde);
PDTE_SET_PADDR(pde, next->hwpg_paddr);
/* don't touch SNP field it is reserved for PDEs */
/* Clear second field available for system software */
PDTE_CLEAR_SW2(pde);
/* No super pages for PDEs */
PDTE_CLEAR_SP(pde);
/* Clear SW1 for software */
PDTE_CLEAR_SW1(pde);
	/*
	 * Now that we are done writing the PDE
	 * set the "present" flag. Note this present
	 * flag is a bit in the PDE/PTE that the
	 * spec says is available for system software.
	 * This is an implementation detail of Solaris
	 * bare-metal Intel IOMMU.
	 * The present field in a PDE/PTE is not defined
	 * by the Vt-d spec
	 */
out:
#ifdef BUGGY_DRIVERS
PDTE_SET_READ(pde);
PDTE_SET_WRITE(pde);
#else
if (immu_flags & IMMU_FLAGS_READ)
PDTE_SET_READ(pde);
if (immu_flags & IMMU_FLAGS_WRITE)
PDTE_SET_WRITE(pde);
#endif
PDTE_SET_P(pde);
*hwp = pde;
immu_regs_cpu_flush(immu, (caddr_t)hwp, sizeof (hw_pdte_t));
}
/*
* Used to set PDEs
*/
static boolean_t
PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
dev_info_t *rdip, immu_flags_t immu_flags)
{
pgtable_t *pgtable;
pgtable_t *new;
pgtable_t *next;
hw_pdte_t *hwp;
int level;
uint_t idx;
krw_t rwtype;
boolean_t set = B_FALSE;
/* xlate should be at level 0 */
ASSERT(xlate->xlt_level == 0);
ASSERT(xlate->xlt_idx == 0);
/* start with highest level pgtable i.e. root */
xlate += nlevels;
ASSERT(xlate->xlt_level == nlevels);
new = NULL;
xlate->xlt_pgtable = domain->dom_pgtable_root;
for (level = nlevels; level > 1; level--, xlate--) {
ASSERT(xlate->xlt_level == level);
idx = xlate->xlt_idx;
pgtable = xlate->xlt_pgtable;
ASSERT(pgtable);
ASSERT(idx <= IMMU_PGTABLE_MAXIDX);
/* speculative alloc */
if (new == NULL) {
new = pgtable_alloc(immu, immu_flags);
if (new == NULL) {
ddi_err(DER_PANIC, rdip, "pgtable alloc err");
}
}
/* Lock the pgtable in READ mode first */
rw_enter(&(pgtable->swpg_rwlock), RW_READER);
rwtype = RW_READER;
again:
hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
ASSERT(pgtable->swpg_next_array);
next = (pgtable->swpg_next_array)[idx];
		/*
		 * Check if the leafier level already has a pgtable;
		 * if yes, verify it.
		 */
if (next == NULL) {
/* Change to a write lock */
if (rwtype == RW_READER &&
rw_tryupgrade(&(pgtable->swpg_rwlock)) == 0) {
rw_exit(&(pgtable->swpg_rwlock));
rw_enter(&(pgtable->swpg_rwlock), RW_WRITER);
rwtype = RW_WRITER;
goto again;
}
rwtype = RW_WRITER;
pgtable_zero(immu, new);
next = new;
new = NULL;
(pgtable->swpg_next_array)[idx] = next;
PDE_set_one(immu, hwp, next, rdip, immu_flags);
set = B_TRUE;
rw_downgrade(&(pgtable->swpg_rwlock));
rwtype = RW_READER;
} else {
hw_pdte_t pde = *hwp;
#ifndef BUGGY_DRIVERS
			/*
			 * If BUGGY_DRIVERS is defined we already set
			 * READ+WRITE permissions, so there is nothing to
			 * do for that case.
			 * XXX Check that read/write perms actually change
			 * before setting them. Also need to hold the lock.
			 */
if (immu_flags & IMMU_FLAGS_READ)
PDTE_SET_READ(pde);
if (immu_flags & IMMU_FLAGS_WRITE)
PDTE_SET_WRITE(pde);
#endif
*hwp = pde;
}
ASSERT(PDTE_check(immu, *hwp, next, 0, rdip, immu_flags)
== B_TRUE);
(xlate - 1)->xlt_pgtable = next;
ASSERT(rwtype == RW_READER);
rw_exit(&(pgtable->swpg_rwlock));
}
if (new) {
pgtable_free(immu, new);
}
return (set);
}
/*
 * dvma_map()
 *	map a contiguous range of DVMA pages
 *
 *	immu: IOMMU unit for which we are generating DVMA cookies
 *	domain: domain
 *	sdvma: Starting dvma
 *	snvpages: Number of pages to map
 *	dcookies: Array of physical cookies (paddr, npages) to map
 *	dcount: Number of physical cookies
 *	rdip: requesting device
 *	immu_flags: flags
 */
static boolean_t
dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snvpages,
dcookie_t *dcookies, int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
{
uint64_t dvma;
uint64_t n;
int nlevels = immu->immu_dvma_nlevels;
xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
boolean_t pde_set = B_FALSE;
ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS);
ASSERT(sdvma % IMMU_PAGESIZE == 0);
ASSERT(snvpages);
n = snvpages;
dvma = sdvma;
while (n > 0) {
xlate_setup(immu, dvma, xlate, nlevels, rdip);
/* Lookup or allocate PGDIRs and PGTABLEs if necessary */
if (PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags)
== B_TRUE) {
pde_set = B_TRUE;
}
/* set all matching ptes that fit into this leaf pgtable */
PTE_set_all(immu, domain, &xlate[1], &dvma, &n, dcookies,
dcount, rdip, immu_flags);
}
return (pde_set);
}
/*
 * dvma_unmap()
 *	unmap a range of DVMAs
 *
 *	immu: IOMMU unit state
 *	domain: domain for requesting device
 *	sdvma: starting DVMA
 *	snpages: Number of IMMU pages to be unmapped
 *	rdip: requesting device
 */
static void
dvma_unmap(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snpages,
dev_info_t *rdip)
{
int nlevels = immu->immu_dvma_nlevels;
xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
uint64_t n;
uint64_t dvma;
ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS);
ASSERT(sdvma != 0);
ASSERT(sdvma % IMMU_PAGESIZE == 0);
ASSERT(snpages);
dvma = sdvma;
n = snpages;
while (n > 0) {
/* setup the xlate array */
xlate_setup(immu, dvma, xlate, nlevels, rdip);
/* just look up existing pgtables; this should never fail */
PDE_lookup(immu, domain, xlate, nlevels, rdip);
/* clear all matching ptes that fit into this leaf pgtable */
PTE_clear_all(immu, domain, &xlate[1], &dvma, &n, rdip);
}
/* No need to flush IOTLB after unmap */
}
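/*
* dvma_alloc()
* allocate a page-aligned DVMA range for "npages" pages from the
* domain's vmem arena, honoring the DMA attributes of the handle
*/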
static uint64_t
dvma_alloc(ddi_dma_impl_t *hp, domain_t *domain, uint_t npages)
{
ddi_dma_attr_t *dma_attr;
uint64_t dvma;
size_t xsize, align;
uint64_t minaddr, maxaddr;
ASSERT(domain->dom_maptype != IMMU_MAPTYPE_UNITY);
/* shortcuts */
dma_attr = &(hp->dmai_attr);
/* parameters */
xsize = npages * IMMU_PAGESIZE;
align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
minaddr = dma_attr->dma_attr_addr_lo;
maxaddr = dma_attr->dma_attr_addr_hi + 1;
/* nocross is checked in cookie_update() */
/* handle the rollover cases */
if (maxaddr < dma_attr->dma_attr_addr_hi) {
maxaddr = dma_attr->dma_attr_addr_hi;
}
/*
* allocate from vmem arena.
*/
dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
xsize, align, 0, 0, (void *)(uintptr_t)minaddr,
(void *)(uintptr_t)maxaddr, VM_NOSLEEP);
ASSERT(dvma);
ASSERT(dvma >= minaddr);
ASSERT(dvma + xsize - 1 < maxaddr);
return (dvma);
}
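/*
* dvma_free()
* free a DVMA range previously allocated by dvma_alloc().
* Unity domains have no vmem arena, so there is nothing to free.
*/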
static void
dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages)
{
uint64_t size = npages * IMMU_PAGESIZE;
ASSERT(domain);
ASSERT(domain->dom_did > 0);
ASSERT(dvma);
ASSERT(npages);
if (domain->dom_maptype != IMMU_MAPTYPE_XLATE) {
ASSERT(domain->dom_maptype == IMMU_MAPTYPE_UNITY);
return;
}
vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size);
}
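/*
* cookie_free()
* tear down the DVMA state of a DMA handle: free the DVMA range
* and the dvcookie, dcookie and cookie arrays
*/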
/*ARGSUSED*/
static void
cookie_free(rootnex_dma_t *dma, immu_t *immu, domain_t *domain,
dev_info_t *rdip)
{
int i;
uint64_t dvma;
uint64_t npages;
dvcookie_t *dvcookies = dma->dp_dvcookies;
ASSERT(dma->dp_max_cookies);
ASSERT(dma->dp_max_dcookies);
ASSERT(dma->dp_dvmax < dma->dp_max_cookies);
ASSERT(dma->dp_dmax < dma->dp_max_dcookies);
/*
* we allocated DVMA in a single chunk. Calculate total number
* of pages
*/
for (i = 0, npages = 0; i <= dma->dp_dvmax; i++) {
npages += dvcookies[i].dvck_npages;
}
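/* the DVMA for the entire bind starts at the first dvcookie */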
dvma = dvcookies[0].dvck_dvma;
#ifdef DEBUG
/* Unmap only in DEBUG mode */
dvma_unmap(immu, domain, dvma, npages, rdip);
#endif
dvma_free(domain, dvma, npages);
kmem_free(dma->dp_dvcookies, sizeof (dvcookie_t) * dma->dp_max_cookies);
dma->dp_dvcookies = NULL;
kmem_free(dma->dp_dcookies, sizeof (dcookie_t) * dma->dp_max_dcookies);
dma->dp_dcookies = NULL;
if (dma->dp_need_to_free_cookie == B_TRUE) {
kmem_free(dma->dp_cookies, sizeof (ddi_dma_cookie_t) *
dma->dp_max_cookies);
dma->dp_cookies = NULL;
dma->dp_need_to_free_cookie = B_FALSE;
}
dma->dp_max_cookies = 0;
dma->dp_max_dcookies = 0;
dma->dp_cookie_size = 0;
dma->dp_dvmax = 0;
dma->dp_dmax = 0;
}
/*
* cookie_alloc()
* Allocate the dvcookie, dcookie and consumer-visible cookie arrays
* for a DMA bind, sized from the object size and dma_attr_sgllen.
*/
static int
cookie_alloc(rootnex_dma_t *dma, struct ddi_dma_req *dmareq,
ddi_dma_attr_t *attr, uint_t prealloc)
{
int kmflag;
rootnex_sglinfo_t *sinfo = &(dma->dp_sglinfo);
dvcookie_t *dvcookies = dma->dp_dvcookies;
dcookie_t *dcookies = dma->dp_dcookies;
ddi_dma_cookie_t *cookies = dma->dp_cookies;
uint64_t max_cookies;
uint64_t max_dcookies;
uint64_t cookie_size;
/* determine the sleep flag for the array allocations below */
if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
kmflag = KM_SLEEP;
} else {
kmflag = KM_NOSLEEP;
}
/*
* XXX make sure the cookie size doesn't exceed sinfo->si_max_cookie_size;
*/
/*
* figure out a rough estimate of the array sizes:
* at a minimum, each cookie must hold 1 page;
* at a maximum, the cookie count cannot exceed dma_attr_sgllen
*/
max_dcookies = dmareq->dmar_object.dmao_size + IMMU_PAGEOFFSET;
max_dcookies /= IMMU_PAGESIZE;
max_dcookies++;
max_cookies = MIN(max_dcookies, attr->dma_attr_sgllen);
/* allocate the dvma cookie array */
dvcookies = kmem_zalloc(sizeof (dvcookie_t) * max_cookies, kmflag);
if (dvcookies == NULL) {
return (DDI_FAILURE);
}
/* allocate the "phys" cookie array */
dcookies = kmem_zalloc(sizeof (dcookie_t) * max_dcookies, kmflag);
if (dcookies == NULL) {
kmem_free(dvcookies, sizeof (dvcookie_t) * max_cookies);
dvcookies = NULL;
return (DDI_FAILURE);
}
/* allocate the "real" cookie array - the one given to users */
cookie_size = sizeof (ddi_dma_cookie_t) * max_cookies;
if (max_cookies > prealloc) {
cookies = kmem_zalloc(cookie_size, kmflag);
if (cookies == NULL) {
kmem_free(dvcookies, sizeof (dvcookie_t) * max_cookies);
kmem_free(dcookies, sizeof (dcookie_t) * max_dcookies);
goto fail;
}
dma->dp_need_to_free_cookie = B_TRUE;
} else {
/* the preallocated buffer fits this size */
cookies = (ddi_dma_cookie_t *)dma->dp_prealloc_buffer;
bzero(cookies, sizeof (ddi_dma_cookie_t)* max_cookies);
dma->dp_need_to_free_cookie = B_FALSE;
}
dma->dp_dvcookies = dvcookies;
dma->dp_dcookies = dcookies;
dma->dp_cookies = cookies;
dma->dp_cookie_size = cookie_size;
dma->dp_max_cookies = max_cookies;
dma->dp_max_dcookies = max_dcookies;
dma->dp_dvmax = 0;
dma->dp_dmax = 0;
sinfo->si_max_pages = dma->dp_max_cookies;
return (DDI_SUCCESS);
fail:
dma->dp_dvcookies = NULL;
dma->dp_dcookies = NULL;
dma->dp_cookies = NULL;
dma->dp_cookie_size = 0;
dma->dp_max_cookies = 0;
dma->dp_max_dcookies = 0;
dma->dp_dvmax = 0;
dma->dp_dmax = 0;
dma->dp_need_to_free_cookie = B_FALSE;
sinfo->si_max_pages = 0;
return (DDI_FAILURE);
}
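/*
* cookie_update()
* add the range [paddr, paddr + psize) to the current dvcookie,
* dcookie and consumer cookie; start new ones when the maxseg or
* nocross limits would be exceeded, or when paddr is not contiguous
* with the previous dcookie
*/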
/*ARGSUSED*/
static void
cookie_update(domain_t *domain, rootnex_dma_t *dma, paddr_t paddr,
int64_t psize, uint64_t maxseg, size_t nocross)
{
dvcookie_t *dvcookies = dma->dp_dvcookies;
dcookie_t *dcookies = dma->dp_dcookies;
ddi_dma_cookie_t *cookies = dma->dp_cookies;
uint64_t dvmax = dma->dp_dvmax;
uint64_t dmax = dma->dp_dmax;
ASSERT(dvmax < dma->dp_max_cookies);
ASSERT(dmax < dma->dp_max_dcookies);
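/*
* track only page-aligned paddrs here; the offset into the
* first page is applied to dmac_laddress in cookie_create()
*/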
paddr &= IMMU_PAGEMASK;
ASSERT(paddr);
ASSERT(psize);
ASSERT(maxseg);
/*
* check to see if this page would put us
* over the max cookie size.
*/
if (cookies[dvmax].dmac_size + psize > maxseg) {
dvmax++; /* use the next dvcookie */
dmax++; /* also means we use the next dcookie */
ASSERT(dvmax < dma->dp_max_cookies);
ASSERT(dmax < dma->dp_max_dcookies);
}
/*
* check to see if this page would push the dvcookie past
* the nocross boundary. If so, start a new cookie;
* otherwise vmem_xalloc() will fail later due to an
* overconstrained allocation request.
* nocross == 0 implies no nocross constraint.
*/
if (nocross > 0) {
ASSERT((dvcookies[dvmax].dvck_npages) * IMMU_PAGESIZE
<= nocross);
if ((dvcookies[dvmax].dvck_npages + 1) * IMMU_PAGESIZE
> nocross) {
dvmax++; /* use the next dvcookie */
dmax++; /* also means we use the next dcookie */
ASSERT(dvmax < dma->dp_max_cookies);
ASSERT(dmax < dma->dp_max_dcookies);
}
ASSERT((dvcookies[dvmax].dvck_npages) * IMMU_PAGESIZE
<= nocross);
}
/*
* If the cookie is empty
*/
if (dvcookies[dvmax].dvck_npages == 0) {
ASSERT(cookies[dvmax].dmac_size == 0);
ASSERT(dvcookies[dvmax].dvck_dvma == 0);
ASSERT(dvcookies[dvmax].dvck_npages
== 0);
ASSERT(dcookies[dmax].dck_paddr == 0);
ASSERT(dcookies[dmax].dck_npages == 0);
dvcookies[dvmax].dvck_dvma = 0;
dvcookies[dvmax].dvck_npages = 1;
dcookies[dmax].dck_paddr = paddr;
dcookies[dmax].dck_npages = 1;
cookies[dvmax].dmac_size = psize;
} else {
/* Cookie not empty. Add to it */
cookies[dma->dp_dvmax].dmac_size += psize;
ASSERT(dvcookies[dma->dp_dvmax].dvck_dvma == 0);
dvcookies[dma->dp_dvmax].dvck_npages++;
ASSERT(dcookies[dmax].dck_paddr != 0);
ASSERT(dcookies[dmax].dck_npages != 0);
/* Check if this paddr is contiguous */
if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) {
dcookies[dmax].dck_npages++;
} else {
/* No, we need a new dcookie */
dmax++;
ASSERT(dcookies[dmax].dck_paddr == 0);
ASSERT(dcookies[dmax].dck_npages == 0);
dcookies[dmax].dck_paddr = paddr;
dcookies[dmax].dck_npages = 1;
}
}
dma->dp_dvmax = dvmax;
dma->dp_dmax = dmax;
}
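/*
* cookie_finalize()
* allocate DVMA for the bind, map it to the physical cookies,
* flush the IOTLB and fill in the consumer-visible cookie addresses
*/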
static void
cookie_finalize(ddi_dma_impl_t *hp, immu_t *immu, domain_t *domain,
dev_info_t *rdip, immu_flags_t immu_flags)
{
int i;
rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private;
dvcookie_t *dvcookies = dma->dp_dvcookies;
dcookie_t *dcookies = dma->dp_dcookies;
ddi_dma_cookie_t *cookies = dma->dp_cookies;
uint64_t npages;
uint64_t dvma;
boolean_t pde_set;
/* First calculate the total number of pages required */
for (i = 0, npages = 0; i <= dma->dp_dvmax; i++) {
npages += dvcookies[i].dvck_npages;
}
/* Now allocate dvma */
dvma = dvma_alloc(hp, domain, npages);
/* Now map the dvma */
pde_set = dvma_map(immu, domain, dvma, npages, dcookies,
dma->dp_dmax + 1, rdip, immu_flags);
/* Invalidate the IOTLB; if any PDEs were newly set, flush non-leaf entries as well */
immu_regs_iotlb_flush(immu, domain->dom_did, dvma, npages,
pde_set == B_TRUE ? TLB_IVA_WHOLE : TLB_IVA_LEAF, IOTLB_PSI);
/* Now setup dvcookies and real cookie addresses */
for (i = 0; i <= dma->dp_dvmax; i++) {
dvcookies[i].dvck_dvma = dvma;
cookies[i].dmac_laddress = dvma;
ASSERT(cookies[i].dmac_size != 0);
cookies[i].dmac_type = 0;
dvma += (dvcookies[i].dvck_npages * IMMU_PAGESIZE);
}
#ifdef TEST
immu_regs_iotlb_flush(immu, domain->dom_did, 0, 0, 0, IOTLB_DSI);
#endif
}
/*
* cookie_create()
* Walk the DMA object, build the dvcookie/dcookie arrays and the
* consumer-visible cookies, then allocate and map DVMA for them.
*/
static int
cookie_create(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
ddi_dma_attr_t *a, immu_t *immu, domain_t *domain, dev_info_t *rdip,
uint_t prealloc_count, immu_flags_t immu_flags)
{
ddi_dma_atyp_t buftype;
uint64_t offset;
page_t **pparray;
uint64_t paddr;
uint_t psize;
uint_t size;
uint64_t maxseg;
caddr_t vaddr;
uint_t pcnt;
page_t *page;
rootnex_sglinfo_t *sglinfo;
ddi_dma_obj_t *dmar_object;
rootnex_dma_t *dma;
size_t nocross;
dma = (rootnex_dma_t *)hp->dmai_private;
sglinfo = &(dma->dp_sglinfo);
dmar_object = &(dmareq->dmar_object);
maxseg = sglinfo->si_max_cookie_size;
pparray = dmar_object->dmao_obj.virt_obj.v_priv;
vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
buftype = dmar_object->dmao_type;
size = dmar_object->dmao_size;
nocross = (size_t)(a->dma_attr_seg + 1);
/*
* Allocate cookie, dvcookie and dcookie
*/
if (cookie_alloc(dma, dmareq, a, prealloc_count) != DDI_SUCCESS) {
return (DDI_FAILURE);
}
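/* hand the consumer-visible cookie array to the DMA handle */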
hp->dmai_cookie = dma->dp_cookies;
pcnt = 0;
/* retrieve paddr, psize, offset from dmareq */
if (buftype == DMA_OTYP_PAGES) {
page = dmar_object->dmao_obj.pp_obj.pp_pp;
ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page));
offset = dmar_object->dmao_obj.pp_obj.pp_offset &
MMU_PAGEOFFSET;
paddr = pfn_to_pa(page->p_pagenum) + offset;
psize = MIN((MMU_PAGESIZE - offset), size);
sglinfo->si_asp = NULL;
page = page->p_next;
} else {
ASSERT((buftype == DMA_OTYP_VADDR) ||
(buftype == DMA_OTYP_BUFVADDR));
sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as;
if (sglinfo->si_asp == NULL) {
sglinfo->si_asp = &kas;
}
offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
if (pparray != NULL) {
ASSERT(!PP_ISFREE(pparray[pcnt]));
paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset;
psize = MIN((MMU_PAGESIZE - offset), size);
pcnt++;
} else {
paddr = pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat,
vaddr)) + offset;
psize = MIN(size, (MMU_PAGESIZE - offset));
vaddr += psize;
}
}
/* save the iommu page offset */
sglinfo->si_buf_offset = offset & IMMU_PAGEOFFSET;
/*
* setup dvcookie and dcookie for [paddr, paddr+psize)
*/
cookie_update(domain, dma, paddr, psize, maxseg, nocross);
size -= psize;
while (size > 0) {
/* get the size for this page (i.e. partial or full page) */
psize = MIN(size, MMU_PAGESIZE);
if (buftype == DMA_OTYP_PAGES) {
/* get the paddr from the page_t */
ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page));
paddr = pfn_to_pa(page->p_pagenum);
page = page->p_next;
} else if (pparray != NULL) {
/* index into the array of page_t's to get the paddr */
ASSERT(!PP_ISFREE(pparray[pcnt]));
paddr = pfn_to_pa(pparray[pcnt]->p_pagenum);
pcnt++;
} else {
/* call into the VM to get the paddr */
paddr = pfn_to_pa(hat_getpfnum
(sglinfo->si_asp->a_hat, vaddr));
vaddr += psize;
}
/*
* set dvcookie and dcookie for [paddr, paddr+psize)
*/
cookie_update(domain, dma, paddr, psize, maxseg, nocross);
size -= psize;
}
cookie_finalize(hp, immu, domain, rdip, immu_flags);
/* account for the offset into the first page */
dma->dp_cookies[0].dmac_laddress += sglinfo->si_buf_offset;
/* save away how many cookies we have */
sglinfo->si_sgl_size = dma->dp_dvmax + 1;
return (DDI_SUCCESS);
}
/* ############################# Functions exported ######################## */
/*
* setup the DVMA subsystem
* this code runs only for the first IOMMU unit
*/
void
immu_dvma_setup(list_t *listp)
{
immu_t *immu;
uint_t kval;
size_t nchains;
/* locks */
mutex_init(&immu_domain_lock, NULL, MUTEX_DEFAULT, NULL);
/* Create lists */
list_create(&immu_unity_domain_list, sizeof (domain_t),
offsetof(domain_t, dom_maptype_node));
list_create(&immu_xlate_domain_list, sizeof (domain_t),
offsetof(domain_t, dom_maptype_node));
/* Setup BDF domain hash */
nchains = 0xff;
kval = mod_hash_iddata_gen(nchains);
bdf_domain_hash = mod_hash_create_extended("BDF-DOMAIN_HASH",
nchains, mod_hash_null_keydtor, mod_hash_null_valdtor,
mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp,
KM_NOSLEEP);
ASSERT(bdf_domain_hash);
immu = list_head(listp);
for (; immu; immu = list_next(listp, immu)) {
create_unity_domain(immu);
did_init(immu);
context_init(immu);
immu->immu_dvma_setup = B_TRUE;
}
}
/*
* Start up one DVMA unit
*/
void
immu_dvma_startup(immu_t *immu)
{
ASSERT(immu);
ASSERT(immu->immu_dvma_running == B_FALSE);
if (immu_gfxdvma_enable == B_FALSE &&
immu->immu_dvma_gfx_only == B_TRUE) {
return;
}
/*
* DVMA will start once IOMMU is "running"
*/
ASSERT(immu->immu_dvma_running == B_FALSE);
immu->immu_dvma_running = B_TRUE;
}
/*
* immu_dvma_physmem_update()
* called when the installed memory on a
* system increases, to expand domain DVMA
* for domains with UNITY mapping
*/
void
immu_dvma_physmem_update(uint64_t addr, uint64_t size)
{
uint64_t start;
uint64_t npages;
int dcount;
dcookie_t dcookies[1] = {0};
domain_t *domain;
/*
* Just walk the system-wide list of domains with
* UNITY mapping. Both the list of *all* domains
* and the list of *UNITY* domains are protected by
* the same single lock.
*/
mutex_enter(&immu_domain_lock);
domain = list_head(&immu_unity_domain_list);
for (; domain; domain = list_next(&immu_unity_domain_list, domain)) {
/* There is no vmem_arena for unity domains. Just map it */
ddi_err(DER_LOG, NULL, "IMMU: unity-domain: Adding map "
"[0x%" PRIx64 " - 0x%" PRIx64 "]", addr, addr + size);
start = IMMU_ROUNDOWN(addr);
npages = (IMMU_ROUNDUP(size) / IMMU_PAGESIZE) + 1;
dcookies[0].dck_paddr = start;
dcookies[0].dck_npages = npages;
dcount = 1;
(void) dvma_map(domain->dom_immu, domain, start, npages,
dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
}
mutex_exit(&immu_domain_lock);
}
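/*
* immu_dvma_map()
* set up DVMA for a DMA request or a memory range. For xlate
* domains with a DMA handle, create and map DVMA cookies; for
* unity domains, let the rootnex use physical addresses. Also
* updates the root and context entries for the device.
*/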
int
immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
uint_t prealloc_count, dev_info_t *rdip, immu_flags_t immu_flags)
{
ddi_dma_attr_t *attr;
dev_info_t *ddip;
domain_t *domain;
immu_t *immu;
dcookie_t dcookies[1] = {0};
int dcount = 0;
boolean_t pde_set = B_TRUE;
int r = DDI_FAILURE;
ASSERT(immu_enable == B_TRUE);
ASSERT(immu_running == B_TRUE || !(immu_flags & IMMU_FLAGS_DMAHDL));
ASSERT(hp || !(immu_flags & IMMU_FLAGS_DMAHDL));
/*
* Intel IOMMU will only be turned on if the MMU
* page size is a multiple of the IOMMU page size
*/
/*LINTED*/
ASSERT(MMU_PAGESIZE % IMMU_PAGESIZE == 0);
/* Can only do DVMA if dip is attached */
if (rdip == NULL) {
ddi_err(DER_PANIC, rdip, "DVMA map: No device specified");
/*NOTREACHED*/
}
immu_flags |= dma_to_immu_flags(dmareq);
immu = immu_dvma_get_immu(rdip, immu_flags);
if (immu == NULL) {
/*
* possible that there is no IOMMU unit for this device
* - BIOS bugs are one example.
*/
ddi_err(DER_WARN, rdip, "No IMMU unit found for device");
return (DDI_DMA_NORESOURCES);
}
/*
* redirect isa devices attached under lpc to lpc dip
*/
if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
rdip = get_lpc_devinfo(immu, rdip, immu_flags);
if (rdip == NULL) {
ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
/*NOTREACHED*/
}
}
/* Reset immu, as redirection can change IMMU */
immu = NULL;
/*
* for gart, redirect to the real graphic devinfo
*/
if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
rdip = get_gfx_devinfo(rdip);
if (rdip == NULL) {
ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
/*NOTREACHED*/
}
}
/*
* Set up the DVMA domain for the device. This only
* does real work the first time we do DVMA for a
* device.
*/
ddip = NULL;
domain = device_domain(rdip, &ddip, immu_flags);
if (domain == NULL) {
ASSERT(ddip == NULL);
ddi_err(DER_MODE, rdip, "Intel IOMMU setup failed for device");
return (DDI_DMA_NORESOURCES);
}
/*
* If a domain is found, we must also have a domain dip
* which is the topmost ancestor dip of rdip that shares
* the same domain with rdip.
*/
if (domain->dom_did == 0 || ddip == NULL) {
ddi_err(DER_MODE, rdip, "domain did 0(%d) or ddip NULL(%p)",
domain->dom_did, ddip);
return (DDI_DMA_NORESOURCES);
}
immu = domain->dom_immu;
ASSERT(immu);
if (domain->dom_did == IMMU_UNITY_DID) {
ASSERT(domain == immu->immu_unity_domain);
/* mapping already done. Let rootnex create cookies */
r = DDI_DMA_USE_PHYSICAL;
} else if (immu_flags & IMMU_FLAGS_DMAHDL) {
/* if we have a DMA handle, the IOMMUs must be running */
ASSERT(immu->immu_regs_running == B_TRUE);
ASSERT(immu->immu_dvma_running == B_TRUE);
attr = &hp->dmai_attr;
if (attr == NULL) {
ddi_err(DER_PANIC, rdip,
"DMA handle (%p): NULL attr", hp);
/*NOTREACHED*/
}
if (cookie_create(hp, dmareq, attr, immu, domain, rdip,
prealloc_count, immu_flags) != DDI_SUCCESS) {
ddi_err(DER_MODE, rdip, "dvcookie_alloc: failed");
return (DDI_DMA_NORESOURCES);
}
r = DDI_DMA_MAPPED;
} else if (immu_flags & IMMU_FLAGS_MEMRNG) {
dcookies[0].dck_paddr = mrng->mrng_start;
dcookies[0].dck_npages = mrng->mrng_npages;
dcount = 1;
pde_set = dvma_map(immu, domain, mrng->mrng_start,
mrng->mrng_npages, dcookies, dcount, rdip, immu_flags);
immu_regs_iotlb_flush(immu, domain->dom_did, mrng->mrng_start,
mrng->mrng_npages, pde_set == B_TRUE ?
TLB_IVA_WHOLE : TLB_IVA_LEAF, IOTLB_PSI);
r = DDI_DMA_MAPPED;
} else {
ddi_err(DER_PANIC, rdip, "invalid flags for immu_dvma_map()");
/*NOTREACHED*/
}
/*
* Update the root and context entries
*/
if (immu_context_update(immu, domain, ddip, rdip, immu_flags)
!= DDI_SUCCESS) {
ddi_err(DER_MODE, rdip, "DVMA map: context update failed");
return (DDI_DMA_NORESOURCES);
}
immu_regs_wbf_flush(immu);
return (r);
}
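/*
* immu_dvma_unmap()
* tear down the DVMA state set up by immu_dvma_map() for a
* DMA handle. Unity domains return DDI_DMA_USE_PHYSICAL so the
* rootnex frees the cookies itself.
*/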
int
immu_dvma_unmap(ddi_dma_impl_t *hp, dev_info_t *rdip)
{
ddi_dma_attr_t *attr;
rootnex_dma_t *dma;
domain_t *domain;
immu_t *immu;
dev_info_t *ddip;
immu_flags_t immu_flags;
ASSERT(immu_enable == B_TRUE);
ASSERT(immu_running == B_TRUE);
ASSERT(hp);
/*
* Intel IOMMU will only be turned on if the IOMMU
* page size is the same as the MMU page size
*/
/*LINTED*/
ASSERT(MMU_PAGESIZE == IMMU_PAGESIZE);
/* rdip need not be attached */
if (rdip == NULL) {
ddi_err(DER_PANIC, rdip, "DVMA unmap: No device specified");
return (DDI_DMA_NORESOURCES);
}
/*
* Get the device domain, this should always
* succeed since there had to be a domain to
* setup DVMA.
*/
dma = (rootnex_dma_t *)hp->dmai_private;
attr = &hp->dmai_attr;
if (attr == NULL) {
ddi_err(DER_PANIC, rdip, "DMA handle (%p) has NULL attr", hp);
/*NOTREACHED*/
}
immu_flags = dma->dp_sleep_flags;
immu = immu_dvma_get_immu(rdip, immu_flags);
if (immu == NULL) {
/*
* possible that there is no IOMMU unit for this device
* - BIOS bugs are one example.
*/
ddi_err(DER_WARN, rdip, "No IMMU unit found for device");
return (DDI_DMA_NORESOURCES);
}
/*
* redirect isa devices attached under lpc to lpc dip
*/
if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
rdip = get_lpc_devinfo(immu, rdip, immu_flags);
if (rdip == NULL) {
ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
/*NOTREACHED*/
}
}
/* Reset immu, as redirection can change IMMU */
immu = NULL;
/*
* for gart, redirect to the real graphic devinfo
*/
if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
rdip = get_gfx_devinfo(rdip);
if (rdip == NULL) {
ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
/*NOTREACHED*/
}
}
ddip = NULL;
domain = device_domain(rdip, &ddip, immu_flags);
if (domain == NULL || domain->dom_did == 0 || ddip == NULL) {
ddi_err(DER_MODE, rdip, "Attempt to unmap DVMA for "
"a device without domain or with an uninitialized "
"domain");
return (DDI_DMA_NORESOURCES);
}
/*
* immu must be set in the domain.
*/
immu = domain->dom_immu;
ASSERT(immu);
if (domain->dom_did == IMMU_UNITY_DID) {
ASSERT(domain == immu->immu_unity_domain);
/*
* domain is unity, nothing to do here, let the rootnex
* code free the cookies.
*/
return (DDI_DMA_USE_PHYSICAL);
}
dma = hp->dmai_private;
if (dma == NULL) {
ddi_err(DER_PANIC, rdip, "DVMA unmap: DMA handle (%p) has "
"no private dma structure", hp);
/*NOTREACHED*/
}
cookie_free(dma, immu, domain, rdip);
/* No invalidation needed for unmap */
immu_regs_wbf_flush(immu);
return (DDI_SUCCESS);
}
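/*
* immu_devi_get()
* return the immu_devi_t hanging off a devinfo node, read
* atomically without taking any lock
*/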
immu_devi_t *
immu_devi_get(dev_info_t *rdip)
{
immu_devi_t *immu_devi;
volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu);
/* Just want atomic reads. No need for lock */
immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr,
0);
return (immu_devi);
}