intel_iommu.c revision 7e301000bf06f54502582dd72d606b27db229366
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Portions Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2009, Intel Corporation.
* All rights reserved.
*/
/*
* Intel IOMMU implementation
*/
#include <sys/pci_impl.h>
#include <sys/sysmacros.h>
#include <sys/ddidmareq.h>
#include <sys/ddi_impldefs.h>
#include <sys/smp_impldefs.h>
#include <sys/archsystm.h>
#include <sys/x86_archext.h>
#include <sys/bootconf.h>
#include <sys/bootinfo.h>
#include <sys/intel_iommu.h>
#include <sys/iommulib.h>
#include <sys/pci_cfgspace.h>
/*
* Macros based on PCI spec
*/
/*
* Are we on a Mobile 4 Series Chipset
*/
static int mobile4_cs = 0;
/*
* Activate usb workaround for some Mobile 4 Series Chipset based platforms
* On Toshiba laptops, it's observed that usb devices appear to
* read physical page 0. If we enable RW access via iommu, the system doesn't
* hang, otherwise the system hangs when the last include-all engine is
* enabled for translation.
* This happens only when enabling legacy emulation mode.
*/
static int usb_page0_quirk = 1;
static int usb_fullpa_quirk = 0;
static int usb_rmrr_quirk = 1;
/*
* internal variables
* iommu_state - the list of iommu structures
* page_num - the count of pages for iommu page tables
*/
static list_t iommu_states;
/*
* record some frequently used dips
*/
/*
* A single element in the BDF based cache of private structs
*/
typedef struct bdf_private_entry {
int bpe_seg;
int bpe_bus;
int bpe_devfcn;
struct bdf_private_entry *bpe_next;
/*
* Head of the BDF based cache of private structs
*/
typedef struct bdf_private_cache {
static bdf_private_cache_t bdf_private_cache;
/*
* dvma cache related variables
*/
/* ioapic info for interrupt remapping */
/*
* this is used when there is a dedicated drhd for the
* gfx
*/
int gfx_drhd_disable = 0;
/*
* switch to disable dmar remapping unit, even the initiation work has
* been finished
*/
int dmar_drhd_disable = 0;
/*
*/
int qinv_disable = 0;
int intrr_disable = 0;
/*
 * Human-readable strings for DMA-remapping fault reason codes, indexed
 * by the code reported in the fault recording registers.  The fault
 * handler uses this table for codes below 0x20 (see iommu_intr_handler).
 * The last entry, at index DMAR_MAX_REASON_NUMBER, is the catch-all
 * used for out-of-range reason codes.
 */
static char *dmar_fault_reason[] = {
"Reserved",
"The present field in root-entry is Clear",
"The present field in context-entry is Clear",
"Hardware detected invalid programming of a context-entry",
"The DMA request attempted to access an address beyond max support",
"The Write field in a page-table entry is Clear when DMA write",
"The Read field in a page-table entry is Clear when DMA read",
"Access the next level page table resulted in error",
"Access the root-entry table resulted in error",
"Access the context-entry table resulted in error",
"Reserved field not initialized to zero in a present root-entry",
"Reserved field not initialized to zero in a present context-entry",
"Reserved field not initialized to zero in a present page-table entry",
"DMA blocked due to the Translation Type field in context-entry",
"Incorrect fault event reason number"
};
#define DMAR_MAX_REASON_NUMBER (14)
/*
* Check if the device has mobile 4 chipset quirk
*/
static int
{
"vendor-id", -1);
"device-id", -1);
mobile4_cs = 1;
return (DDI_WALK_TERMINATE);
} else {
return (DDI_WALK_CONTINUE);
}
}
/*
 * check_hwquirk()
 *   scan the device tree for chipsets that need quirk handling
 *   (e.g. the Mobile 4 Series chipset flagged via mobile4_cs above).
 */
static void
check_hwquirk(void)
{
int count;
/*
 * walk through the entire device tree
 */
/*
 * NOTE(review): "count" looks like the circular-dependency cookie for
 * an ndi_devi_enter()/ddi_walk_devs() style tree walk; the walk call
 * itself is not visible in this chunk -- confirm against full source.
 */
}
/*
* QS field of Invalidation Queue Address Register
* the size of invalidation queue is 1 << (qinv_iqa_qs + 8)
*/
/*
* the invalidate descriptor type of queued invalidation interface
*/
/*
 * Human-readable names for the queued-invalidation descriptor types,
 * indexed by the descriptor type field.  The last entry, at index
 * QINV_MAX_DSC_TYPE, is the catch-all for unrecognized types.
 */
static char *qinv_dsc_type[] = {
"Reserved",
"Context Cache Invalidate Descriptor",
"IOTLB Invalidate Descriptor",
"Device-IOTLB Invalidate Descriptor",
"Interrupt Entry Cache Invalidate Descriptor",
"Invalidation Wait Descriptor",
"Incorrect queue invalidation type"
};
#define QINV_MAX_DSC_TYPE (6)
/*
* S field of the Interrupt Remapping Table Address Register
* the size of the interrupt remapping table is 1 << (intrr_irta_s + 1)
*/
/*
* If true, arrange to suppress broadcast EOI by setting edge-triggered mode
* even for level-triggered interrupts in the interrupt-remapping engine.
* If false, broadcast EOI can still be suppressed if the CPU supports the
* APIC_SVR_SUPPRESS_BROADCAST_EOI bit. In both cases, the IOAPIC is still
* programmed with the correct trigger mode, and pcplusmp must send an EOI
* to the IOAPIC by writing to the IOAPIC's EOI register to make up for the
* missing broadcast EOI.
*/
static int intrr_suppress_brdcst_eoi = 0;
/*
* whether verify the source id of interrupt request
*/
static int intrr_enable_sid_verify = 0;
/*
 * Human-readable strings for interrupt-remapping fault reason codes.
 * The fault handler selects this table for reason codes in the
 * 0x20-0x26 range (see iommu_intr_handler).  The last entry, at index
 * INTRR_MAX_REASON_NUMBER, is the catch-all for out-of-range codes.
 *
 * Fixes: "inlcude" typo and "exceed" -> "exceeds" grammar in the
 * diagnostic messages emitted by the fault handler.
 */
static char *intrr_fault_reason[] = {
"reserved field set in IRTE",
"interrupt_index exceeds the intr-remap table size",
"present field in IRTE is clear",
"hardware access intr-remap table address resulted in error",
"reserved field set in IRTE, include various conditional",
"hardware blocked an interrupt request in Compatibility format",
"remappable interrupt request blocked due to verification failure"
};
#define INTRR_MAX_REASON_NUMBER (6)
/*
* the queued invalidation interface functions
*/
/*LINTED*/
/*LINTED*/
/* interrupt remapping related functions */
static int intrr_tbl_alloc_entry(intr_remap_tbl_state_t *);
static void get_ioapic_iommu_info(void);
static void intr_remap_get_iommu(apic_irq_t *);
static void intr_remap_get_sid(apic_irq_t *);
static int intr_remap_init(int);
static void intr_remap_enable(int);
static void intr_remap_alloc_entry(apic_irq_t *);
static void intr_remap_map_entry(apic_irq_t *, void *);
static void intr_remap_free_entry(apic_irq_t *);
static struct apic_intrr_ops intr_remap_ops = {
};
static int intrr_apic_mode = LOCAL_APIC;
/*
* cpu_clflush()
* flush the cpu cache line
*/
static void
{
uint_t i;
for (i = 0; i < size; i += x86_clflush_size) {
clflush_insn(addr+i);
}
mfence_insn();
}
/*
* iommu_page_init()
* do some init work for the iommu page allocator
*/
static void
iommu_page_init(void)
{
/*
 * page_num counts pages allocated for iommu page tables (per the
 * top-of-file comment); reset it before any allocations are made.
 */
page_num = 0;
}
/*
* iommu_get_page()
* get a 4k iommu page, and zero out it
*/
static paddr_t
{
page_num++;
}
/*
* iommu_free_page()
* free the iommu page allocated with iommu_get_page
*/
static void
{
page_num--;
}
((iommu)->iu_reg_handle, \
((iommu)->iu_reg_handle, \
/*
* calculate_agaw()
* calculate agaw from gaw
*/
static int
calculate_agaw(int gaw)
{
int r, agaw;
if (r == 0)
else
if (agaw > 64)
agaw = 64;
return (agaw);
}
/*
* destroy_iommu_state()
* destroy an iommu state
*/
static void
{
if (iommu->iu_inv_queue) {
}
if (iommu->iu_intr_remap_tbl) {
}
}
/*
* iommu_update_stats - update iommu private kstat counters
*
* This routine will dump and reset the iommu's internal
* statistics counters. The current stats dump values will
* be sent to the kernel status area.
*/
static int
{
const char *state;
if (rw == KSTAT_WRITE)
return (EACCES);
"enabled" : "disabled";
return (0);
}
/*
* iommu_init_stats - initialize kstat data structures
*
* This routine will create and initialize the iommu private
* statistics counters.
*/
int
{
/*
* Create and init kstat
*/
"misc", KSTAT_TYPE_NAMED,
sizeof (iommu_kstat_t) / sizeof (kstat_named_t), 0);
"Could not create kernel statistics for %s",
return (DDI_FAILURE);
}
/*
* Initialize all the statistics
*/
/*
* Function to provide kernel stat update on demand
*/
/*
* Pointer into provider's raw statistics
*/
/*
* Add kstat to systems kstat chain
*/
return (DDI_SUCCESS);
}
/*
* iommu_intr_handler()
* the fault event handler for a single drhd
*/
static int
{
int index, fault_reg_offset;
int max_fault_index;
int any_fault = 0;
/* read the fault status */
/* check if we have a pending fault for this IOMMU */
if (!(status & IOMMU_FAULT_STS_PPF)) {
goto no_primary_faults;
}
/*
* handle all primary pending faults
*/
any_fault = 1;
while (1) {
/* read the higher 64bits */
/* check if pending fault */
if (!IOMMU_FRR_GET_F(val))
break;
/* get the fault reason, fault type and sid */
/* read the first 64bits */
/* clear the fault */
/* report the fault info */
if (fault_reason < 0x20) {
/* dmar-remapping fault */
"%s generated a fault event when translating "
"DMA %s\n"
"the reason is:\n\t %s",
} else if (fault_reason < 0x27) {
/* intr-remapping fault */
"%s generated a fault event when translating "
"interrupt request\n"
"the reason is:\n\t %s",
iidx,
}
index++;
if (index > max_fault_index)
index = 0;
}
/*
* handle queued invalidation interface errors
*/
if (status & IOMMU_FAULT_STS_IQE) {
+ (head * QINV_ENTRY_SIZE));
/* report the error */
"%s generated a fault when fetching a descriptor from the\n"
"\tinvalidation queue, or detects that the fetched\n"
"\tdescriptor is invalid. The head register is "
"\tthe type is %s\n",
}
/*
* Hardware received an unexpected or invalid Device-IOTLB
* invalidation completion
*/
if (status & IOMMU_FAULT_STS_ICE) {
"Hardware received an unexpected or invalid "
"Device-IOTLB invalidation completion.\n");
}
/*
* Hardware detected a Device-IOTLB invalidation
* completion time-out
*/
if (status & IOMMU_FAULT_STS_ITE) {
"Hardware detected a Device-IOTLB invalidation "
"completion time-out.\n");
}
/* clear the fault */
}
/*
* Function to identify a display device from the PCI class code
*/
static int
{
static uint_t disp_classes[] = {
0x000100,
0x030000,
0x030001
};
for (i = 0; i < nclasses; i++) {
if (classcode == disp_classes[i])
return (1);
}
return (0);
}
/*
*/
static int
{
int is_pciex = 0;
*is_pci_bridge = 0;
if (!(status & PCI_STAT_CAP))
return (0);
if (cap == PCI_CAP_ID_PCI_E) {
/*
* See section 7.8.2 of PCI-Express Base Spec v1.0a
* PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the
* device is a PCIe2PCI bridge
*/
((status & PCIE_PCIECAP_DEV_TYPE_MASK) ==
PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? 1 : 0;
is_pciex = 1;
}
}
return (is_pciex);
}
/*
* Allocate a private structure and initialize it
*/
static iommu_private_t *
{
int pciex = 0;
int is_pci_bridge = 0;
/* No cached private struct. Create one */
/* record the bridge */
(subcl == PCI_BRIDGE_PCI));
if (private->idp_is_bridge) {
if (pciex && is_pci_bridge)
else if (pciex)
else
}
/* record the special devices */
(subcl == PCI_BRIDGE_ISA));
return (private);
}
/*
* Set the private struct in the private field of a devinfo node
*/
static int
{
seg = 0; /* NOTE: Currently seg always = 0 */
/* probably not PCI device */
return (DDI_FAILURE);
}
/*
* We always need a private structure, whether it was cached
* or not previously, since a hotplug may change the type of
* device - for example we may have had a bridge here before,
* and now we could have a leaf device
*/
/* assume new cache entry needed */
break;
}
}
if (bpe) {
/* extry exists, new not needed */
/* domain may be NULL */
} else {
}
return (DDI_SUCCESS);
}
/*
* intel_iommu_init()
* the interface to setup interrupt handlers and init the DMAR units
*/
static void
intel_iommu_init(void)
{
char intr_name[64];
uint32_t iommu_instance = 0;
msi_addr = (MSI_ADDR_HDR |
msi_data =
vect);
"iommu intr%d", iommu_instance++);
(void) iommu_intr_handler(iommu);
if (intrr_apic_mode == LOCAL_X2APIC) {
} else {
}
}
/*
* enable dma remapping
*/
if (!dmar_drhd_disable) {
if (gfx_drhd_disable &&
continue;
}
}
}
/*
* wait max 60s for the hardware completion
*/
#define IOMMU_WAIT_TIME 60000000
{ \
while (1) { \
ntick = ddi_get_lbolt(); \
if (completion) {\
break; \
} \
"iommu wait completion time out\n"); \
} else { \
iommu_cpu_nop();\
}\
}\
}
/*
* dmar_flush_write_buffer()
* flush the write buffer
*/
static void
{
/* record the statistics */
}
/*
* dmar_flush_iotlb_common()
* flush the iotlb cache
*/
static void
{
/*
*/
}
}
/*
* if the hardware doesn't support page selective invalidation, we
* will use domain type. Otherwise, use global type
*/
switch (type) {
case TLB_INV_G_PAGE:
addr & IOMMU_PAGE_OFFSET) {
goto ignore_psi;
}
break;
case TLB_INV_G_DOMAIN:
break;
case TLB_INV_G_GLOBAL:
break;
default:
return;
}
/*
* do the actual flush
*/
/* verify there is no pending command */
if (iva)
/*
* check the result and record the statistics
*/
switch (TLB_INV_GET_IAIG(status)) {
/* global */
case 1:
break;
/* domain */
case 2:
break;
/* psi */
case 3:
break;
default:
break;
}
}
/*
* dmar_flush_iotlb_psi()
* register based iotlb psi invalidation
*/
static void
{
uint_t used_count = 0;
/* choose page specified invalidation */
/* MAMV is valid only if PSI is set */
while (count != 0) {
/* First calculate alignment of DVMA */
;
/* truncate count to the nearest power of 2 */
;
}
if (align >= used_count) {
} else {
/* align < used_count */
used_count = align;
;
}
count -= used_count;
}
/* choose domain invalidation */
} else {
0, 0, TLB_INV_G_DOMAIN);
}
}
/*
* dmar_flush_iotlb_dsi()
* flush dsi iotlb
*/
static void
{
}
/*
* dmar_flush_iotlb_glb()
* flush global iotbl
*/
static void
{
}
/*
* dmar_flush_context_cache()
* flush the context cache
*/
static void
{
/*
* define the command
*/
switch (type) {
case CTT_INV_G_DEVICE:
break;
case CTT_INV_G_DOMAIN:
break;
case CTT_INV_G_GLOBAL:
break;
default:
return;
}
/* verify there is no pending command */
/* record the context cache statistics */
}
/*
* dmar_flush_context_fsi()
* function based context cache flush
*/
static void
{
}
/*
* dmar_flush_context_dsi()
* domain based context cache flush
*/
static void
{
}
/*
* dmar_flush_context_gbl()
* flush global context cache
*/
static void
{
}
/*
* dmar_set_root_entry_table()
* set root entry table
*/
static void
{
}
/*
* dmar_enable_unit()
* enable the dmar unit
*/
static void
{
}
/*
* iommu_bringup_unit()
* the processes to bring up a dmar unit
*/
static void
{
/*
* flush the iommu write buffer
*/
/*
* set root entry table
*/
/*
* flush the context cache
*/
/*
* flush the iotlb cache
*/
/*
* at last enable the unit
*/
/* enable queued invalidation */
if (iommu->iu_inv_queue)
}
/*
* iommu_dvma_cache_get()
* get a dvma from the cache
*/
static uint64_t
{
if (index >= DVMA_CACHE_HEAD_CNT)
return (0);
((nocross == 0) ||
< (nocross - 1)))) {
break;
}
}
if (cache_node) {
return (ioaddr);
}
return (0);
}
/*
* iommu_dvma_cache_put()
* put a dvma to the cache after use
*/
static void
{
/* out of cache range */
if (index >= DVMA_CACHE_HEAD_CNT) {
return;
}
/* get a node block */
if (cache_node) {
}
/* no cache, alloc one */
if (cache_node == NULL) {
}
/* initialize this node */
/* insert into the free list */
/* shrink the cache list */
}
if (shrink) {
}
}
/*
* iommu_dvma_cache_flush()
* flush the dvma caches when vmem_xalloc() failed
*/
static void
{
ddi_node_name(dip));
while (cache_node) {
}
}
}
/*
* get_dvma_cookie_array()
* get a dvma cookie array from the cache or allocate
*/
static iommu_dvma_cookie_t *
{
if (array_size > MAX_COOKIE_CACHE_SIZE) {
KM_SLEEP));
}
/* LINTED E_EQUALITY_NOT_ASSIGNMENT */
cache_head->dch_count--;
}
if (cookie) {
return (cookie);
} else {
KM_SLEEP));
}
}
/*
* put_dvma_cookie_array()
* put a dvma cookie array to the cache or free
*/
static void
{
if (array_size > MAX_COOKIE_CACHE_SIZE) {
return;
}
cache_head->dch_count++;
}
/*
* dmar_reg_plant_wait()
* the plant wait operation for register based cache invalidation
*/
static void
{
/* get a node */
if (node) {
}
/* no cache, alloc one */
}
/* initialize this node */
/* insert into the pend list */
head->ich_pend_count++;
}
/*
* dmar_release_dvma_cookie()
* release the dvma cookie
*/
static void
{
uint_t i;
/* free dvma */
for (i = 0; i < count; i++) {
}
/* free the cookie array */
}
/*
* dmar_reg_reap_wait()
* the reap wait operation for register based cache invalidation
*/
static void
{
if (node) {
head->ich_pend_count--;
}
if (node) {
/* put the node into the node cache */
}
}
/*
* dmar_init_ops()
* init dmar ops
*/
static void
{
/* initialize the dmar operations */
/* cpu clflush */
if (iommu->iu_coherency) {
} else {
}
/* Check for Mobile 4 Series Chipset */
"Mobile 4 Series chipset present, activating quirks\n");
}
/* write buffer */
} else {
}
/* cache related functions */
}
/*
* create_iommu_state()
* alloc and setup the iommu state
*/
static int
{
int bitnum;
int ret;
static ddi_device_acc_attr_t ioattr = {
};
/*
* map the register address space
*/
&(iommu->iu_reg_handle));
if (ret != DDI_SUCCESS) {
return (DDI_FAILURE);
}
(void *)ipltospl(IOMMU_INTR_IPL));
/*
* get the register value
*/
/*
* if the hardware access is non-coherent, we need clflush
*/
} else {
if (!(x86_feature & X86_CLFSH)) {
"missing clflush functionality");
return (DDI_FAILURE);
}
}
/*
* retrieve the maximum number of domains
*/
/*
* setup the domain id allocator
* domain id 0 is reserved by the architecture
*/
/*
* calculate the agaw
*/
while (bitnum < 5) {
break;
else
bitnum++;
}
if (bitnum >= 5) {
/*NOTREACHED*/
return (DDI_FAILURE);
} else {
}
/*
* the iommu is originally disabled
*/
iommu->iu_enabled = 0;
iommu->iu_global_cmd_reg = 0;
/*
* init kstat
*/
(void) iommu_init_stats(iommu);
/*
* init dmar ops
*/
/*
* alloc root entry table, this should put after init ops
*/
/*
* init queued invalidation interface
*/
"%s init queued invalidation interface failed\n",
}
}
/*
* init intr remapping table state pointer
*/
/*
* initialize the iotlb pending list and cache
*/
MUTEX_DRIVER, NULL);
sizeof (iotlb_pend_node_t),
MUTEX_DRIVER, NULL);
sizeof (iotlb_pend_node_t),
/*
* insert this iommu into the list
*/
/*
* report this unit
*/
return (DDI_SUCCESS);
}
/*
* match_dip_sbdf()
* walk function for get_dip_from_info()
*/
static int
{
return (DDI_WALK_CONTINUE);
}
return (DDI_WALK_TERMINATE);
}
return (DDI_WALK_CONTINUE);
}
/*
* get_dip_from_info()
*/
static int
{
int count;
return (DDI_SUCCESS);
else
return (DDI_FAILURE);
}
/*
* iommu_get_pci_top_bridge()
* get the top level bridge for a pci device
*/
static dev_info_t *
{
continue;
}
return (tmp);
}
/*
* domain_vmem_init()
* initiate the domain vmem
*/
static void
{
char vmem_name[64];
static uint_t vmem_instance = 0;
void *vmem_ret;
"domain_vmem_%d", vmem_instance++);
mp = phys_install;
/*
* Skip page 0: vmem_create won't like it for obvious
* reasons.
*/
} else {
}
IOMMU_PAGE_SIZE, /* quantum */
NULL, /* afunc */
NULL, /* ffunc */
NULL, /* source */
0, /* qcache_max */
VM_SLEEP);
}
while (mp) {
if (!vmem_ret)
}
}
/*
* iommu_domain_init()
* initiate a domain
*/
static int
{
uint_t i;
/*
* allocate the domain id
*/
}
/*
* record the domain statistics
*/
/*
* create vmem map
*/
/*
* create the first level page table
*/
KM_SLEEP);
/*
* init the CPU available page tables
*/
/*
* init the dvma cache
*/
for (i = 0; i < DVMA_CACHE_HEAD_CNT; i++) {
/* init the free list */
sizeof (dvma_cache_node_t),
/* init the memory cache list */
sizeof (dvma_cache_node_t),
}
return (DDI_SUCCESS);
}
/*
* Get first ancestor with a non-NULL private struct
*/
static dev_info_t *
{
continue;
return (pdip);
}
return (NULL);
}
/*
* dmar_check_sub()
* check to see if the device is under scope of a p2p bridge
*/
static boolean_t
{
continue;
return (B_TRUE);
}
return (B_FALSE);
}
/*
* iommu_get_dmar()
* get the iommu structure for a device
*/
static intel_iommu_state_t *
{
seg = 0;
}
/*
* walk the drhd list for a match
*/
/*
* match the include all
*/
if (drhd->di_include_all)
return ((intel_iommu_state_t *)
/*
* try to match the device scope
*/
/*
* get a perfect match
*/
if (private &&
return ((intel_iommu_state_t *)
}
/*
* maybe under a scope of a p2p
*/
return ((intel_iommu_state_t *)
}
}
/*
* This may happen with buggy versions of BIOSes. Just warn instead
* of panic as we don't want whole system to go down because of one
* device.
*/
ddi_node_name(dip));
return (NULL);
}
/*
* domain_set_root_context
* set root context for a single device
*/
static void
{
/*
* set root entry
*/
if (!ROOT_ENTRY_GET_P(rce)) {
} else {
}
/* set context entry */
if (!CONT_ENTRY_GET_P(rce)) {
} else if (CONT_ENTRY_GET_ASR(rce) !=
" %d, %d, %d has been set", bus,
/*NOTREACHED*/
}
/* cache mode set, flush context cache */
/* cache mode not set, flush write buffer */
} else {
}
}
/*
* setup_single_context()
* setup the root context entry
*/
static void
{
}
/*
* setup_context_walk()
* the walk function to set up the possible context entries
*/
static int
{
/*NOTREACHED*/
}
return (DDI_WALK_PRUNECHILD);
}
/*
* setup_possible_contexts()
* set up all the possible context entries for a device under ppb
*/
static void
{
int count;
/* for pci-pci bridge */
return;
}
/* for pcie-pci bridge */
/* for functions under pcie-pci bridge */
}
/*
* iommu_alloc_domain()
* allocate a domain for device, the result is returned in domain parameter
*/
static int
{
uint_t need_to_set_parent = 0;
int count;
ddi_node_name(dip));
}
/*
* check if the domain has already allocated without lock held.
*/
if (private->idp_intel_domain) {
return (DDI_SUCCESS);
}
/*
* lock strategy for dip->devi_iommu_private->idp_intel_domain field:
* 1) read access is allowed without lock held.
* 2) write access is protected by ndi_devi_enter(dip, &count). Lock
* on dip will protect itself and all descendants.
* 3) lock will be released if in-kernel and iommu hardware data
* structures have been synchronized.
*/
/*
* double check if the domain has already created by other thread.
*/
if (private->idp_intel_domain) {
return (DDI_SUCCESS);
}
/*
* check to see if it is under a pci bridge
*/
if (b_private->idp_intel_domain) {
goto get_domain_finish;
} else {
need_to_set_parent = 1;
}
}
/*
* OK, we have to allocate a new domain
*/
return (DDI_FAILURE);
}
/*
* setup root context entries
*/
} else if (need_to_set_parent) {
}
return (DDI_SUCCESS);
}
/*
* iommu_get_domain()
* get a iommu domain for dip, and the result is returned in domain
*/
static int
{
/*
* for isa devices attached under lpc
*/
if (lpc_devinfo) {
} else {
return (DDI_FAILURE);
}
}
/*
* for gart, use the real graphic devinfo
*/
if (gfx_devinfo) {
} else {
return (DDI_FAILURE);
}
}
/*
* if iommu private is NULL:
* 1. try to find a cached private
* 2. if that fails try to create a new one
* 3. if this fails as well, device is probably not
* PCI and shares domain with an ancestor.
*/
}
return (DDI_FAILURE);
}
/*
* check if the domain has already allocated
*/
if (private->idp_intel_domain) {
return (DDI_SUCCESS);
}
/*
* allocate a domain for this device
*/
}
/*
* helper functions to manipulate iommu pte
*/
static void
{
}
static paddr_t
{
return (*pte & IOMMU_PAGE_MASK);
}
/*
* dvma_level_offset()
* get the page table offset by specifying a dvma and level
*/
static uint_t
{
return (offset);
}
/*
* iommu_setup_level_table()
* setup the page table for a level
*/
static iovpte_t
{
/*
* check whether pde already exists without lock held.
*/
return (vpte);
}
/* Speculatively allocate resources needed. */
/*
* double check whether pde already exists with lock held.
*/
return (vpte);
}
/* make previous changes visible to other threads. */
return (vpte);
}
/*
* iommu_setup_page_table()
* setup the page table for a dvma
*/
static caddr_t
{
int i;
for (i = level; i > 1; i--) {
}
}
/*
* iommu_map_page_range()
* map a range of pages for iommu translation
*
* domain: the device domain
* dvma: the start dvma for mapping
* start: the start physical address
* end: the end physical address
* flags: misc flag
*/
static int
{
count = 0;
"non-NULL pte");
}
} else {
}
paddr += IOMMU_PAGE_SIZE;
offset++;
count++;
}
/* flush cpu and iotlb cache */
if (!(flags & IOMMU_PAGE_PROP_NOSYNC)) {
/* cache mode set, flush iotlb */
/* cache mode not set, flush write buffer */
} else {
}
}
}
return (DDI_SUCCESS);
}
/*
* iommu_vmem_walker()
*/
static void
{
ddi_node_name(dip));
IOMMU_PAGE_SIZE, /* align/quantum */
0, /* phase */
0, /* nocross */
}
}
/*
* build_single_rmrr_identity_map()
* build identity map for a single rmrr unit
*/
static void
{
"ACPI DMAR table does not exist, ignoring",
continue;
}
continue;
}
"is not in reserved memory range\n",
}
(void) iommu_map_page_range(domain,
/*
* rmrr should never overlap phy_mem
*/
}
}
/*
* build_rmrr_identity_map()
* build identity mapping for devices under rmrr scopes
*/
static void
build_rmrr_identity_map(void)
{
int i;
for (i = 0; i < DMAR_MAX_SEGMENT; i++) {
break;
}
}
}
/*
* drhd_only_for_gfx()
* return TRUE, if the drhd is only for gfx
*/
static boolean_t
{
int dev_num;
if (drhd->di_include_all)
return (B_FALSE);
/* get the device number attached to this drhd */
dev_num = 0;
dev_num++;
}
if (dev_num == 1) {
return (B_FALSE);
}
if (private->idp_is_display)
return (B_TRUE);
}
return (B_FALSE);
}
/*
* build_dev_identity_map()
* build identity map for a device
*/
static void
{
"this device may not be functional",
ddi_node_name(dip));
return;
}
mp = phys_install;
(void) iommu_map_page_range(domain,
}
/*
* record the identity map for domain, any device
* which uses this domain will needn't any further
* map
*/
}
/*
* build dma map for bios reserved memspace
*/
static void
{
ddi_node_name(dip));
return;
}
while (mp != 0) {
(void) iommu_map_page_range(domain,
}
}
/*
* build_isa_gfx_identity_walk()
* the walk function for build_isa_gfx_identity_map()
*/
static int
{
/* ignore devices which cannot have private struct */
return (DDI_WALK_CONTINUE);
}
/* fix the gfx and fd */
if (private->idp_is_display) {
gfx_devinfo = dip;
return (DDI_WALK_CONTINUE);
} else if (private->idp_is_lpc) {
lpc_devinfo = dip;
return (DDI_WALK_CONTINUE);
}
return (DDI_WALK_CONTINUE);
}
return (DDI_WALK_CONTINUE);
}
/* workaround for usb legacy emulation mode */
if (usb_rmrr_quirk) {
"?Workaround for %s USB rmrr\n",
ddi_node_name(dip));
}
/*
* Identify usb ehci and uhci controllers
*/
if (usb_fullpa_quirk) {
"?Workaround for %s USB phys install mem\n",
ddi_node_name(dip));
return (DDI_WALK_CONTINUE);
}
if (usb_page0_quirk) {
"Unable to setup usb-quirk for %s failed,"
"this device may not be functional",
ddi_node_name(dip));
return (DDI_WALK_CONTINUE);
}
(void) iommu_map_page_range(domain,
0, 0, 0, DDI_DMA_READ | DDI_DMA_WRITE |
ddi_node_name(dip));
}
return (DDI_WALK_CONTINUE);
}
/*
* build_isa_gfx_identity_map()
* build identity map for isa and gfx devices
*/
static void
{
int count;
/*
* walk through the entire device tree
*/
}
/*
* dmar_check_boot_option()
* check the intel iommu boot option
*/
static void
{
int len;
char *boot_option;
opt);
*var = 1;
opt);
*var = 0;
}
}
}
extern void (*rootnex_iommu_init)(void);
/*
* intel_iommu_attach_dmar_nodes()
* attach intel iommu nodes
*/
int
{
int i;
/*
* retrieve the dmar boot options
*/
/*
* init the lists
*/
/*
* initiate each iommu unit
*/
for (i = 0; i < DMAR_MAX_SEGMENT; i++) {
goto iommu_init_fail;
}
}
/*
* register interrupt remap ops
*/
}
/*
* build identity map for devices in the rmrr scope
*/
/*
* build identity map for isa and gfx devices
*/
/*
* initialize the dvma cookie cache
*/
for (i = 0; i < MAX_COOKIE_CACHE_SIZE; i++) {
MUTEX_DRIVER, NULL);
cookie_cache[i].dch_count = 0;
}
/*
* register the interrupt add function
*/
return (DDI_SUCCESS);
/*
* free iommu state structure
*/
}
return (DDI_FAILURE);
}
/*
* get_level_table()
* get level n page table, NULL is returned if
* failure encountered
*/
static caddr_t
{
/* walk to the level n page table */
for (i = level; i > n; i--) {
}
}
/*
* iommu_alloc_cookie_array()
* allocate the cookie array which is needed by map sgl
*/
static int
{
int kmflag;
/* figure out the rough estimate of array size */
/* the preallocated buffer fit this size */
/* we need to allocate new array */
} else {
/* convert the sleep flags */
} else {
kmflag = KM_NOSLEEP;
}
sizeof (ddi_dma_cookie_t);
return (IOMMU_SGL_NORESOURCES);
}
}
/* allocate the dvma cookie array */
return (IOMMU_SGL_SUCCESS);
}
/*
* iommu_alloc_dvma()
* alloc a dvma range for the caller
*/
static int
{
/* shortcuts */
/* parameters */
/* handle the rollover cases */
}
/* get from cache first */
/* allocate from vmem arena */
/* if xalloc failed, we have to flush the cache and retry */
}
}
/*
* save the dvma range in the device dvma cookie
*/
return (DDI_SUCCESS);
}
/*
* iommu_map_dvma()
* map dvma to the physical addresses, the actual
* mapped dvma page number is returned
*/
static int
{
int flags;
/* map each physical address */
}
/*
* intel_iommu_map_sgl()
* called from rootnex_dma_bindhdl(), to build dma
* cookies when iommu is enabled
*/
int
{
int e;
/* get domain for the dma request */
return (IOMMU_SGL_NORESOURCES);
}
/* direct return if drhd is disabled */
return (IOMMU_SGL_DISABLE);
/*
* allocate the cookies arrays, if the pre-allocated
* space is not enough, we should reallocate it
*/
return (IOMMU_SGL_NORESOURCES);
pcnt = 0;
cnt = 0;
/* retrieve paddr, psize, offset from dmareq */
if (buftype == DMA_OTYP_PAGES) {
} else {
(buftype == DMA_OTYP_BUFVADDR));
}
pcnt++;
} else {
}
}
/* save the iommu page offset */
/*
* allocate the dvma and map [paddr, paddr+psize)
*/
if (e != DDI_SUCCESS)
return (IOMMU_SGL_NORESOURCES);
/*
* setup the first cookie with the dvma of the page
* and the its size, we don't take account in the
* offset into the first page now
*/
dvma += IOMMU_PTOB(e);
while (size > 0) {
/* get the size for this page (i.e. partial or full page) */
if (buftype == DMA_OTYP_PAGES) {
/* get the paddr from the page_t */
/* index into the array of page_t's to get the paddr */
pcnt++;
} else {
/* call into the VM to get the paddr */
}
/*
* check to see if this page would put us
* over the max cookie size
*/
/* use the next cookie */
cnt++;
/* allocate the dvma and map [paddr, paddr+psize) */
if (e != DDI_SUCCESS)
return (IOMMU_SGL_NORESOURCES);
/* save the cookie information */
dvma += IOMMU_PTOB(e);
/*
* we can add this page in the current cookie
*/
} else {
dvma += IOMMU_PTOB(e);
}
}
/* take account in the offset into the first page */
/* save away how many cookies we have */
return (IOMMU_SGL_SUCCESS);
}
/*
* iommu_clear_leaf_pte()
* clear a single leaf pte
*/
static void
{
int count;
/* retrieve the leaf page table */
if (!leaf_table) {
return;
}
/* map the leaf page and walk to the pte */
/* clear the ptes */
count = 0;
(offset < IOMMU_PTE_MAX)) {
if (!*pte) {
} else {
*pte = 0;
}
csize += IOMMU_PAGE_SIZE;
offset++;
count++;
}
/* flush cpu and iotlb cache */
/* unmap the leaf page */
}
}
/*
* intel_iommu_unmap_sgl()
* called from rootnex_dma_unbindhdl(), to unbind dma
* cookies when iommu is enabled
*/
void
{
uint64_t i;
/* get the device domain, no return check needed here */
/* if the drhd is disabled, nothing will be done */
return;
/* the drhd is enabled */
for (i = 0; i < sinfo->si_sgl_size; i++) {
/* clear leaf ptes */
}
}
/*
* initialize invalidation request queue structure.
* call ddi_dma_mem_alloc to allocate physically contiguous
* pages for invalidation queue table
*/
static int
{
0U,
0xffffffffU,
0xffffffffU,
MMU_PAGESIZE, /* page aligned */
0x1,
0x1,
0xffffffffU,
0xffffffffU,
1,
4,
0
};
};
if (qinv_iqa_qs > QINV_MAX_QUEUE_SIZE)
inv_queue = (inv_queue_state_t *)
/* set devi_ops in dev info structure for ddi_dma_mem_alloc */
/*
* set devi_bus_dma_allochdl in dev info structure for
* ddi_dma_free_handle
*/
DEVI(ddi_root_node());
NULL,
"alloc invalidation queue table handler failed\n");
}
NULL,
"alloc invalidation queue sync mem handler failed\n");
goto sync_table_handle_failed;
}
/* alloc physical contiguous pages for invalidation queue */
size,
NULL,
&size,
"alloc invalidation queue table failed\n");
goto queue_table_mem_failed;
}
/* get the base physical address of invalidation request queue */
/* alloc status memory for invalidation wait descriptor */
size,
NULL,
&size,
"alloc invalidation queue sync mem failed\n");
goto sync_table_mem_failed;
}
/*
* init iotlb pend node for submitting invalidation iotlb
* queue request
*/
* sizeof (iotlb_pend_node_t *), KM_SLEEP);
/* set invalidation queue structure */
return (DDI_SUCCESS);
return (ENOMEM);
}
/* destroy invalidation queue structure */
static void
{
* sizeof (iotlb_pend_node_t *));
}
/* enable queued invalidation interface */
static void
{
/* Initialize the Invalidation Queue Tail register to zero */
/* set invalidation queue base address register */
/* enable queued invalidation interface */
/* set new queued invalidation interface */
}
/* submit invalidation request descriptor to invalidation queue */
static void
{
/*
* inv queue table exhausted, wait hardware to fetch
* next descriptor
*/
}
}
/* queued invalidation interface -- invalidate context cache */
static void
{
/* record the context cache statistics */
}
/* queued invalidation interface -- invalidate iotlb */
static void
{
dr = 1;
dw = 1;
switch (type) {
case TLB_INV_G_PAGE:
addr & IOMMU_PAGE_OFFSET) {
goto qinv_ignore_psi;
}
break;
case TLB_INV_G_DOMAIN:
break;
case TLB_INV_G_GLOBAL:
break;
default:
return;
}
/*
* check the result and record the statistics
*/
switch (type) {
/* global */
case TLB_INV_G_GLOBAL:
break;
/* domain */
case TLB_INV_G_DOMAIN:
break;
/* psi */
case TLB_INV_G_PAGE:
break;
default:
break;
}
}
/* queued invalidation interface -- invalidate dev_iotlb */
static void
{
}
/* queued invalidation interface -- invalidate interrupt entry cache */
static void
{
}
/* queued invalidation interface -- global invalidate interrupt entry cache */
static void
{
}
/* queued invalidation interface -- invalidate single interrupt entry cache */
static void
{
}
/* queued invalidation interface -- invalidate interrupt entry caches */
static void
{
/* requested interrupt count is not a power of 2 */
for (i = 0; i < cnt; i++) {
}
return;
}
mask++;
}
for (i = 0; i < cnt; i++) {
}
return;
}
}
/*
* alloc free entry from sync status table
*/
static uint_t
{
/* should never happen */
goto sync_mem_exhausted;
}
return (tail);
}
/*
* queued invalidation interface -- invalidation wait descriptor
* fence flag not set, need status data to indicate the invalidation
* wait descriptor completion
*/
static void
{
/* plant an iotlb pending node */
/*
* sdata = QINV_SYNC_DATA_UNFENCE, fence = 0, sw = 1, if = 0
* indicate the invalidation wait descriptor completion by
* performing a coherent DWORD write to the status address,
* not by generating an invalidation completion event
*/
}
/*
* queued invalidation interface -- invalidation wait descriptor
* fence flag set, indicate descriptors following the invalidation
* wait descriptor must be processed by hardware only after the
* invalidation wait descriptor completes.
*/
static void
{
/* sw = 0, fence = 1, iflag = 0 */
}
/*
* queued invalidation interface -- invalidation wait descriptor
* wait until the invalidation request finished
*/
static void
{
/*
* sdata = QINV_SYNC_DATA_FENCE, fence = 1, sw = 1, if = 0
* indicate the invalidation wait descriptor completion by
* performing a coherent DWORD write to the status address,
* not by generating an invalidation completion event
*/
while ((*status) != QINV_SYNC_DATA_FENCE)
}
/* get already completed invalidation wait requests */
static int
{
int index;
while (*value == QINV_SYNC_DATA_UNFENCE) {
*value = 0;
(*cnt)++;
} else
}
if ((*cnt) > 0)
return (index);
else
return (-1);
}
/*
* queued invalidation interface
* function based context cache invalidation
*/
static void
{
}
/*
* queued invalidation interface
* domain based context cache invalidation
*/
static void
{
}
/*
* queued invalidation interface
* invalidation global context cache
*/
static void
{
}
/*
* queued invalidation interface
* paged based iotlb invalidation
*/
static void
{
/* choose page specified invalidation */
<= ADDR_AM_MAX(am)) {
break;
}
am++;
}
}
/* choose domain invalidation */
} else {
0, hint, TLB_INV_G_DOMAIN);
}
}
/*
* queued invalidation interface
* domain based iotlb invalidation
*/
static void
{
}
/*
* queued invalidation interface
* global iotlb invalidation
*/
static void
{
}
/*
* the plant wait operation for queued invalidation interface
*/
static void
{
if (node) {
}
/* no cache, alloc one */
}
/* plant an invalidation wait descriptor, not wait its completion */
}
/*
* the reap wait operation for queued invalidation interface
*/
static void
{
while (cnt--) {
continue;
index++;
index = 0;
}
}
/* init interrupt remapping table */
static int
{
0U,
0xffffffffU,
0xffffffffU,
MMU_PAGESIZE, /* page aligned */
0x1,
0x1,
0xffffffffU,
0xffffffffU,
1,
4,
0
};
};
if (intrr_apic_mode == LOCAL_X2APIC) {
return (DDI_FAILURE);
}
}
if (intrr_irta_s > INTRR_MAX_IRTA_SIZE) {
}
NULL,
goto intrr_tbl_handle_failed;
}
size,
NULL,
&(intr_remap_tbl->vaddr),
&size,
goto intrr_tbl_mem_failed;
}
intr_remap_tbl->free = 0;
return (DDI_SUCCESS);
return (ENOMEM);
}
/* destroy interrupt remapping table */
static void
{
}
/* enable interrupt remapping hardware unit */
static void
{
if (intrr_apic_mode == LOCAL_X2APIC)
/* set interrupt remap table pointer */
/* global flush intr entry cache */
/* enable interrupt remapping */
status);
/* set compatible mode */
status);
}
/*
* helper function to find the free interrupt remapping
* table entry
*/
static uint_t
{
uint_t i;
post = 0;
if (!bitset_in_set(b, i))
return (i);
}
for (i = 0; i < post; i++) {
if (!bitset_in_set(b, i))
return (i);
}
return (INTRR_IIDX_FULL); /* no free index */
}
/*
* helper function to find 'count' contigous free
* interrupt remapping table entries
*/
static uint_t
{
uint_t i, j;
if (post == INTRR_IIDX_FULL) {
return (INTRR_IIDX_FULL);
}
return (INTRR_IIDX_FULL);
for (j = 0; j < count; j++) {
if (bitset_in_set(b, (i + j))) {
i = i + j;
break;
}
if (j == count - 1)
return (i);
}
}
for (j = 0; j < count; j++) {
if (bitset_in_set(b, (i + j))) {
i = i + j;
break;
}
if (j == count - 1)
return (i);
}
}
return (INTRR_IIDX_FULL); /* no free index */
}
/* alloc one interrupt remapping table entry */
static int
{
if (iidx == INTRR_IIDX_FULL) {
/* no free intr entry, use compatible format intr */
if (intrr_apic_mode == LOCAL_X2APIC) {
/*
* x2apic mode not allowed compatible
* interrupt
*/
goto retry_alloc_iidx;
}
} else {
iidx + 1);
}
return (iidx);
}
/* alloc 'cnt' contigous interrupt remapping table entries */
static int
{
if (iidx != INTRR_IIDX_FULL) {
}
for (i = 0; i < cnt; i++) {
}
} else {
if (intrr_apic_mode == LOCAL_X2APIC) {
/* x2apic mode not allowed comapitible interrupt */
goto retry_alloc_iidxs;
}
}
return (iidx);
}
/* get ioapic source id and iommu structure for ioapics */
static void
get_ioapic_iommu_info(void)
{
uint_t i;
for (i = 0; i < MAX_IO_APIC; i++) {
sizeof (ioapic_iommu_info_t), KM_SLEEP);
ioapic_iommu_infos[i]->iommu =
break;
}
}
}
}
/* initialize interrupt remapping */
static int
intr_remap_init(int apic_mode)
{
int intrr_all_disable = 1;
intrr_all_disable = 0;
}
}
}
if (intrr_all_disable) {
/*
* if all drhd unit disabled intr remapping,
* return FAILURE
*/
return (DDI_FAILURE);
} else {
return (DDI_SUCCESS);
}
}
/* enable interrupt remapping */
static void
{
if (iommu->iu_intr_remap_tbl)
}
/* get iommu structure and interrupt source id for ioapic */
}
/* alloc remapping entry for the interrupt */
static void
{
return;
}
goto intr_remap_disable;
}
} else {
cnt = 1;
}
if (cnt == 1) {
} else {
}
if (iidx == INTRR_IIDX_FULL) {
goto intr_remap_disable;
}
if (cnt == 1) {
} else {
}
return;
}
for (i = 1; i < cnt; i++) {
}
} else {
}
return;
}
/* helper function to get iommu structure */
{
/* for fixed interrupt */
} else {
}
}
}
}
/* helper function to get interrupt request source id */
static void
{
if (!intrr_enable_sid_verify) {
return;
}
/* for interrupt through I/O APIC */
sq = SQ_VERIFY_ALL;
} else {
/* pcie device */
sq = SQ_VERIFY_ALL;
} else {
/* device behind pcie to pci bridge */
sq = SQ_VERIFY_ALL;
} else {
/* device behind pci to pci bridge */
sq = SQ_VERIFY_ALL;
}
}
}
}
/* remapping the interrupt */
static void
{
return;
}
} else {
cnt = 1;
}
rh = 0;
/*
* Mark the IRTE's TM as Edge to suppress broadcast EOI.
*/
if (intrr_suppress_brdcst_eoi) {
}
} else {
dlm = 0;
}
if (intrr_apic_mode == LOCAL_APIC)
if (cnt == 1) {
/* set interrupt remapping table entry */
} else {
for (i = 0; i < cnt; i++) {
/* set interrupt remapping table entry */
vector++;
iidx++;
}
}
}
/* free the remapping entry */
static void
{
return;
}
}
}
/* record the ioapic rdt entry */
static void
{
(pol << INTRR_IOAPIC_POL_SHIFT) |
(1 << INTRR_IOAPIC_FORMAT_SHIFT);
} else {
}
}
/* record the msi interrupt structure */
/*ARGSUSED*/
static void
{
} else {
}
}