/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/debug.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/param.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/kobj.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>
#include <vm/hat_i86.h>
#include <vm/seg_kmem.h>
#if 0
/*
 * Debug printing: change the "#if 0" above to "#if 1" to enable.
 */
#define DBG(x) \
bop_printf(NULL, "kboot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x));
#else
#define DBG(x) /* naught */
#endif
/*
 * The boot-time page table window: a single page of VA reserved for
 * temporarily mapping arbitrary physical pages (such as page tables),
 * and the VA of the PTE that maps the window itself.
 */
static caddr_t window;
static caddr_t pte_to_window;
/*
 * These are needed by mmu_init().
 */
int kbm_nx_support = 0; /* NX bit in PTEs is in use */
int kbm_pae_support = 0;	/* PAE (64 bit page table entries) in use */
int kbm_pge_support = 0;	/* PGE (page table global bit) in use */
int kbm_largepage_support = 0;
uint_t kbm_nucleus_size = 0;
#define BOOT_SHIFT(l) (shift_amt[l])
#define BOOT_SZ(l) ((size_t)1 << BOOT_SHIFT(l))
#define BOOT_OFFSET(l) (BOOT_SZ(l) - 1)
#define BOOT_MASK(l) (~BOOT_OFFSET(l))
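
/*
 * Worked example of the macros above (illustrative; it assumes the
 * usual x86 shift_amt values, where level 0 covers 4K pages and so
 * BOOT_SHIFT(0) == 12):
 *
 *	BOOT_SZ(0)	== 0x1000	the level's page size
 *	BOOT_OFFSET(0)	== 0xfff	mask of offset bits within a page
 *	BOOT_MASK(0)	== ~0xfff	truncates a VA to a page boundary
 */
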
/*
* Initialize memory management parameters for boot time page table management
*/
void
kbm_init(struct xboot_info *bi)
{
	/*
	 * Copy the MMU configuration handed to us by the boot loader.
	 */
kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size;
kbm_largepage_support = bi->bi_use_largepage;
kbm_nx_support = bi->bi_use_nx;
kbm_pae_support = bi->bi_use_pae;
kbm_pge_support = bi->bi_use_pge;
window = bi->bi_pt_window;
DBG(window);
pte_to_window = bi->bi_pte_to_pt_window;
DBG(pte_to_window);
if (kbm_pae_support) {
shift_amt = shift_amt_pae;
ptes_per_table = 512;
pte_size = 8;
lpagesize = TWO_MEG;
#ifdef __amd64
top_level = 3;
#else
top_level = 2;
#endif
} else {
shift_amt = shift_amt_nopae;
ptes_per_table = 1024;
pte_size = 4;
lpagesize = FOUR_MEG;
top_level = 1;
}
#ifdef __xpv
xen_info = bi->bi_xen_start_info;
mfn_list = (mfn_t *)xen_info->mfn_list;
DBG(mfn_list);
mfn_count = xen_info->nr_pages;
DBG(mfn_count);
#endif
top_page_table = bi->bi_top_page_table;
DBG(top_page_table);
}
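
/*
 * For reference, the page table geometry selected above follows
 * directly from the x86 page table formats:
 *
 *			PAE			non-PAE
 *	ptes_per_table	512			1024
 *	pte_size	8 bytes			4 bytes
 *	lpagesize	2M			4M
 *	top_level	3 (amd64), 2 (i386)	1
 */
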
/*
 * Change the addressable page table window to point at a given page.
 */
/*ARGSUSED*/
void *
kbm_remap_window(paddr_t physaddr, int writeable)
{
x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE;
DBG(physaddr);
#ifdef __xpv
if (!writeable)
pt_bits &= ~PT_WRITABLE;
if (HYPERVISOR_update_va_mapping((uintptr_t)window,
pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0)
bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
if (kbm_pae_support)
*((x86pte_t *)pte_to_window) = physaddr | pt_bits;
else
*((x86pte32_t *)pte_to_window) = physaddr | pt_bits;
mmu_tlbflush_entry(window);
#endif
DBG(window);
return (window);
}
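
/*
 * A sketch of typical use (hypothetical caller; "table_pa" and "index"
 * are illustrative names): to read a PTE out of an arbitrary page
 * table page, map that page at the window and index into it.
 *
 *	x86pte_t *table = kbm_remap_window(table_pa, 0);
 *	x86pte_t pte = table[index];		(PAE pte format assumed)
 *
 * There is only one window, so a later call to kbm_remap_window()
 * invalidates the pointer returned by any earlier call.
 */
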
/*
* Add a mapping for the physical page at the given virtual address.
*/
void
kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel)
{
x86pte_t *ptep;
paddr_t pte_physaddr;
x86pte_t pteval;
if (khat_running)
panic("kbm_map() called too late");
pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
if (level >= 1)
pteval |= PT_PAGESIZE;
if (kbm_pge_support && is_kernel)
pteval |= PT_GLOBAL;
#ifdef __xpv
	/*
	 * Try HYPERVISOR_update_va_mapping() first; it fails if the
	 * page table is missing.
	 */
if (HYPERVISOR_update_va_mapping(va, pteval,
UVMF_INVLPG | UVMF_LOCAL) == 0)
return;
#endif
/*
* Find the pte that will map this address. This creates any
* missing intermediate level page tables.
*/
ptep = find_pte(va, &pte_physaddr, level, 0);
if (ptep == NULL)
bop_panic("kbm_map: find_pte returned NULL");
#ifdef __xpv
if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
if (kbm_pae_support)
*ptep = pteval;
else
*((x86pte32_t *)ptep) = pteval;
mmu_tlbflush_entry((caddr_t)va);
#endif
}
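
/*
 * Example (hypothetical values): to establish a boot-time 4K kernel
 * mapping of the physical page "pa" at virtual address "va":
 *
 *	kbm_map(va, pa, 0, 1);
 *
 * Level 0 selects a 4K page; level >= 1 selects a large page, which is
 * why PT_PAGESIZE is set above.
 */
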
#ifdef __xpv
/*
* Add a mapping for the machine page at the given virtual address.
*/
void
kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
{
paddr_t pte_physaddr;
x86pte_t pteval;
pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE;
if (level == 1)
pteval |= PT_PAGESIZE;
	/*
	 * Try HYPERVISOR_update_va_mapping() first; it fails if the
	 * page table is missing.
	 */
if (HYPERVISOR_update_va_mapping(va,
pteval, UVMF_INVLPG | UVMF_LOCAL) == 0)
return;
	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
(void) find_pte(va, &pte_physaddr, level, 0);
if (HYPERVISOR_update_va_mapping(va,
pteval, UVMF_INVLPG | UVMF_LOCAL) != 0)
bop_panic("HYPERVISOR_update_va_mapping failed");
}
#endif /* __xpv */
/*
 * Probe the boot time page tables to find the first mapping
 * that includes va (or higher) and return non-zero if one is found.
 * va is updated to the starting address and len to the pagesize.
 * pfn is set to the page frame backing the mapping and prot to the
 * protections found.
 *
 * Note that if va is in the middle of a large page, the returned va
 * will be less than what was asked for.
 */
int
kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot)
{
uintptr_t probe_va;
x86pte_t *ptep;
paddr_t pte_physaddr;
x86pte_t pte_val;
level_t l;
if (khat_running)
panic("kbm_probe() called too late");
*len = 0;
*pfn = PFN_INVALID;
*prot = 0;
probe_va = *va;
restart_new_va:
l = top_level;
for (;;) {
if (IN_VA_HOLE(probe_va))
probe_va = mmu.hole_end;
if (IN_HYPERVISOR_VA(probe_va))
#if defined(__amd64) && defined(__xpv)
probe_va = HYPERVISOR_VIRT_END;
#else
return (0);
#endif
/*
* If we don't have a valid PTP/PTE at this level
* then we can bump VA by this level's pagesize and try again.
* When the probe_va wraps around, we are done.
*/
ptep = find_pte(probe_va, &pte_physaddr, l, 1);
if (ptep == NULL)
bop_panic("kbm_probe: find_pte returned NULL");
if (kbm_pae_support)
pte_val = *ptep;
else
pte_val = *((x86pte32_t *)ptep);
if (!PTE_ISVALID(pte_val)) {
probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l);
if (probe_va <= *va)
return (0);
goto restart_new_va;
}
/*
* If this entry is a pointer to a lower level page table
* go down to it.
*/
if (!PTE_ISPAGE(pte_val, l)) {
ASSERT(l > 0);
--l;
continue;
}
/*
* We found a boot level page table entry
*/
*len = BOOT_SZ(l);
*va = probe_va & ~(*len - 1);
*pfn = PTE2PFN(pte_val, l);
*prot = PROT_READ | PROT_EXEC;
if (PTE_GET(pte_val, PT_WRITABLE))
*prot |= PROT_WRITE;
		/*
		 * mmu.pt_nx is cleared if the processor doesn't
		 * support the NX bit.
		 */
if (PTE_GET(pte_val, mmu.pt_nx))
*prot &= ~PROT_EXEC;
return (1);
}
}
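
/*
 * A sketch of the usual iteration pattern over kbm_probe() (variable
 * names are illustrative; kernel startup code walks the remaining
 * boot-time mappings this way):
 *
 *	uintptr_t va = 0;
 *	size_t len;
 *	pfn_t pfn;
 *	uint_t prot;
 *
 *	while (kbm_probe(&va, &len, &pfn, &prot) != 0) {
 *		(operate on the mapping [va, va + len))
 *		va += len;
 *	}
 */
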
/*
* Destroy a boot loader page table 4K mapping.
*/
void
kbm_unmap(uintptr_t va)
{
if (khat_running)
panic("kbm_unmap() called too late");
else {
#ifdef __xpv
(void) HYPERVISOR_update_va_mapping(va, 0,
UVMF_INVLPG | UVMF_LOCAL);
#else
x86pte_t *ptep;
level_t level = 0;
uint_t probe_only = 1;
ptep = find_pte(va, NULL, level, probe_only);
if (ptep == NULL)
return;
if (kbm_pae_support)
*ptep = 0;
else
*((x86pte32_t *)ptep) = 0;
mmu_tlbflush_entry((caddr_t)va);
#endif
}
}
/*
* Change a boot loader page table 4K mapping.
* Returns the pfn of the old mapping.
*/
pfn_t
kbm_remap(uintptr_t va, pfn_t pfn)
{
x86pte_t *ptep;
level_t level = 0;
uint_t probe_only = 1;
x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE |
PT_NOCONSIST | PT_VALID;
x86pte_t old_pte;
if (khat_running)
panic("kbm_remap() called too late");
ptep = find_pte(va, NULL, level, probe_only);
if (ptep == NULL)
bop_panic("kbm_remap: find_pte returned NULL");
if (kbm_pae_support)
old_pte = *ptep;
else
old_pte = *((x86pte32_t *)ptep);
#ifdef __xpv
if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
if (kbm_pae_support)
*((x86pte_t *)ptep) = pte_val;
else
*((x86pte32_t *)ptep) = pte_val;
mmu_tlbflush_entry((caddr_t)va);
#endif
if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1)
return (PFN_INVALID);
return (mmu_btop(ma_to_pa(old_pte)));
}
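
/*
 * Example (hypothetical values): to replace the page mapped at "va"
 * with "new_pfn" while remembering the old backing page:
 *
 *	pfn_t old_pfn = kbm_remap(va, new_pfn);
 *	if (old_pfn != PFN_INVALID)
 *		(the previous backing page can now be reused)
 */
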
/*
* Change a boot loader page table 4K mapping to read only.
*/
void
kbm_read_only(uintptr_t va, paddr_t pa)
{
x86pte_t pte_val = pa_to_ma(pa) |
PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID;
#ifdef __xpv
if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
x86pte_t *ptep;
level_t level = 0;
ptep = find_pte(va, NULL, level, 0);
if (ptep == NULL)
bop_panic("kbm_read_only: find_pte returned NULL");
if (kbm_pae_support)
*ptep = pte_val;
else
*((x86pte32_t *)ptep) = pte_val;
mmu_tlbflush_entry((caddr_t)va);
#endif
}
/*
 * Interfaces used by the kernel debugger to access physical memory.
 */
static x86pte_t save_pte;
void *
kbm_push(paddr_t pa)
{
static int first_time = 1;
if (first_time) {
first_time = 0;
return (window);
}
if (kbm_pae_support)
save_pte = *((x86pte_t *)pte_to_window);
else
save_pte = *((x86pte32_t *)pte_to_window);
return (kbm_remap_window(pa, 0));
}
void
kbm_pop(void)
{
#ifdef __xpv
if (HYPERVISOR_update_va_mapping((uintptr_t)window, save_pte,
UVMF_INVLPG | UVMF_LOCAL) < 0)
bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
if (kbm_pae_support)
*((x86pte_t *)pte_to_window) = save_pte;
else
*((x86pte32_t *)pte_to_window) = save_pte;
mmu_tlbflush_entry(window);
#endif
}
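
/*
 * kbm_push() and kbm_pop() save only a single PTE, so they nest
 * exactly one level deep and must be paired, e.g. (sketch, with "pa"
 * an arbitrary physical address):
 *
 *	void *w = kbm_push(pa);
 *	(read or copy data through w)
 *	kbm_pop();
 */
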
x86pte_t
get_pteval(paddr_t table, uint_t index)
{
void *table_ptr = kbm_remap_window(table, 0);
if (kbm_pae_support)
return (((x86pte_t *)table_ptr)[index]);
return (((x86pte32_t *)table_ptr)[index]);
}
#ifndef __xpv
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
void *table_ptr = kbm_remap_window(table, 0);
if (kbm_pae_support)
((x86pte_t *)table_ptr)[index] = pteval;
else
((x86pte32_t *)table_ptr)[index] = pteval;
if (level == top_level && level == 2)
reload_cr3();
}
#endif
paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
paddr_t new_table;
void *table_ptr;
new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
table_ptr = kbm_remap_window(new_table, 1);
bzero(table_ptr, MMU_PAGESIZE);
#ifdef __xpv
	/* Remove write permission from the new page table. */
(void) kbm_remap_window(new_table, 0);
#endif
if (level == top_level && level == 2)
*pteval = pa_to_ma(new_table) | PT_VALID;
else
*pteval = pa_to_ma(new_table) |
PT_VALID | PT_REF | PT_USER | PT_WRITABLE;
return (new_table);
}
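
/*
 * make_ptable() pairs with set_pteval(): when the boot MMU table walk
 * finds a missing intermediate table, a new one is allocated and the
 * PTP entry returned in *pteval is installed in the parent table,
 * roughly (sketch; exact arguments are those of the shared boot MMU
 * walk code):
 *
 *	x86pte_t ptp;
 *	(void) make_ptable(&ptp, level);
 *	set_pteval(parent_table_pa, index, level, ptp);
 */
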
x86pte_t *
map_pte(paddr_t table, uint_t index)
{
void *table_ptr = kbm_remap_window(table, 0);
return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
}