/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/t_lock.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <vm/as.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat_sfmmu.h>
#include <sys/debug.h>
#include <sys/cpu_module.h>
/*
 * A quick way to generate a cache consistent address to map in a page.
 * Users: ppcopy, pagezero, /proc, dev/mem
 *
 * The ppmapin/ppmapout routines provide a quick way of generating a cache
 * consistent address by reserving a given amount of kernel address space.
 * The base is PPMAPBASE and its size is PPMAPSIZE. This memory is divided
 * into x sets, where x is the number of colors for the virtual cache. The
 * number of colors is how many times a page can be mapped simultaneously
 * in the cache. For direct mapped caches this translates to the number of
 * pages in the cache.
 * Each set is assigned a group of virtual pages from the reserved memory,
 * depending on its virtual color.
 * When trying to assign a virtual address we find the color of the
 * physical page in question (if applicable), and then look for an
 * available virtual page in the set of the appropriate color.
*/
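/*
 * Illustrative sketch (not part of the original interface description):
 * a typical caller obtains a temporary kernel mapping for a page,
 * operates on the page through that mapping, and then releases it.
 * "pp" below stands for a hypothetical, already locked page_t.
 *
 *	caddr_t va;
 *
 *	va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
 *	bzero(va, MMU_PAGESIZE);
 *	ppmapout(va);
 *
 * The (caddr_t)-1 hint means the caller has no color preference, which
 * is always the case on sun4v since its caches are physically indexed.
 */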
int pp_slots = 4; /* small default, tuned by cpu module */
/* tuned by cpu module, default is "safe" */
int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;
static caddr_t ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
static int nsets; /* number of sets */
static int ppmap_shift; /* set selector */
#ifdef PPDEBUG
#define MAXCOLORS 16 /* for debug only */
static int ppalloc_noslot = 0; /* # of allocations from kernelmap */
static int align_hits;
static int pp_allocs; /* # of ppmapin requests */
#endif /* PPDEBUG */
/*
* There are only 64 TLB entries on spitfire, 16 on cheetah
* (fully-associative TLB) so we allow the cpu module to tune the
* number to use here via pp_slots.
*/
static struct ppmap_va {
caddr_t ppmap_slots[MAXPP_SLOTS];
} ppmap_va[NCPU];
/* prevent compilation with VAC defined */
#ifdef VAC
#error "sun4v ppmapin and ppmapout do not support VAC"
#endif
void
ppmapinit(void)
{
int nset;
caddr_t va;
ASSERT(pp_slots <= MAXPP_SLOTS);
va = (caddr_t)PPMAPBASE;
	/*
	 * sun4v does not have a virtually indexed cache, so there is
	 * simply one set containing all pages.
	 */
nsets = mmu_btop(PPMAPSIZE);
ppmap_shift = MMU_PAGESHIFT;
for (nset = 0; nset < nsets; nset++) {
ppmap_vaddrs[nset] =
(caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE));
}
}
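/*
 * Illustrative note, assuming the usual 8K MMU_PAGESIZE and a PPMAPSIZE
 * of, say, 512K: ppmapinit() above lays out 64 one-page slots starting
 * at PPMAPBASE, and ppmapout() later recovers a slot index from its
 * address with
 *
 *	nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
 *
 * This works as long as nsets is a power of two and PPMAPBASE is aligned
 * on a PPMAPSIZE boundary, so that the low bits of the slot address's
 * virtual page frame number are exactly the slot index.
 */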
/*
 * Allocate a cache consistent virtual address to map a page, pp,
 * with protection, vprot, and map it into the MMU using the most
 * efficient means possible. The hint argument is a virtual address
 * which, when masked, yields an offset into a virtual cache that
 * should be taken into account when choosing an address to map in the
 * page. A hint of (caddr_t)-1 means the caller does not care, for
 * instance pagezero. On sun4v the caches are physically indexed, so
 * the hint is ignored.
 *
 * Machine dependent; depends on the virtual address space layout and
 * understands that all kernel addresses have bit 31 set.
 *
 * NOTE: For sun4 platforms the meaning of the hint argument is the opposite
 * of that found on other architectures. On other architectures the hint
 * (called avoid) was used to ask ppmapin NOT to use the specified cache
 * color. This was done to avoid virtual cache thrashing in the bcopy.
 * Unfortunately, in the case of a COW, this later caused a cache aliasing
 * conflict. On sun4, the bcopy routine uses the block ld/st instructions,
 * so we do not have to worry about virtual cache thrashing; in fact, by
 * using the hint to choose the right color we can almost guarantee that a
 * cache conflict will not occur.
*/
/*ARGSUSED2*/
caddr_t
ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
{
int nset;
caddr_t va;
#ifdef PPDEBUG
pp_allocs++;
#endif /* PPDEBUG */
	/*
	 * On sun4v the caches are physically indexed, so we can pick any
	 * address we want.
	 */
for (nset = 0; nset < nsets; nset++) {
va = ppmap_vaddrs[nset];
if (va != NULL) {
#ifdef PPDEBUG
align_hits++;
#endif /* PPDEBUG */
if (atomic_cas_ptr(&ppmap_vaddrs[nset], va, NULL) ==
va) {
hat_memload(kas.a_hat, va, pp,
vprot | HAT_NOSYNC,
HAT_LOAD_LOCK);
return (va);
}
}
}
#ifdef PPDEBUG
ppalloc_noslot++;
#endif /* PPDEBUG */
	/*
	 * No free slots; grab an arbitrary page of virtual address space
	 * from the kernel heap arena instead.
	 */
va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
return (va);
}
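/*
 * Illustrative sketch of the lock-free slot protocol used by ppmapin()
 * and ppmapout() (not a separate interface); "slot" stands for a
 * ppmap_vaddrs[] entry. A slot is claimed by atomically swapping its
 * saved address for NULL, and released by storing the address back.
 *
 *	claim:		if (atomic_cas_ptr(&slot, va, NULL) == va)
 *				the mapping at va is now exclusively ours
 *	release:	ASSERT(slot == NULL);
 *			slot = va;
 *
 * Only the thread that claimed a slot ever stores its address back, so
 * the release in ppmapout() below can be a plain store.
 */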
void
ppmapout(caddr_t va)
{
int nset;
if (va >= kernelheap && va < ekernelheap) {
		/*
		 * Space came from the kernel heap arena; unload the
		 * mapping and return the space.
		 */
hat_unload(kas.a_hat, va, PAGESIZE,
(HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
vmem_free(heap_arena, va, PAGESIZE);
} else {
/*
* Space came from ppmap_vaddrs[], give it back.
*/
nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
hat_unload(kas.a_hat, va, PAGESIZE,
(HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
ASSERT(ppmap_vaddrs[nset] == NULL);
ppmap_vaddrs[nset] = va;
}
}
#ifdef DEBUG
#define PP_STAT_ADD(stat) (stat)++
uint_t pload, ploadfail;
uint_t ppzero, ppzero_short;
#else
#define PP_STAT_ADD(stat)
#endif /* DEBUG */
static void
pp_unload_tlb(caddr_t *pslot, caddr_t va)
{
ASSERT(*pslot == va);
vtag_flushpage(va, (uint64_t)ksfmmup);
*pslot = NULL; /* release the slot */
}
/*
* Routine to copy kernel pages during relocation. It will copy one
* PAGESIZE page to another PAGESIZE page. This function may be called
* above LOCK_LEVEL so it should not grab any locks.
*/
void
ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
{
uint64_t fm_pa, to_pa;
size_t nbytes;
fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
nbytes = MMU_PAGESIZE;
for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
hw_pa_bcopy32(fm_pa, to_pa);
}
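/*
 * Note on the loop above: the copy proceeds entirely through physical
 * addresses, 32 bytes at a time, so for the usual 8K MMU_PAGESIZE it
 * invokes hw_pa_bcopy32() MMU_PAGESIZE / 32 = 256 times and never needs
 * a virtual mapping or a lock, which is what makes it safe to call above
 * LOCK_LEVEL.
 */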
/*
 * Copy the data from the physical page represented by "fm_pp" to
 * that represented by "to_pp".
 *
 * Try to use a KPM mapping first; if that fails, fall back to
 * ppmapin/ppmapout.
 * Returns 1 on success, or 0 if a fault occurred during the copy.
 * (A short usage sketch follows the function.)
*/
int
ppcopy(page_t *fm_pp, page_t *to_pp)
{
caddr_t fm_va = NULL;
caddr_t to_va;
boolean_t fast;
label_t ljb;
int ret = 1;
ASSERT(PAGE_LOCKED(fm_pp));
ASSERT(PAGE_LOCKED(to_pp));
/*
* Try to map using KPM if enabled. If it fails, fall
* back to ppmapin/ppmapout.
*/
if ((kpm_enable == 0) ||
(fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
(to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
if (fm_va != NULL)
hat_kpm_mapout(fm_pp, NULL, fm_va);
fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
fast = B_FALSE;
} else
fast = B_TRUE;
if (on_fault(&ljb)) {
ret = 0;
goto faulted;
}
bcopy(fm_va, to_va, PAGESIZE);
no_fault();
faulted:
/* Unmap */
if (fast) {
hat_kpm_mapout(fm_pp, NULL, fm_va);
hat_kpm_mapout(to_pp, NULL, to_va);
} else {
ppmapout(fm_va);
ppmapout(to_va);
}
return (ret);
}
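/*
 * Illustrative sketch of a hypothetical ppcopy() caller (not part of
 * this file): both pages must already be locked, and the return value
 * must be checked because the copy can fault, for instance on an
 * uncorrectable memory error in the source page.
 *
 *	ASSERT(PAGE_LOCKED(src_pp) && PAGE_LOCKED(dst_pp));
 *	if (ppcopy(src_pp, dst_pp) == 0) {
 *		the copy faulted; treat the destination page as garbage
 *	}
 */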
/*
 * Zero the physical page represented by "pp" from offset off to
 * off + len without changing the page's reference and modified bits.
 *
 * Again, try a KPM mapping first and fall back to ppmapin/ppmapout.
 * (A short usage sketch follows the function.)
*/
void
pagezero(page_t *pp, uint_t off, uint_t len)
{
caddr_t va;
extern int hwblkclr(void *, size_t);
extern int use_hw_bzero;
boolean_t fast;
ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
ASSERT(PAGE_LOCKED(pp));
PP_STAT_ADD(ppzero);
if (len != MMU_PAGESIZE || !use_hw_bzero) {
PP_STAT_ADD(ppzero_short);
}
kpreempt_disable();
/*
* Try to use KPM if enabled. If that fails, fall back to
* ppmapin/ppmapout.
*/
if (kpm_enable != 0) {
fast = B_TRUE;
va = hat_kpm_mapin(pp, NULL);
} else
va = NULL;
if (va == NULL) {
fast = B_FALSE;
va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
}
if (!use_hw_bzero) {
bzero(va + off, len);
sync_icache(va + off, len);
} else if (hwblkclr(va + off, len)) {
		/*
		 * We may not have used the block-commit ASI, so flush
		 * the I-$ manually.
		 */
sync_icache(va + off, len);
} else {
		/*
		 * We used block commit, which also flushed the I-$.
		 * However, we may still have an instruction in the
		 * pipeline; only a flush will invalidate that.
		 */
doflush(va);
}
if (fast) {
hat_kpm_mapout(pp, NULL, va);
} else {
ppmapout(va);
}
kpreempt_enable();
}
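/*
 * Illustrative sketch of a hypothetical pagezero() caller (not part of
 * this file): zeroing only the tail of a page, for example the bytes
 * beyond a file's end of data, without disturbing the page's reference
 * and modified bits. "eoff" is a hypothetical byte offset of the end of
 * valid data within the page.
 *
 *	ASSERT(PAGE_LOCKED(pp));
 *	pagezero(pp, (uint_t)(eoff & PAGEOFFSET),
 *	    (uint_t)(PAGESIZE - (eoff & PAGEOFFSET)));
 */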