/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/cpu.h>
#include <sys/elf_SPARC.h>
#include <vm/page.h>
#include <vm/vm_dep.h>
#include <sys/cpuvar.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/cpu_module.h>
#include <sys/prom_debug.h>
#include <sys/vmsystm.h>
#include <sys/prom_plat.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/panic.h>
#include <sys/dtrace.h>
#include <vm/seg_spt.h>
#include <sys/hypervisor_api.h>
#include <sys/rock_hypervisor_api.h>
#include <sys/hsvc.h>
uint_t root_phys_addr_lo_mask = 0xffffffffU;
uint8_t enable_tm = 1;
char cpu_module_name[] = "SUNW,UltraSPARC-AT10";
static boolean_t hsvc_tm_available = B_TRUE;
static hsvc_info_t rock_tm_hsvc = {
HSVC_REV_1, /* HSVC rev num */
NULL, /* Private */
HSVC_GROUP_TM, /* Requested API Group */
ROCK_HSVC_MAJOR, /* Requested Major */
ROCK_HSVC_MINOR, /* Requested Minor */
cpu_module_name /* Module name */
};
#define MCOREID_MASK 0x1E
#define MCOREID_SHIFT 1
void
cpu_setup(void)
{
extern int cpc_has_overflow_intr;
uint64_t sup_minor;
int status;
/*
 * The setup common to all CPU modules is done in the
 * cpu_setup_common() routine.
*/
cpu_setup_common(NULL);
/*
* Rock I$ is non-coherent.
*/
mach_setup_icache(0);
#ifdef DEBUG
/*
* These should always be present on Rock
*/
if (cpu_hwcap_flags == 0)
cmn_err(CE_WARN, "hwcap-list missing from MD");
#endif
cache |= (CACHE_PTAG | CACHE_IOCOHERENT);
if (use_page_coloring) {
do_pg_coloring = 1;
}
/*
 * Rock generates a hyperprivileged performance event trap instead of
 * a PIC overflow trap. To get the guest's attention, the hypervisor
 * in turn generates a PIC overflow trap. Therefore enable support
 * for it here.
*/
cpc_has_overflow_intr = 1;
/*
* Enable 4M pages for OOB.
*/
max_uheap_lpsize = MMU_PAGESIZE4M;
max_ustack_lpsize = MMU_PAGESIZE4M;
max_privmap_lpsize = MMU_PAGESIZE4M;
/*
 * hv_tm_enable() is part of the TM API group. We need to
 * negotiate that group before we can use it.
*/
status = hsvc_register(&rock_tm_hsvc, &sup_minor);
if ((status != 0) || (sup_minor < (uint64_t)ROCK_HSVC_MINOR)) {
cmn_err(CE_WARN, "%s cannot negotiate hypervisor services: "
"major: 0x%lx minor: 0x%lx group: 0x%x errno: %d",
cpu_module_name, rock_tm_hsvc.hsvc_major,
rock_tm_hsvc.hsvc_minor, HSVC_GROUP_TM, status);
hsvc_tm_available = B_FALSE;
}
}
/*
* Set the magic constants of the implementation.
*/
void
cpu_fiximp(struct cpu_node *cpunode)
{
/*
 * The cache node is optional in the MD, so use hardcoded
 * values if it does not exist.
*/
#ifdef DEBUG
/*
* ...that said, we do want this info to come from the MD.
*/
if (cpunode->ecache_size == 0 || cpunode->ecache_linesize == 0 ||
cpunode->ecache_associativity == 0) {
cmn_err(CE_WARN, "ecache info missing from MD");
}
#endif
if (cpunode->ecache_size == 0)
cpunode->ecache_size = 2 * 1024 * 1024;
if (cpunode->ecache_linesize == 0)
cpunode->ecache_linesize = 64;
if (cpunode->ecache_associativity == 0)
cpunode->ecache_associativity = 8;
}
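/*
 * Flush the I$ for a virtual address in the current process whose
 * text has just been modified (e.g. by DTrace): the backing page is
 * mapped into the kernel and the 8-byte-aligned doubleword containing
 * the address is flushed via doflush().
 */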
void
dtrace_flush_sec(uintptr_t addr)
{
pfn_t pfn;
proc_t *procp = ttoproc(curthread);
page_t *pp;
caddr_t va;
pfn = hat_getpfnum(procp->p_as->a_hat, (void *)addr);
if (pfn != -1) {
ASSERT(pf_is_memory(pfn));
pp = page_numtopp_noreclaim(pfn, SE_SHARED);
if (pp != NULL) {
va = ppmapin(pp, PROT_READ | PROT_WRITE, (void *)addr);
/* sparc needs 8-byte alignment */
doflush((caddr_t)((uintptr_t)va & -8l));
ppmapout(va);
page_unlock(pp);
}
}
}
void
cpu_map_exec_units(struct cpu *cp)
{
ASSERT(MUTEX_HELD(&cpu_lock));
/*
* The cpu_ipipe and cpu_fpu fields are initialized based on
* the execution unit sharing information from the MD. They
* default to the CPU id in the absence of such information.
*/
cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping;
if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND)
cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id);
cp->cpu_m.cpu_fpu = cpunodes[cp->cpu_id].fpu_mapping;
if (cp->cpu_m.cpu_fpu == NO_EU_MAPPING_FOUND)
cp->cpu_m.cpu_fpu = (id_t)(cp->cpu_id);
cp->cpu_m.cpu_core = (cp->cpu_id & MCOREID_MASK) >> MCOREID_SHIFT;
/*
 * The cpu_chip field is initialized based on the L2 cache sharing
 * information in the MD; we assume that all cpus within a chip
 * share the same L2 cache. If no such info is available, we set
 * the chip id to CPU_CHIPID_INVALID.
*/
cp->cpu_m.cpu_mpipe = cpunodes[cp->cpu_id].l2_cache_mapping;
if (cp->cpu_m.cpu_mpipe == NO_L2_CACHE_MAPPING_FOUND)
cp->cpu_m.cpu_mpipe = CPU_L2_CACHEID_INVALID;
cp->cpu_m.cpu_chip = cpunodes[cp->cpu_id].l2_cache_mapping;
if (cp->cpu_m.cpu_chip == NO_L2_CACHE_MAPPING_FOUND)
cp->cpu_m.cpu_chip = CPU_CHIPID_INVALID;
}
void
cpu_init_private(struct cpu *cp)
{
cpu_map_exec_units(cp);
}
/*ARGSUSED*/
void
cpu_uninit_private(struct cpu *cp)
{
}
/*
* cpu_feature_init
*
* This function is called once per strand.
*/
void
cpu_feature_init(void)
{
/*
 * Enable or disable TM on each cpu, provided the hypervisor TM
 * API group was successfully negotiated.
*/
if (hsvc_tm_available == B_TRUE)
(void) hv_tm_enable((uint64_t)enable_tm);
}
/*
 * Flush the specified address range from the I$ via the hv_mem_iflush
 * interface. Note that the hypervisor interface expects a physical
 * address range and may flush less than the requested size.
*/
void
rock_sync_icache(caddr_t addr, size_t size)
{
uint64_t pa, i, flushlen, flushed;
if (!force_sync_icache_after_bcopy)
/*
 * Do not flush the I-cache after bcopy by default.
 * force_sync_icache_after_bcopy defaults to 0; the flag may be
 * set via /etc/system.
*/
return;
if (!tba_taken_over)
/*
* Very early in boot, va_to_pa() will try to call back
* into OBP. Very *very* early in boot, this will fail
* because we haven't set up the OBP callback handler.
* (Without this check, kmdb boot will fail.)
*/
return;
for (i = 0; i < size; i += flushed) {
pa = va_to_pa(addr + i);
ASSERT(pa != -1);
/*
 * Only flush the required length, and never past the end of the
 * current page.
*/
flushlen = MIN((size - i), (PAGESIZE - (pa & MMU_PAGEOFFSET)));
/*
 * Flush the I$ up to the page boundary. This call should never
* fail. If it does, we panic the system as I$ may contain
* stale instructions, which can result in silent data
* corruption.
*/
if (hv_mem_iflush(pa, flushlen, &flushed) != H_EOK) {
cmn_err(CE_PANIC, "Flushing the Icache failed");
}
}
}
/*
 * There are no Hypervisor trapstat(1m) interfaces for Rock.
* If trapstat(1m) wants to do its thing, it will have to
* take over all TLB miss handling.
*/
int
cpu_trapstat_conf(int cmd)
{
int status;
switch (cmd) {
case CPU_TSTATCONF_INIT:
case CPU_TSTATCONF_FINI:
case CPU_TSTATCONF_ENABLE:
case CPU_TSTATCONF_DISABLE:
status = ENOTSUP;
break;
default:
status = EINVAL;
break;
}
return (status);
}
/*ARGSUSED*/
void
cpu_trapstat_data(void *buf, uint_t tstat_pgszs)
{
}
#define MAX_PAGE_COLORS (1 << MAX_PAGE_COLORS_SHIFT)
#define MAX_PAGE_COLORS_SHIFT (5)
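/*
 * Compute the hashed cache color of a page: successive 5-bit fields of
 * the (size-aligned) pfn are XOR-folded down to a single 5-bit value.
 * See also the physical-address hash described in the comment preceding
 * clr2sqnclr_table_init() below.
 */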
/*ARGSUSED*/
uint_t
page_pfn_2_color_cpu(pfn_t pfn, uchar_t szc, void *cookie)
{
uint_t color;
pfn = PFN_BASE(pfn, szc);
color = pfn ^ (pfn >> 20);
color = color ^ (color >> 10);
return ((color ^ (color >> 5)) & 0x1f);
}
/*
 * This macro rotates value "x" n steps to the right.
 * The mask consists of "n + m" bits, so the caller must ensure
 * ASSERT(x < (1 << (n + m)));
*/
#define ROTATE_BITS(x, n, m) (((x) >> (n)) | (((x) & ((1 << (n)) - 1)) << m))
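/*
 * For example, ROTATE_BITS(0x06, 3, 2) evaluates to
 * (0x06 >> 3) | ((0x06 & 0x7) << 2) = 0x00 | 0x18 = 0x18,
 * i.e. the 5-bit value 0b00110 rotated right 3 steps is 0b11000.
 */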
uchar_t clr2sqnclr_table[MMU_PAGE_SIZES][MAX_PAGE_COLORS];
/*
 * On Rock, the hashed cache index is calculated as follows:
 *   pa[47:43] ^ pa[42:38] ^ pa[37:33] ^ pa[32:28] ^
 *   pa[27:23] ^ pa[22:18] ^ pa[17:13] . pa[12:6]
 * That is, every 5 bits is folded and XORd together. Adjacent page
 * sizes differ by 3 bits, which is a factor of 8. This function builds
 * the table used to compute the sequential color of a page by rotating
 * its color 3 steps within a 5-bit field for each page-size step.
*/
void
clr2sqnclr_table_init()
{
uchar_t szc;
uint_t color;
uint_t rot = 0;
for (szc = 0; szc < MMU_PAGE_SIZES; szc++) {
rot = (szc * 3) % MAX_PAGE_COLORS_SHIFT;
for (color = 0; color < MAX_PAGE_COLORS; color++) {
clr2sqnclr_table[szc][color] =
ROTATE_BITS(color, rot,
(MAX_PAGE_COLORS_SHIFT - rot));
}
}
}
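/*
 * Convert a page color to its sequential color for the given page size,
 * using the table built by clr2sqnclr_table_init().
 */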
uint_t
clr2sqnclr(uchar_t szc, uint_t color)
{
ASSERT(szc < MMU_PAGE_SIZES);
ASSERT(color < MAX_PAGE_COLORS);
return (clr2sqnclr_table[szc][color]);
}
#if MMU_PAGE_SIZES > 8
#error MMU_PAGE_SIZES can be at most 8
#endif
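/*
 * Given a color equivalence mask at page size "szc", return the subset
 * of those color bits that is still honored at the next larger page
 * size; rock_color_masks[] lists that subset for each size.
 */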
uint_t
page_get_nsz_color_mask_cpu(uchar_t szc, uint_t mask)
{
static uint_t rock_color_masks[7] = {0x18, 6, 0x11, 0xc, 3, 0x18, 6};
ASSERT(szc < MMU_PAGE_SIZES - 1);
return (mask & rock_color_masks[szc]);
}
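/*
 * A page's hashed color value carries over unchanged to the next larger
 * page size.
 */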
/*ARGSUSED*/
uint_t
page_get_nsz_color_cpu(uchar_t szc, uint_t color)
{
return (color);
}
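/*
 * Every page size uses the same number of hashed colors (see
 * page_coloring_init_cpu()), so there is no color bit shift between
 * sizes.
 */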
uint_t
page_get_color_shift_cpu(uchar_t szc, uchar_t nszc)
{
ASSERT(nszc >= szc);
return (0);
}
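/*
 * Return the smallest pfn greater than "pfn" whose hashed color matches
 * "color" in the bits selected by ceq_mask. Candidate pfns are generated
 * by stepping through sequential colors (see clr2sqnclr()) in the pfn's
 * color bit field, moving on to the next color group whenever a full
 * cycle produces no match.
 */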
/*ARGSUSED*/
pfn_t
page_next_pfn_for_color_cpu(pfn_t pfn, uchar_t szc, uint_t color,
uint_t ceq_mask, uint_t color_mask, void *cookie)
{
uint_t sqn_ceq_mask = clr2sqnclr(szc, ceq_mask);
uint_t sqn_color = clr2sqnclr(szc, color);
uint_t pfn_shift = PNUM_SHIFT(szc);
pfn_t cpfn, npfn, base_pfn = pfn & (~(pfn_t)color_mask << pfn_shift);
uint_t base_sqn_color, nsqn_color, wrap = 0;
ASSERT((color & ~ceq_mask) == 0);
base_sqn_color = clr2sqnclr(szc,
page_pfn_2_color_cpu(base_pfn, szc, NULL)) ^ sqn_color;
nsqn_color = base_sqn_color;
cpfn = (pfn_t)-1L;
do {
npfn = base_pfn | (nsqn_color << pfn_shift);
ASSERT(((page_pfn_2_color_cpu(npfn, szc, NULL) ^ color) &
ceq_mask) == 0);
if (npfn > pfn && npfn < cpfn)
cpfn = npfn;
nsqn_color = INC_MASKED(nsqn_color, sqn_ceq_mask, color_mask);
if (nsqn_color != base_sqn_color)
continue;
if (cpfn != (pfn_t)-1L)
break;
base_pfn += ((pfn_t)color_mask + 1) << pfn_shift;
base_sqn_color = clr2sqnclr(szc,
page_pfn_2_color_cpu(base_pfn, szc, NULL)) ^ sqn_color;
nsqn_color = base_sqn_color;
wrap++;
} while (nsqn_color != base_sqn_color || wrap < 2);
ASSERT(cpfn != (pfn_t)-1L);
return (cpfn);
}
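/*
 * Rock uses the same 2^MAX_PAGE_COLORS_SHIFT (32) hashed colors for
 * every supported page size; also build the color to sequential-color
 * conversion table.
 */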
void
page_coloring_init_cpu()
{
int i;
uint_t colors = 1 << MAX_PAGE_COLORS_SHIFT;
for (i = 0; i < mmu_page_sizes; i++) {
hw_page_array[i].hp_colors = colors;
}
/*
 * Initialize the conversion table between page colors and
 * sequential colors.
*/
clr2sqnclr_table_init();
}
/*
 * Group colorequiv colors on Rock by the low-order bits of the color first.
*/
void
page_set_colorequiv_arr_cpu(void)
{
static uint_t nequiv_shades_log2[MMU_PAGE_SIZES] = {0, 3, 0, 0, 0, 0};
if (colorequiv > 1) {
int i;
uint_t sv_a = lowbit(colorequiv) - 1;
if (sv_a > 15)
sv_a = 15;
for (i = 0; i < MMU_PAGE_SIZES; i++) {
uint_t colors;
uint_t a = sv_a;
if ((colors = hw_page_array[i].hp_colors) <= 1)
continue;
while ((colors >> a) == 0)
a--;
if (a > (colorequivszc[i] & 0xf) +
(colorequivszc[i] >> 4)) {
if (a <= nequiv_shades_log2[i]) {
colorequivszc[i] = (uchar_t)a;
} else {
colorequivszc[i] =
((a - nequiv_shades_log2[i]) << 4) |
nequiv_shades_log2[i];
}
}
}
}
}