starcat.c revision 8c754b1b0941ce71249cc956888b3470525b995f
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <sys/machparam.h>
#include <sys/mem_cage.h>
#include <sys/promimpl.h>
#include <sys/platform_module.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/iosramio.h>
#include <sys/machsystm.h>
#include <sys/bootconf.h>
#include <sys/cheetahregs.h>
#include <sys/plat_ecc_unum.h>
#include <sys/plat_ecc_dimm.h>
#include <sys/kdi_impl.h>
#include <sys/iosramreg.h>
#include <sys/iosramvar.h>
/* Preallocation of spare tsb's for DR */
/* Set the maximum number of slot0 + slot1 boards. .. for DR */
/* Maximum number of cpus per board... for DR */
/* Maximum number of mem-units per board... for DR */
/* Maximum number of io-units (buses) per board... for DR */
/* Preferred minimum cage size (expressed in pages)... for DR */
/* Platform specific function to get unum information */
int (*p2get_mem_unum)(int, uint64_t, char *, int, int *);
/* Memory for fcode claims. 16k times # maximum possible schizos */
int efcode_size = EFCODE_SIZE;
/*
* The IOSRAM driver is loaded in load_platform_drivers() any cpu signature
* usage prior to that time will have not have a function to call.
*/
/*
* set_platform_max_ncpus should return the maximum number of CPUs that the
* platform supports. This function is called from check_cpus() to set the
* value of max_ncpus [see PSARC 1997/165 CPU Dynamic Reconfiguration].
* Data elements which are allocated based upon max_ncpus are all accessed
* via cpu_seqid and not physical IDs. Previously, the value of max_ncpus
* was being set to the largest physical ID, which led to boot problems on
* systems with less than 1.25GB of memory.
*/
int
set_platform_max_ncpus(void)
{
int n;
/*
* Convert number of slot0 + slot1 boards to number of expander brds
* and constrain the value to an architecturally plausible range
*/
/* return maximum number of cpus possible on N expander boards */
return (n * STARCAT_BDSET_CPU_MAX - STARCAT_SLOT1_CPU_MAX);
}
int
{
}
#pragma weak mmu_init_large_pages
void
set_platform_defaults(void)
{
extern char *tod_module_name;
extern int ts_dispatch_extended;
extern int tsb_lgrp_affinity;
extern int segkmem_reloc;
extern void mmu_init_large_pages(size_t);
extern int ncpunode; /* number of CPUs detected by OBP */
#ifdef DEBUG
ce_verbose_memory = 2;
ce_verbose_other = 2;
#endif
/* Set the CPU signature function pointer */
/* Set appropriate tod module for starcat */
tod_module_name = "todstarcat";
/*
* Use the alternate TS dispatch table, which is better
* tuned for large servers.
*/
if (ts_dispatch_extended == -1)
ts_dispatch_extended = 1;
/*
* Use lgroup-aware TSB allocations on this platform,
* since they are a considerable performance win.
*/
tsb_lgrp_affinity = 1;
if ((mmu_page_sizes == max_mmu_page_sizes) &&
(mmu_ism_pagesize != DEFAULT_ISM_PAGESIZE)) {
if (&mmu_init_large_pages)
}
/*
* KPR (kernel page relocation) is supported on this platform.
*/
segkmem_reloc = 1;
} else {
}
}
#ifdef DEBUG
#endif
void
set_platform_cage_params(void)
{
extern pgcnt_t total_pages;
extern struct memlist *phys_avail;
if (kernel_cage_enable) {
#ifdef DEBUG
#endif
/*
* Note: we are assuming that post has load the
* whole show in to the high end of memory. Having
* taken this leap, we copy the whole of phys_avail
* the glist and arrange for the cage to grow
* downward (descending pfns).
*/
}
if (kcage_on)
else
}
void
load_platform_modules(void)
{
}
}
/*
* Starcat does not support power control of CPUs from the OS.
*/
/*ARGSUSED*/
int
{
(int (*)(struct cpu *))kobj_getsymvalue(
"drmach_cpu_poweron", 0);
if (starcat_cpu_poweron == NULL)
return (ENOTSUP);
else
return ((starcat_cpu_poweron)(cp));
}
/*ARGSUSED*/
int
{
(int (*)(struct cpu *))kobj_getsymvalue(
"drmach_cpu_poweroff", 0);
if (starcat_cpu_poweroff == NULL)
return (ENOTSUP);
else
return ((starcat_cpu_poweroff)(cp));
}
/*
* The following are currently private to Starcat DR
*/
int
{
return (starcat_boards);
}
int
{
return (starcat_cpu_per_board);
}
int
{
return (starcat_mem_per_board); /* each CPU has a memory controller */
}
int
{
return (starcat_mem_per_board);
}
int
{
return (starcat_io_per_board);
}
int
plat_max_cpumem_boards(void)
{
return (STARCAT_BDSET_MAX);
}
int
{
return (pfn >> mem_node_pfn_shift);
}
/* ARGSUSED */
void
{
/*
* Starcat mem slices are always aligned on a 128GB boundary,
* fixed, and limited to one slice per expander due to design
* of the centerplane ASICs.
*/
mem_node_physalign = 0;
/*
* Boot install lists are arranged <addr, len>, <addr, len>, ...
*/
}
}
/*
* Find the CPU associated with a slice at boot-time.
*/
void
{
int len;
int local_mc;
int portid;
int expnum;
int i;
/*
* Memory address decoding registers
* (see Chap 9 of SPARCV9 JSP-1 US-III implementation)
*/
0x400028, 0x400010, 0x400018, 0x400020
};
/*
* Starcat memory controller portid == global CPU id
*/
(portid == -1))
return;
/*
* The "reg" property returns 4 32-bit values. The first two are
* combined to form a 64-bit address. The second two are for a
* 64-bit size, but we don't actually need to look at that value.
*/
prom_printf("Warning: malformed 'reg' property\n");
return;
}
return;
/*
* Figure out whether the memory controller we are examining
*/
local_mc = 1;
else
local_mc = 0;
for (i = 0; i < MAX_BANKS_PER_MC; i++) {
mask = mc_decode_addr[i];
/*
* If the memory controller is local to this CPU, we use
* the special ASI to read the decode registers.
* Otherwise, we load the values from a magic address in
* I/O space.
*/
if (local_mc)
else
if (mc_decode[i] >> MC_VALID_SHIFT) {
if (sliceid < max_mem_nodes) {
/*
* Establish start-of-day mappings of
* lgroup platform handles to memnodes.
* Handle == Expander Number
* Memnode == Fixed 128GB Slice
*/
sliceid);
}
}
}
}
/*
* Starcat support for lgroups.
*
* On Starcat, an lgroup platform handle == expander number.
* For split-slot configurations (e.g. slot 0 and slot 1 boards
* in different domains) an MCPU board has only remote memory.
*
* The centerplane logic provides fixed 128GB memory slices
* each of which map to a memnode. The initial mapping of
* memnodes to lgroup handles is determined at boot time.
* A DR addition of memory adds a new mapping. A DR copy-rename
* swaps mappings.
*/
/*
* Convert board number to expander number.
*/
#define BOARDNUM_2_EXPANDER(b) (b >> 1)
/*
* Return the number of boards configured with NULL LPA.
*/
static int
check_for_null_lpa(void)
{
/*
* Read GDCD from IOSRAM.
* If this fails indicate a NULL LPA condition.
*/
return (EXP_COUNT+1);
return (EXP_COUNT+2);
}
/*
* Check for NULL LPAs on all slot 0 boards in domain
* (i.e. in all expanders marked good for this domain).
*/
nlpa = 0;
nlpa++;
}
return (nlpa);
}
/*
* Return the platform handle for the lgroup containing the given CPU
*
* For Starcat, lgroup platform handle == expander.
*/
extern int mpo_disabled;
extern lgrp_handle_t lgrp_default_handle;
int null_lpa_boards = -1;
{
/*
* Return the real platform handle for the CPU until
* such time as we know that MPO should be disabled.
* At that point, we set the "mpo_disabled" flag to true,
* and from that point on, return the default handle.
*
* By the time we know that MPO should be disabled, the
* first CPU will have already been added to a leaf
* lgroup, but that's ok. The common lgroup code will
* double check that the boot CPU is in the correct place,
* and in the case where mpo should be disabled, will move
* it to the root if necessary.
*/
if (mpo_disabled) {
/* If MPO is disabled, return the default (UMA) handle */
} else {
if (null_lpa_boards > 0) {
/* Determine if MPO should be disabled */
mpo_disabled = 1;
}
}
return (plathand);
}
/*
* Platform specific lgroup initialization
*/
void
plat_lgrp_init(void)
{
extern uint32_t lgrp_expand_proc_thresh;
extern uint32_t lgrp_expand_proc_diff;
/*
* Set tuneables for Starcat architecture
*
* lgrp_expand_proc_thresh is the minimum load on the lgroups
* this process is currently running on before considering
* expanding threads to another lgroup.
*
* lgrp_expand_proc_diff determines how much less the remote lgroup
* must be loaded before expanding to it.
*
* Since remote latencies can be costly, attempt to keep 3 threads
* within the same lgroup before expanding to the next lgroup.
*/
}
/*
* Platform notification of lgroup (re)configuration changes
*/
/*ARGSUSED*/
void
{
if (mpo_disabled)
return;
switch (evt) {
case LGRP_CONFIG_MEM_ADD:
/*
* Establish the lgroup handle to memnode translation.
*/
break;
case LGRP_CONFIG_MEM_DEL:
/* We don't have to do anything */
break;
case LGRP_CONFIG_MEM_RENAME:
/*
* During a DR copy-rename operation, all of the memory
* on one board is moved to another board -- but the
* the memory has changed locations without changing identity.
*
* Source is where we are copying from and target is where we
* are copying to. After source memnode is copied to target
* memnode, the physical addresses of the target memnode are
* renamed to match what the source memnode had. Then target
* memnode can be removed and source memnode can take its
* place.
*
* To do this, swap the lgroup handle to memnode mappings for
* the boards, so target lgroup will have source memnode and
* source lgroup will have empty target memnode which is where
* its memory will go (if any is added to it later).
*
* Then source memnode needs to be removed from its lgroup
* and added to the target lgroup where the memory was living
* target memnode and now lives in the source memnode with
* different physical addresses even though it is the same
* memory.
*/
/*
* Remove source memnode of copy rename from its lgroup
* and add it to its new target lgroup
*/
break;
default:
break;
}
}
/*
* Return latency between "from" and "to" lgroups
*
* This latency number can only be used for relative comparison
* between lgroups on the running system, cannot be used across platforms,
* and may not reflect the actual latency. It is platform and implementation
* specific, so platform gets to decide its value. It would be nice if the
* number was at least proportional to make comparisons more meaningful though.
* NOTE: The numbers below are supposed to be load latencies for uncached
* memory divided by 10.
*/
int
{
/*
* Return min remote latency when there are more than two lgroups
* (root and child) and getting latency between two different lgroups
* or root is involved
*/
return (48);
else
return (28);
}
/*
* Return platform handle for root lgroup
*/
plat_lgrp_root_hand(void)
{
if (mpo_disabled)
return (lgrp_default_handle);
return (LGRP_DEFAULT_HANDLE);
}
/* ARGSUSED */
void
{
}
void
load_platform_drivers(void)
{
char chosen_iosram[MAXNAMELEN];
/*
* Get /chosen node - that's where the tunnel property is
*/
nodeid = prom_chosennode();
/*
* Get the iosram property from the chosen node.
*/
prom_printf("Unable to get iosram property\n");
}
sizeof (chosen_iosram)) < 0) {
(void) prom_printf("prom_phandle_to_path(0x%x) failed\n",
tunnel);
tunnel);
}
/*
* Attach all driver instances along the iosram's device path
*/
}
(void) prom_printf("e_ddi_hold_devi_by_path(%s) failed\n",
}
/*
* iosram driver is now loaded so we need to set our read and
* write pointers.
*/
modgetsymvalue("iosram_rd", 0);
modgetsymvalue("iosram_wr", 0);
/*
* Need to check for null proc LPA after IOSRAM driver is loaded
* and before multiple lgroups created (when start_other_cpus() called)
*/
/* load and attach the axq driver */
}
/* load Starcat Solaris Mailbox Client driver */
}
/* load the DR driver */
}
/*
* Load the mc-us3 memory driver.
*/
else
/* Load the schizo pci bus nexus driver. */
}
/*
* No platform drivers on this platform
*/
char *platform_module_list[] = {
(char *)0
};
/*ARGSUSED*/
void
{
}
/*
* Update the signature(s) in the IOSRAM's domain data section.
*/
void
{
/*
* If the substate is REBOOT, then check for panic flow
*/
if (sub_state == SIGSUBST_REBOOT) {
}
/*
* cpuid == -1 indicates that the operation applies to all cpus.
*/
if (cpuid < 0) {
return;
}
(*iosram_wrp)(DOMD_MAGIC,
/*
* Under certain conditions we don't update the signature
* of the domain_state.
*/
return;
}
/*
* Update the signature(s) in the IOSRAM's domain data section for all CPUs.
*/
void
{
int i = 0;
/*
* First update the domain_state signature
*/
for (i = 0; i < NCPU; i++) {
(CPU_EXISTS|CPU_QUIESCED))) {
(*iosram_wrp)(DOMD_MAGIC,
DOMD_CPUSIGS_OFFSET + i * sizeof (sig_state_t),
}
}
}
get_cpu_sgn(int cpuid)
{
(*iosram_rdp)(DOMD_MAGIC,
}
get_cpu_sgn_state(int cpuid)
{
(*iosram_rdp)(DOMD_MAGIC,
}
/*
* Type of argument passed into plat_get_ecache_cpu via ddi_walk_devs
* for matching on specific CPU node in device tree
*/
typedef struct {
char *jnum; /* output, kmem_alloc'd if successful */
int cpuid; /* input, to match cpuid/portid/upa-portid */
/*
* plat_get_ecache_cpu is called repeatedly by ddi_walk_devs with pointers
* to device tree nodes (dip) and to a plat_ecache_cpu_arg_t structure (arg).
* Returning DDI_WALK_CONTINUE tells ddi_walk_devs to keep going, returning
* DDI_WALK_TERMINATE ends the walk. When the node for the specific CPU
* being searched for is found, the walk is done. But before returning to
* ddi_walk_devs and plat_get_ecacheunum, we grab this CPU's ecache-dimm-label
* property and set the jnum member of the plat_ecache_cpu_arg_t structure to
* point to the label corresponding to this specific ecache DIMM. It is up
* to plat_get_ecacheunum to kmem_free this string.
*/
static int
{
char *devtype;
char **dimm_labels;
int portid;
/*
* Check device_type, must be "cpu"
*/
"device_type", &devtype)
!= DDI_PROP_SUCCESS)
return (DDI_WALK_CONTINUE);
ddi_prop_free((void *)devtype);
return (DDI_WALK_CONTINUE);
}
ddi_prop_free((void *)devtype);
/*
* Check cpuid, portid, upa-portid (in that order), must
* match the cpuid being sought
*/
if (portid == -1)
if (portid == -1)
return (DDI_WALK_CONTINUE);
/*
* Found the right CPU, fetch ecache-dimm-label property
*/
!= DDI_PROP_SUCCESS) {
#ifdef DEBUG
portid);
#endif /* DEBUG */
return (DDI_WALK_TERMINATE);
}
KM_SLEEP);
#ifdef DEBUG
else
"cannot kmem_alloc for ecache dimm label");
#endif /* DEBUG */
}
ddi_prop_free((void *)dimm_labels);
return (DDI_WALK_TERMINATE);
}
/*
* Bit 4 of physical address indicates ecache 0 or 1
*/
#define ECACHE_DIMM_SHIFT 4
#define ECACHE_DIMM_MASK 0x10
/*
* plat_get_ecacheunum is called to generate the unum for an ecache error.
* After some initialization, nearly all of the work is done by ddi_walk_devs
* and plat_get_ecache_cpu.
*/
int
{
/*
* Walk the device tree, find this specific CPU, and get the label
* for this ecache, returned here in findcpu.jnum
*/
return (-1);
/*
* STARCAT_CPUID_TO_PORTID clears the CoreID bit so that
* STARCAT_CPUID_TO_AGENT will return a physical proc (0 - 3).
*/
/*
* NOTE: Any modifications to the snprintf() call below will require
* changing plat_log_fruid_error() as well!
*/
return (0);
}
/*ARGSUSED*/
int
{
int ret;
/*
* check if it's a Memory or an Ecache error.
*/
if (flt_in_memory) {
if (p2get_mem_unum != NULL) {
} else {
return (ENOTSUP);
}
} else if (flt_status & ECC_ECACHE) {
return (EIO);
} else {
return (ENOTSUP);
}
return (ret);
}
/*
* To keep OS mailbox handling localized, all we do is forward the call to the
* scosmb module (if it is available).
*/
int
{
/*
* find the symbol for the mailbox sender routine in the scosmb module
*/
if (ecc_mailbox_msg_func == NULL)
ecc_mailbox_msg_func = (int (*)(plat_ecc_message_type_t,
void *))modgetsymvalue("scosmb_log_ecc_error", 0);
/*
* If the symbol was found, call it. Otherwise, there is not much
* else we can do and console messages will have to suffice.
*/
if (ecc_mailbox_msg_func)
else
return (ENODEV);
}
int
{
}
/*
* board number for a given proc
*/
int
{
return (STARCAT_CPUID_TO_EXPANDER(proc));
}
/*
* This platform hook gets called from mc_add_mem_unum_label() in the mc-us3
* driver giving each platform the opportunity to add platform
* specific label information to the unum for ECC error logging purposes.
*/
void
{
char new_unum[UNUM_NAMLEN];
/*
* STARCAT_CPUID_TO_PORTID clears the CoreID bit so that
* STARCAT_CPUID_TO_AGENT will return a physical proc (0 - 3).
*/
/*
* NOTE: Any modifications to the two sprintf() calls below will
* require changing plat_log_fruid_error() as well!
*/
if (dimm == -1)
else
}
int
{
>= buflen) {
return (ENOSPC);
} else {
return (0);
}
}
/*
* This routine is used by the data bearing mondo (DMV) initialization
* routine to determine the number of hardware and software DMV interrupts
* that a platform supports.
*/
void
{
*swint = 0;
}
/*
* If provided, this function will be called whenever the nodename is updated.
* To keep OS mailbox handling localized, all we do is forward the call to the
* scosmb module (if it is available).
*/
void
plat_nodename_set(void)
{
/*
* find the symbol for the nodename update routine in the scosmb module
*/
nodename_update_func = (void (*)(uint64_t))
modgetsymvalue("scosmb_update_nodename", 0);
/*
* If the symbol was found, call it. Otherwise, log a note (but not to
* the console).
*/
if (nodename_update_func != NULL) {
} else {
"!plat_nodename_set: scosmb_update_nodename not found\n");
}
}
/*
* Preallocate enough memory for fcode claims.
*/
{
/*
* allocate the physical memory schizo fcode.
*/
return (efcode_alloc_base + efcode_size);
}
{
return (tmp_alloc_base);
}
/*
* This is a helper function to determine if a given
* node should be considered for a dr operation according
* to predefined dr names. This is accomplished using
* a function defined in drmach module. The drmach module
* owns the definition of dr allowable names.
* Formal Parameter: The name of a device node.
* Expected Return Value: -1, device node name does not map to a valid dr name.
* A value greater or equal to 0, name is valid.
*/
int
starcat_dr_name(char *name)
{
int (*drmach_name2type)(char *) = NULL;
/* Get a pointer to helper function in the dramch module. */
(int (*)(char *))kobj_getsymvalue("drmach_name2type_idx", 0);
if (drmach_name2type == NULL)
return (-1);
return ((*drmach_name2type)(name));
}
void
startup_platform(void)
{
}
/*
* KDI functions - used by the in-situ kernel debugger (kmdb) to perform
* platform-specific operations. These functions execute when the world is
* stopped, and as such cannot make any blocking calls, hold locks, etc.
* promif functions are a special case, and may be used.
*/
static void
starcat_system_claim(void)
{
}
static void
starcat_system_release(void)
{
}
void
{
}
/*
* This function returns 1 if large pages for kernel heap are supported
* and 0 otherwise.
*
* Currently we disable lp kmem support if kpr is going to be enabled
* because in the case of large pages hat_add_callback()/hat_delete_callback()
* cause network performance degradation
*/
int
plat_lpkmem_is_supported(void)
{
extern int segkmem_reloc;
if (hat_kpr_enabled && kernel_cage_enable &&
return (0);
return (1);
}