serengeti.c revision 85f5803819bea86c07827a9544494e4ad327d95d
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/dditypes.h>
#include <sys/ddipropdefs.h>
#include <sys/ddi_impldefs.h>
#include <sys/platform_module.h>
#include <sys/promimpl.h>
#include <sys/prom_plat.h>
#include <sys/sysmacros.h>
#include <sys/mem_cage.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/kdi_impl.h>
#include <sys/sgsbbc_iosram.h>
#include <sys/sgsbbc_iosram_priv.h>
#include <sys/sgsbbc_mailbox.h>
#include <sys/serengeti.h>
#include <sys/sgfrutypes.h>
#include <sys/machsystm.h>
#include <sys/sbd_ioctl.h>
#include <sys/sbdp_mem.h>
#include <sys/cheetahregs.h>
#include <sys/plat_ecc_unum.h>
#include <sys/plat_ecc_dimm.h>
static int sg_debug = 0;
#ifdef DEBUG
#else
#define DCMNERR
#endif
int (*p2get_mem_unum)(int, uint64_t, char *, int, int *);
/* local functions */
/*
* Local data.
*
* iosram_write_ptr is a pointer to iosram_write(). Because of
* kernel dynamic linking, we can't get to the function by name,
* but we can look up its address, and store it in this variable
* instead.
*
* We include the extern for iosram_write() here not because we call
* it, but to force compilation errors if its prototype doesn't
* match the prototype of iosram_write_ptr.
*
* The same issues apply to iosram_read() and iosram_read_ptr.
*/
/*CSTYLED*/
/*CSTYLED*/
/*
* Variable to indicate if the date should be obtained from the SC or not.
*/
/*
* Preallocation of spare tsb's for DR
*
* We don't allocate spares for Wildcat since TSBs should come
* out of memory local to the node.
*/
#define IOMMU_PER_SCHIZO 2
/*
* sg_max_ncpus is the maximum number of CPUs supported on Serengeti.
* sg_max_ncpus is set to be smaller than NCPU to reduce the amount of
* memory the logs take up until we have a dynamic log memory allocation
* solution.
*/
/*
* variables to control mailbox message timeouts.
*/
/* cached 'chosen' node_id */
/*
* Table that maps memory slices to a specific memnode.
*/
int slice_to_memnode[SG_MAX_SLICE];
int
{
}
#pragma weak mmu_init_large_pages
/*
* Apply Serengeti-specific kernel defaults: mark forthdebug as
* unsupported and set xc_tick_limit_scale to 5. DEBUG builds also set
* ce_verbose_memory and ce_verbose_other to 2 and report the todsg
* driver choice through prom_printf().
*
* NOTE(review): this view of the function looks incomplete -- the second
* DEBUG section contains an `else` with no visible `if`, and the final
* `if (&mmu_init_large_pages)` has no visible body. Confirm against the
* full source before relying on this description.
*/
void
set_platform_defaults(void)
{
extern int watchdog_enable;
extern uint64_t xc_tick_limit_scale;
extern void mmu_init_large_pages(size_t);
#ifdef DEBUG
char *todsg_name = "todsg";
ce_verbose_memory = 2;
ce_verbose_other = 2;
#endif /* DEBUG */
#ifdef DEBUG
/* tod_module_name should be set to "todsg" from OBP property */
prom_printf("Using todsg driver\n");
else {
prom_printf("Force using todsg driver\n");
}
#endif /* DEBUG */
/* Serengeti does not support forthdebug */
forthdebug_supported = 0;
/*
* Some DR operations require the system to be sync paused.
* Sync pause on Serengeti could potentially take up to 4
* seconds to complete depending on the load on the SC. To
* avoid send_mond panics during such operations, we need to
* increase xc_tick_limit to a larger value on Serengeti by
* setting xc_tick_limit_scale to 5.
*/
xc_tick_limit_scale = 5;
/*
* Only considered when every MMU page size is available and the ISM
* page size differs from DEFAULT_ISM_PAGESIZE.
*/
if ((mmu_page_sizes == max_mmu_page_sizes) &&
(mmu_ism_pagesize != DEFAULT_ISM_PAGESIZE)) {
/*
* mmu_init_large_pages is declared with #pragma weak, so its
* address is tested to see whether the symbol is present.
*/
if (&mmu_init_large_pages)
}
}
void
load_platform_modules(void)
{
}
}
/*ARGSUSED*/
int
{
if (serengeti_cpu_poweron == NULL)
return (ENOTSUP);
else
return ((serengeti_cpu_poweron)(cp));
}
/*ARGSUSED*/
int
{
if (serengeti_cpu_poweroff == NULL)
return (ENOTSUP);
else
return ((serengeti_cpu_poweroff)(cp));
}
#ifdef DEBUG
#endif
/* Preferred minimum cage size (expressed in pages)... for DR */
void
set_platform_cage_params(void)
{
extern pgcnt_t total_pages;
extern struct memlist *phys_avail;
if (kernel_cage_enable) {
#ifdef DEBUG
#endif
/*
* Post copies obp into the lowest slice. This requires the
* cage to grow upwards
*/
}
/* Only note when the cage is off since it should always be on. */
if (!kcage_on)
}
void
{
int mnode;
/*
* First see if this board already has a memnode associated
* with it. If not, see if this slice has a memnode. This
* covers the cases where a single slice covers multiple
* boards (cross-board interleaving) and where a single
* board has multiple slices (1+GB DIMMs).
*/
mnode = mem_node_alloc();
}
/*
* Align base at 16GB boundary
*/
}
}
/*
* Dynamically detect memory slices in the system by decoding
* the cpu memory decoder registers at boot time.
*/
void
{
int len;
int local_mc;
int portid;
int boardid;
int i;
(portid == -1))
return;
/*
* Decode the board number from the MC portid
*/
/*
* The "reg" property returns 4 32-bit values. The first two are
* combined to form a 64-bit address. The second two are for a
* 64-bit size, but we don't actually need to look at that value.
*/
prom_printf("Warning: malformed 'reg' property\n");
return;
}
return;
/*
* Figure out whether the memory controller we are examining
* belongs to this CPU or a different one.
*/
local_mc = 1;
else
local_mc = 0;
for (i = 0; i < SG_MAX_BANKS_PER_MC; i++) {
mask = SG_REG_2_OFFSET(i);
/*
* If the memory controller is local to this CPU, we use
* the special ASI to read the decode registers.
* Otherwise, we load the values from a magic address in
* I/O space.
*/
if (local_mc)
else
if (mc_decode[i] >> MC_VALID_SHIFT) {
/*
* The memory decode register is a bitmask field,
* so we can decode that into both a base and
* a span.
*/
}
}
}
/*
* This routine is run midway through the boot process. By the time we get
* here, we know about all the active CPU boards in the system, and we have
* extracted information about each board's memory from the memory
* controllers. We have also figured out which ranges of memory will be
* assigned to which memnodes, so we walk the slice table to build the table
* of memnodes.
*/
/* ARGSUSED */
void
{
int slice;
continue;
}
}
int
{
int node;
return (node);
}
/*
* Serengeti support for lgroups.
*
* On Serengeti, an lgroup platform handle == board number.
*
* Mappings between lgroup handles and memnodes are managed
* in addition to mappings between memory slices and memnodes
* to support cross-board interleaving as well as multiple
* slices per board (e.g. >1GB DIMMs). The initial mapping
* of memnodes to lgroup handles is determined at boot time.
* A DR addition of memory adds a new mapping. A DR copy-rename
* swaps mappings.
*/
/*
* Macro for extracting the board number from the CPU id
*/
/*
* Return the platform handle for the lgroup containing the given CPU
*
* For Serengeti, lgroup platform handle == board number
*/
{
return (CPUID_TO_BOARD(id));
}
/*
* Platform specific lgroup initialization
*/
void
plat_lgrp_init(void)
{
	extern uint32_t lgrp_expand_proc_thresh;
	extern uint32_t lgrp_expand_proc_diff;
	int slice = 0;

	/*
	 * Poison every slice_to_memnode[] entry with -1 so that a lookup
	 * made before a real mapping has been recorded is recognisably
	 * invalid.
	 */
	while (slice < SG_MAX_SLICE) {
		slice_to_memnode[slice] = -1;
		slice++;
	}

	/*
	 * Tuneables for the Serengeti architecture
	 *
	 * lgrp_expand_proc_thresh: minimum load on the lgroups a process
	 * is currently running on before its threads are considered for
	 * expansion to another lgroup.
	 *
	 * lgrp_expand_proc_diff: how much less loaded the remote lgroup
	 * must be before expanding to it.
	 *
	 * Serengeti gets its best bandwidth when load is spread across
	 * the machine, and remote latencies are low enough that
	 * inter-thread communication does not suffer much. Values
	 * equal to one CPU's load therefore spread work over as many
	 * lgroups as possible, one CPU at a time.
	 *
	 * NOTE(review): no assignment to either tuneable is present in
	 * this function as seen here; confirm against the full source.
	 */
}
/*
* Platform notification of lgroup (re)configuration changes
*/
/*ARGSUSED*/
void
{
switch (evt) {
case LGRP_CONFIG_MEM_ADD:
break;
case LGRP_CONFIG_MEM_DEL:
/* We don't have to do anything */
break;
case LGRP_CONFIG_MEM_RENAME:
/*
* During a DR copy-rename operation, all of the memory
* on one board is moved to another board -- but the
* memory has changed locations without changing identity.
*
* Source is where we are copying from and target is where we
* are copying to. After source memnode is copied to target
* memnode, the physical addresses of the target memnode are
* renamed to match what the source memnode had. Then target
* memnode can be removed and source memnode can take its
* place.
*
* To do this, swap the lgroup handle to memnode mappings for
* the boards, so target lgroup will have source memnode and
* source lgroup will have empty target memnode which is where
* its memory will go (if any is added to it later).
*
* Then source memnode needs to be removed from its lgroup
* and added to the target lgroup where the memory was living
* but under a different name/memnode. The memory was in the
* target memnode and now lives in the source memnode with
* different physical addresses even though it is the same
* memory.
*/
/*
* Remove source memnode of copy rename from its lgroup
* and add it to its new target lgroup
*/
break;
default:
break;
}
}
/*
* Return latency between "from" and "to" lgroups
*
* This latency number can only be used for relative comparison
* between lgroups on the running system, cannot be used across platforms,
* and may not reflect the actual latency. It is platform and implementation
* specific, so platform gets to decide its value. It would be nice if the
* number was at least proportional to make comparisons more meaningful though.
* NOTE: The numbers below are supposed to be load latencies for uncached
* memory divided by 10.
*/
int
{
/*
* Return min remote latency when there are more than two lgroups
* (root and child) and getting latency between two different lgroups
* or root is involved
*/
return (28);
else
return (23);
}
/* ARGSUSED */
void
{
}
/*
* Find dip for chosen IOSRAM
*/
find_chosen_dip(void)
{
char master_sbbc[MAXNAMELEN];
/*
* find the /chosen SBBC node, prom interface will handle errors
*/
nodeid = prom_chosennode();
/*
* get the 'iosram' property from the /chosen node
*/
}
sizeof (master_sbbc)) < 0) {
tunnel);
}
/*
* load and attach the sgsbbc driver.
* This will also attach all the sgsbbc driver instances
*/
}
/* translate a path name to a dev_info_t */
tunnel);
}
/* make sure devi_ref is ZERO */
return (dip);
}
/*
* Load the platform drivers and hook up the services that depend on
* them. In the lines visible here the routine:
*   - calls find_chosen_dip() so the chosen IOSRAM is initialized
*     before its clients;
*   - looks up "iosram_write" with modgetsymvalue() and, only when
*     iosram_write_ptr is non-NULL, looks up "iosram_read" as well;
*   - sets todsg_use_sc to TRUE;
*   - prints "Hardware watchdog enabled" when watchdog_enable is set
*     and ret is non-zero.
*
* NOTE(review): several statements are missing from this view (the
* driver load calls, the assignments of the looked-up symbols to
* iosram_write_ptr / iosram_read_ptr, the warning calls that own the
* dangling string fragments, and the statement that sets ret).
* Confirm against the full source.
*/
void
load_platform_drivers(void)
{
int ret;
/*
* Load and attach the mc-us3 memory driver.
*/
else
/*
* Initialize the chosen IOSRAM before its clients
* are loaded.
*/
(void) find_chosen_dip();
/*
* Ideally, we'd do this in set_platform_defaults(), but
* at that point it's too early to look up symbols.
*/
modgetsymvalue("iosram_write", 0);
if (iosram_write_ptr == NULL) {
" not found; signatures will not be updated\n");
} else {
/*
* The iosram read ptr is only needed if we can actually
* write CPU signatures, so only bother setting it if we
* set a valid write pointer, above.
*/
modgetsymvalue("iosram_read", 0);
if (iosram_read_ptr == NULL)
" not found\n");
}
/*
* Set todsg_use_sc to TRUE so that we will be getting date
* from the SC.
*/
todsg_use_sc = TRUE;
/*
* Now is a good time to activate hardware watchdog (if one exists).
*/
if (watchdog_enable)
if (ret != 0)
printf("Hardware watchdog enabled\n");
/*
* Load and attach the schizo pci bus nexus driver.
*/
}
/*
* No platform drivers on this platform
*/
char *platform_module_list[] = {
(char *)0
};
/*ARGSUSED*/
void
{
}
int
{
return (SG_MAX_BDS);
}
int
{
return (SG_MAX_IO_PER_BD);
}
int
{
return (SG_MAX_CMPS_PER_BD);
}
int
{
return (SG_MAX_CPUS_PER_BD);
}
int
{
return (SG_MAX_CMPS_PER_BD); /* each CPU die has a memory controller */
}
int
{
return (SG_MAX_MEM_PER_BD);
}
/*
 * Report the platform limit on CPU/memory boards, which is the
 * compile-time constant SG_MAX_CPU_BDS.
 */
int
plat_max_cpumem_boards(void)
{
	const int max_cpumem_bds = SG_MAX_CPU_BDS;

	return (max_cpumem_bds);
}
/*
 * Report the CPU count limit for this platform, taken from the
 * sg_max_ncpus variable.
 */
int
set_platform_max_ncpus(void)
{
	int ncpus_limit;

	ncpus_limit = sg_max_ncpus;

	return (ncpus_limit);
}
void
{
*swint = 0;
}
/*
* Our nodename has been set, pass it along to the SC.
*/
void
plat_nodename_set(void)
{
int rv; /* return value from call to mbox */
struct nodename_info {
} nni;
/*
* find the symbol for the mailbox routine
*/
modgetsymvalue("sbbc_mbox_request_response", 0);
return;
}
/*
* construct the message telling the SC our nodename
*/
req.msg_status = 0;
/*
* initialize the response back from the SC
*/
resp.msg_status = 0;
/*
* ship it and check for success
*/
if (rv != 0) {
} else if (resp.msg_status != 0) {
} else {
/*
* It is necessary to exchange the capability bitmap
* with SC before sending any ecc error information and
* indictment. We are calling the plat_ecc_capability_send()
* here just after sending the nodename successfully.
*/
if (rv == 0) {
" successful\n");
}
}
}
/*
* Flag to allow users to switch between using OBP's
* prom_get_unum() and mc-us3 driver's p2get_mem_unum()
* (for main memory errors only).
*/
int sg_use_prom_get_unum = 0;
/*
* Debugging flag: set to 1 to call into obp for get_unum, or set it to 0
* to call into the unum cache system. This is the E$ equivalent of
* sg_use_prom_get_unum.
*/
int sg_use_prom_ecache_unum = 0;
/* used for logging ECC errors to the SC */
#define SG_MEMORY_ECC 1
#define SG_ECACHE_ECC 2
#define SG_UNKNOWN_ECC (-1)
/*
* plat_get_mem_unum() generates a string identifying either the
* memory or E$ DIMM(s) during error logging. Depending on whether
* the error is E$ or memory related, the appropriate support
* routine is called to assist in the string generation.
*
* - For main memory errors we can use the mc-us3 driver's p2get_mem_unum()
* (or prom_get_unum() for debugging purposes).
*
* - For E$ errors we call sg_get_ecacheunum() to generate the unum (or
* prom_serengeti_get_ecacheunum() for debugging purposes).
*/
static int
int *lenp)
{
return (EIO);
else if (*lenp <= 1)
return (EINVAL);
else
return (0);
}
/*ARGSUSED*/
int
{
/*
* unum_func will either point to the memory driver's p2get_mem_unum()
* or to prom_get_unum() for memory errors.
*/
/*
* check if it's a Memory or an Ecache error.
*/
if (flt_in_memory) {
/*
* It's a main memory error.
*
* For debugging we allow the user to switch between
* using OBP's get_unum and the memory driver's get_unum
* so we create a pointer to the functions and switch
* depending on the sg_use_prom_get_unum flag.
*/
if (sg_use_prom_get_unum) {
return (sg_prom_get_unum(synd_code,
} else {
return (ENOTSUP);
}
} else if (flt_status & ECC_ECACHE) {
/*
* It's an E$ error.
*/
if (sg_use_prom_ecache_unum) {
/*
* We call to OBP to handle this.
*/
"Using prom_serengeti_get_ecacheunum from OBP");
return (EIO);
}
} else {
}
} else {
return (ENOTSUP);
}
return (0);
}
/*
* This platform hook gets called from mc_add_mem_unum_label() in the mc-us3
* driver giving each platform the opportunity to add platform
* specific label information to the unum for ECC error logging purposes.
*/
void
{
/*
* The mc-us3 driver deals with logical banks but for unum
* purposes we need to use physical banks so that the correct
* dimm can be physically located. Logical banks 0 and 2
* make up physical bank 0. Logical banks 1 and 3 make up
* physical bank 1. Here we do the necessary conversion.
*/
if (dimm == -1) {
} else {
}
}
int
{
return (ENOSPC);
} else {
return (0);
}
}
/*
* We log all ECC events to the SC so we send a mailbox
* message to the SC passing it the relevant data.
* ECC mailbox messages are sent via a taskq mechanism to
* prevent impaired system performance during ECC floods.
* Indictments have already passed through a taskq, so they
* are not queued here.
*/
int
{
int sleep_flag, log_error;
if (sg_ecc_taskq_func == NULL) {
sg_ecc_taskq_func = (void (*)(sbbc_ecc_mbox_t *))
modgetsymvalue("sbbc_mbox_queue_ecc_event", 0);
if (sg_ecc_taskq_func == NULL) {
"sbbc_mbox_queue_ecc_event not found");
return (ENODEV);
}
}
if (sg_ecc_mbox_func == NULL) {
sg_ecc_mbox_func = (int (*)(sbbc_ecc_mbox_t *))
modgetsymvalue("sbbc_mbox_ecc_output", 0);
if (sg_ecc_mbox_func == NULL) {
"sbbc_mbox_ecc_output not found");
return (ENODEV);
}
}
/*
* Initialize the request and response structures
*/
switch (msg_type) {
case PLAT_ECC_ERROR_MESSAGE:
msg_size = sizeof (plat_ecc_error_data_t);
log_error = 1;
break;
case PLAT_ECC_ERROR2_MESSAGE:
msg_size = sizeof (plat_ecc_error2_data_t);
log_error = 1;
break;
msg_size = sizeof (plat_ecc_indictment_data_t);
log_error = 0;
break;
msg_size = sizeof (plat_ecc_indictment2_data_t);
log_error = 0;
break;
msg_size = sizeof (plat_capability_data_t) +
log_error = 0;
break;
msg_size = sizeof (plat_dimm_sid_request_data_t);
log_error = 0;
break;
default:
return (EINVAL);
}
"unable to allocate sbbc_ecc_mbox");
return (ENOMEM);
}
"unable to allocate request msg_buf");
return (ENOMEM);
}
/*
* initialize the response back from the SC
*/
switch (msg_type) {
case PLAT_ECC_ERROR_MESSAGE:
case PLAT_ECC_ERROR2_MESSAGE:
/*
* For Error Messages, we go through a taskq.
* Queue up the message for processing
*/
(*sg_ecc_taskq_func)(msgp);
return (0);
/*
* For indictment and capability messages, we've already gone
* through the taskq, so we can call the mailbox routine
* directly. Find the symbol for the routine that sends
* the mailbox msg
*/
/* FALLTHRU */
return ((*sg_ecc_mbox_func)(msgp));
sizeof (plat_dimm_sid_board_data_t), sleep_flag);
return ((*sg_ecc_mbox_func)(msgp));
default:
ASSERT(0);
return (EINVAL);
}
}
/*
* m is redundant on serengeti as the multiplier is always 4
*/
/*ARGSUSED*/
int
{
}
/*
* board number for a given proc
*/
int
{
return (SG_CPU_BD_PORTID_TO_BD_NUM(proc));
}
/*
* Write a signature into the signature block (SBBC_SIGBLCK_KEY) through
* iosram_write_ptr. The messages in this routine identify it as
* cpu_sgn_update.
*
* In the lines visible here the routine:
*   - gives up early when iosram_write_ptr is NULL, since nothing can be
*     written without it;
*   - for a sub_state of SIGSUBST_REBOOT, reads the current signature
*     with iosram_read_ptr (when available) so a panic reboot can be
*     told apart from an ordinary one;
*   - writes the signature for one CPU when cpuid >= 0, otherwise loops
*     over all NCPU slots and writes it at SG_SGNBLK_CPUSIG_OFFSET(i);
*   - finally writes the signature at SG_SGNBLK_DOMAINSIG_OFFSET.
*
* NOTE(review): the signature line, the declarations of signature and
* current_sgn, the warning calls that own the dangling string
* fragments, and several conditions are missing from this view.
* Confirm against the full source.
*/
static
void
{
int i;
if (iosram_write_ptr == NULL) {
/*
* If the IOSRAM write pointer isn't set, we won't be able
* to write signatures to ANYTHING, so we may as well just
* write out an error message (if desired) and exit this
* routine now...
*/
"cpu_sgn_update: iosram_write() not found;"
" cannot write signature 0x%x for CPU(s) or domain\n",
return;
}
/*
* Differentiate a panic reboot from a non-panic reboot in the
* setting of the substate of the signature.
*
* If the new substate is REBOOT and we're rebooting due to a panic,
* then set the new substate to a special value indicating a panic
* reboot, SIGSUBST_PANIC_REBOOT.
*
* A panic reboot is detected by a current (previous) domain signature
* state of SIGST_EXIT, and a new signature substate of SIGSUBST_REBOOT.
* The domain signature state SIGST_EXIT is used as the panic flow
* progresses.
*
* At the end of the panic flow, the reboot occurs but we should know
* it was one that was involuntary, something that may be quite useful
* to know at OBP level.
*/
if (sub_state == SIGSUBST_REBOOT) {
if (iosram_read_ptr == NULL) {
"cpu_sgn_update: iosram_read() not found;"
" could not check current domain signature\n");
} else {
/* Fetch the signature currently stored in the block. */
(void) (*iosram_read_ptr)(SBBC_SIGBLCK_KEY,
(char *)&current_sgn, sizeof (current_sgn));
}
}
/*
* cpuid == -1 indicates that the operation applies to all cpus.
*/
if (cpuid >= 0) {
(void) (*iosram_write_ptr)(SBBC_SIGBLCK_KEY,
sizeof (signature));
} else {
for (i = 0; i < NCPU; i++) {
(CPU_EXISTS|CPU_QUIESCED))) {
continue;
}
(void) (*iosram_write_ptr)(SBBC_SIGBLCK_KEY,
SG_SGNBLK_CPUSIG_OFFSET(i), (char *)&signature,
sizeof (signature));
}
}
return;
}
/* Record the signature for the domain as a whole. */
(void) (*iosram_write_ptr)(SBBC_SIGBLCK_KEY,
SG_SGNBLK_DOMAINSIG_OFFSET, (char *)&signature,
sizeof (signature));
}
void
startup_platform(void)
{
}
/*
* A routine to convert a number (represented as a string) to
* the integer value it represents.
*/
static int
{
}
/*
* Parse an optionally signed decimal integer from the string at p.
* Leading whitespace and a single '+' or '-' are accepted before the
* digits. On return *pos addresses the first character that was not
* consumed. If no digit is found, 0 is returned and *pos is left at
* the offending character. No overflow detection is performed.
*
* NOTE(review): the signature line is missing from this view; p and
* pos are presumably the function's parameters -- confirm.
*/
static int
{
int n;
int c, neg = 0;
/* Fast path: when the first character is a digit, skip prefix handling. */
if (!isdigit(c = *p)) {
while (isspace(c))
c = *++p;
switch (c) {
case '-':
neg++;
/* FALLTHROUGH */
case '+':
c = *++p;
}
/* Nothing numeric after the optional whitespace and sign. */
if (!isdigit(c)) {
*pos = p;
return (0);
}
}
/*
* n is built up as the NEGATIVE of the value ('0' - c is <= 0 for a
* digit), so the most negative int can be formed without overflowing
* on the way there.
*/
for (n = '0' - c; isdigit(c = *++p); ) {
n *= 10; /* two steps to avoid unnecessary overflow */
n += '0' - c; /* accum neg to avoid surprises at MAX */
}
*pos = p;
/* n holds -(value): keep it for a '-' input, negate it otherwise. */
return (neg ? n : -n);
}
/*
* Get the three parts of the Serengeti PROM version.
* Used for feature readiness tests.
*
* Return 0 if version extracted successfully, -1 otherwise.
*/
int
{
int plen;
char vers[512];
static char version[] = "version";
if (node == OBP_BADNODE)
return (-1);
return (-1);
/* Make sure it's an OBP flashprom */
"unknown <version> string in </openprom>\n");
return (-1);
}
return (-1);
return (-1);
return (-1);
return (0);
}
/*
* Return 0 if system board Dynamic Reconfiguration
* is supported by the firmware, -1 otherwise.
*/
int
sg_prom_sb_dr_check(void)
{
/*
* Cached answer. 1 is a sentinel meaning "not evaluated yet"; once
* the block below has run it holds 0 or -1, so later calls skip the
* check and return the stored value.
*/
static int prom_res = 1;
if (prom_res == 1) {
int rv;
/*
* NOTE(review): the statement that sets rv and the condition that
* selects between the two outcomes below are not visible in this
* view, which also leaves the braces unbalanced. Confirm against
* the full source.
*/
prom_res = 0;
} else {
prom_res = -1;
}
}
return (prom_res);
}
/*
* Return 0 if cPCI Dynamic Reconfiguration
* is supported by the firmware, -1 otherwise.
*/
int
sg_prom_cpci_dr_check(void)
{
	int dr_status;

	/*
	 * cPCI DR readiness is currently decided by exactly the same
	 * version check as system-board DR, so defer to that routine.
	 * The two DR sub-systems are independent, so this could
	 * diverge in the future.
	 */
	dr_status = sg_prom_sb_dr_check();

	return (dr_status);
}
/*
* KDI functions - used by the in-situ kernel debugger (kmdb) to perform
* platform-specific operations. These functions execute when the world is
* stopped, and as such cannot make any blocking calls, hold locks, etc.
* promif functions are a special case, and may be used.
*/
/*
* Our implementation of this KDI op updates the CPU signature in the system
* controller. Note that we set the signature to OBP_SIG, rather than DBG_SIG.
* The Forth words we execute will, among other things, transform our OBP_SIG
* into DBG_SIG. They won't function properly if we try to use DBG_SIG.
*/
static void
sg_system_claim(void)
{
}
static void
sg_system_release(void)
{
}
static void
sg_console_claim(void)
{
}
static void
sg_console_release(void)
{
}
void
{
}