spitfire.c revision cf74e62b28857cc7cf88dd0a34d34df9a7e26fe8
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/archsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <sys/spitregs.h>
#include <sys/dditypes.h>
#include <sys/cpu_module.h>
#include <sys/prom_debug.h>
#include <sys/prom_plat.h>
#include <sys/sysmacros.h>
#include <sys/machtrap.h>
#include <sys/ecc_kstat.h>
#include <sys/watchpoint.h>
#include <sys/errclassify.h>
uint_t cpu_impl_dual_pgsz = 0;
/*
* Structure for the 8 byte ecache data dump and the associated AFSR state.
* There will be 8 of these structures used to dump an ecache line (64 bytes).
*/
typedef struct sf_ec_data_elm {
	uint64_t ec_d8;		/* the 8 bytes of E$ data */
	uint64_t ec_afsr;	/* AFSR captured with this chunk */
} ec_data_t;
/*
*/
typedef struct spitfire_async_flt {
int flt_ec_lcnt; /* number of bad E$ lines */
/*
* Prototypes for support routines in spitfire_asm.s:
*/
extern uint64_t read_and_clear_afsr();
/*
* Spitfire module routines:
*/
static void cpu_async_log_err(void *flt);
/*PRINTFLIKE6*/
static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
    uint_t logflags, const char *endstr, const char *fmt, ...);
static void ecache_page_retire(void *);
static void add_leaky_bucket_timeout(void);
extern uint_t read_all_memscrub;
extern void memscrub_run(void);
/*
* Default ecache mask and shift settings for Spitfire. If we detect a
* different CPU implementation, we will modify these values at boot time.
*/
static int cpu_ec_par_shift = S_ECPAR_SHIFT;
static int cpu_ec_tag_shift = S_ECTAG_SHIFT;
static int cpu_ec_state_shift = S_ECSTATE_SHIFT;
/*
* Default ecache state bits for Spitfire. These individual bits indicate if
* the given line is in any of the valid or modified states, respectively.
* Again, we modify these at boot if we detect a different CPU.
*/
/*
* This table is used to determine which bit(s) is(are) bad when an ECC
* error occurs. The array is indexed by an 8-bit syndrome. The entries
* of this array have the following semantics:
*
* 00-63 The number of the bad bit, when only one bit is bad.
* 64 ECC bit C0 is bad.
* 65 ECC bit C1 is bad.
* 66 ECC bit C2 is bad.
* 67 ECC bit C3 is bad.
* 68 ECC bit C4 is bad.
* 69 ECC bit C5 is bad.
* 70 ECC bit C6 is bad.
* 71 ECC bit C7 is bad.
* 72 Two bits are bad.
* 73 Three bits are bad.
* 74 Four bits are bad.
* 75 More than Four bits are bad.
* 76 NO bits are bad.
* Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
*/
#define C0 64
#define C1 65
#define C2 66
#define C3 67
#define C4 68
#define C5 69
#define C6 70
#define C7 71
#define M2 72
#define M3 73
#define M4 74
#define MX 75
#define NA 76
static char ecc_syndrome_tab[] =
{
};
#define SYND_TBL_SIZE 256
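/*
 * Illustrative sketch, not from the original source: classifying an 8-bit
 * syndrome with the table above. The SPITFIRE_EXAMPLES guard and the
 * function name are hypothetical; only ecc_syndrome_tab[] and the
 * C0..NA codes come from this file.
 */
#ifdef SPITFIRE_EXAMPLES
static const char *
synd_code_class(uchar_t synd)
{
	char code = ecc_syndrome_tab[synd];

	if (code < C0)
		return ("single data bit in error");		/* 0-63 */
	else if (code <= C7)
		return ("single ECC check bit in error");	/* C0-C7 */
	else if (code <= M4)
		return ("two to four bits in error");		/* M2-M4 */
	else if (code == MX)
		return ("more than four bits in error");
	else
		return ("no bits in error");			/* NA */
}
#endif	/* SPITFIRE_EXAMPLES */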
/*
* Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
*/
#define UDBL_REG 0x8000
/*
* These error types are specific to Spitfire and are used internally for the
* spitfire fault structure flt_type field.
*/
#define CPU_UE_ERR 0 /* uncorrectable errors - UEs */
/*
* Macro to access the "Spitfire cpu private" data structure.
*/
/*
* set to 0 to disable automatic retiring of pages on
* DIMMs that have excessive soft errors
*/
int automatic_page_removal = 1;
/*
* Heuristic for figuring out which module to replace.
* Relative likelihood that this P_SYND indicates that this module is bad.
* We call it a "score", though, not a relative likelihood.
*
* Step 1.
* Assign a score to each byte of P_SYND according to the following rules:
* If no bits on (0x00) or all bits on (0xFF), then give it a 5.
* If one bit on, give it a 95.
* If seven bits on, give it a 10.
* If two bits on:
* in different nybbles, a 90
* in same nybble, but unaligned, 85
* in same nybble and as an aligned pair, 80
* If six bits on, look at the bits that are off:
* in same nybble and as an aligned pair, 15
* in same nybble, but unaligned, 20
* in different nybbles, a 25
* If three bits on:
* in different nybbles, no aligned pairs, 75
* in different nybbles, one aligned pair, 70
* in the same nybble, 65
* If five bits on, look at the bits that are off:
* in the same nybble, 30
* in different nybbles, one aligned pair, 35
* in different nybbles, no aligned pairs, 40
* If four bits on:
* all in one nybble, 45
* as two aligned pairs, 50
* one aligned pair, 55
* no aligned pairs, 60
*
* Step 2:
* Take the higher of the two scores (one for each byte) as the score
* for the module.
*
* Print the score for each module, and field service should replace the
* module with the highest score.
*/
/*
 * In the table below, the first number in each row/column comment is the
 * number of bits set in that nybble; the second is the hex digit.
 */
static int
p_synd_score_table[256] = {
/* 0 1 1 2 1 2 2 3 1 2 2 3 2 3 3 4 */
/* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F */
/* 0 0 */ 5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10, 5,
};
int
ecc_psynd_score(ushort_t p_synd)
{
	int i, j, a, b;

	i = p_synd & 0xFF;
	j = (p_synd >> 8) & 0xFF;

	a = p_synd_score_table[i];
	b = p_synd_score_table[j];

	return (a > b ? a : b);
}
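/*
 * Worked example (hypothetical syndromes, not from the original source):
 * for P_SYND 0x8001, each byte has exactly one bit set, so both bytes
 * score 95 and the module score is 95. For P_SYND 0x0300, the high byte
 * is an aligned pair in one nybble (score 80) and the low byte is zero
 * (score 5), so the module scores 80.
 */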
/*
* Async Fault Logging
*
* To ease identifying, reading, and filtering async fault log messages, the
* label [AFT#] is now prepended to each async fault message. These messages
* and the logging rules are implemented by cpu_aflt_log(), below.
*
* [AFT0] - Tag for log messages that are associated with corrected ECC errors.
* This includes both corrected ECC memory and ecache faults.
*
* [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
* else except CE errors) with a priority of 1 (highest). This tag
* is also used for panic messages that result from an async fault.
*
* [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
* [AFT3] or parity errors. For example, AFT2 is used for the actual dump
* of the E-$ data and tags.
*
* In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
* printed on the console. To send all AFT logs to both the log and the
* console, set aft_verbose = 1.
*/
#define PARERR_LFLAGS (CMN_LFLAGS)
#define CP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL & \
    ~CPU_FLTCPU & ~CPU_FAULTPC)
#define BERRTO_LFLAGS (CMN_LFLAGS)
#define NO_LFLAGS (0)
#define AFSR_FMTSTR0 "\020\1ME"
#define AFSR_FMTSTR1 "\020\040PRIV\037ISAP\036ETP\035IVUE\034TO" \
"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
#define UDB_FMTSTR "\020\012UE\011CE"
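/*
 * Background note (assumption, not from the original comments): these are
 * kernel %b bit-field format strings. The leading \020 (octal 20 = 16)
 * selects hexadecimal output of the value, and each subsequent pair is a
 * 1-origin bit number followed by the name printed when that bit is set,
 * e.g. \1ME names the least significant bit "ME".
 */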
/*
* Save the cache bootup state for use when internal
* caches are to be re-enabled after an error occurs.
*/
uint64_t cache_boot_state = 0;
/*
* PA[31:0] represent Displacement in UPA configuration space.
*/
/*
* Spitfire legacy globals
*/
int itlb_entries;
int dtlb_entries;
void
cpu_setup(void)
{
extern int page_retire_messages;
extern int page_retire_first_ue;
extern int at_flags;
#if defined(SF_ERRATA_57)
extern caddr_t errata57_limit;
#endif
/*
* Spitfire isn't currently FMA-aware, so we have to enable the
* page retirement messages. We also change the default policy
* for UE retirement to allow clearing of transient errors.
*/
page_retire_messages = 1;
page_retire_first_ue = 0;
/*
* save the cache bootup state.
*/
if (use_page_coloring) {
do_pg_coloring = 1;
}
/*
* Tune pp_slots to use up to 1/8th of the tlb entries.
*/
/*
* Block stores invalidate all pages of the d$ so pagecopy
* et al. do not need virtual translations with virtual
* coloring taken into consideration.
*/
isa_list =
"sparcv9+vis sparcv9 "
"sparcv8plus+vis sparcv8plus "
"sparcv8 sparcv8-fsmuld sparcv7 sparc";
/*
* On Spitfire, there's a hole in the address space
* that we must never map (the hardware only supports 44 bits of
* virtual address). Later CPUs are expected to have wider
* supported address ranges.
*
* See address map on p23 of the UltraSPARC 1 user's manual.
*/
/*
* A spitfire call bug requires us to be a further 4Gbytes of
* firewall from the spec.
*
* See Spitfire Errata #21
*/
/*
* The kpm mapping window.
* kpm_size:
* The size of a single kpm range.
* The overall size will be: kpm_size * vac_colors.
* kpm_vbase:
* The virtual start address of the kpm range within the kernel
* virtual address space. kpm_vbase has to be kpm_size aligned.
*/
kpm_size_shift = 41;
#if defined(SF_ERRATA_57)
#endif
/*
* Disable large pages for text by default.
* Note that the other defaults are set in sun4u/vm/mach_vm_dep.c.
*/
}
static int
getintprop(pnode_t node, char *name, int deflt)
{
	int value;

	switch (prom_getproplen(node, name)) {
	case 0:
		value = 1;	/* boolean properties */
		break;
	case sizeof (int):
		(void) prom_getprop(node, name, (caddr_t)&value);
		break;
	default:
		value = deflt;
		break;
	}
	return (value);
}
/*
* Set the magic constants of the implementation.
*/
void
cpu_fiximp(pnode_t dnode)
{
extern int dcache_line_mask;
int i, a;
static struct {
char *name;
int *var;
} prop[] = {
"dcache-size", &dcache_size,
"dcache-line-size", &dcache_linesize,
"icache-size", &icache_size,
"icache-line-size", &icache_linesize,
"ecache-size", &ecache_size,
"ecache-line-size", &ecache_alignsize,
"ecache-associativity", &ecache_associativity,
"#itlb-entries", &itlb_entries,
"#dtlb-entries", &dtlb_entries,
};
}
}
i = 0; a = vac_size;
while (a >>= 1)
++i;
vac_shift = i;
vac = 1;
/*
* UltraSPARC I & II have ecache sizes running
* as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
* and 8 MB. The copy limits below are adjusted
* according to the cache size, with a floor of
* VIS_COPY_THRESHOLD bytes before the kernel will use
* VIS instructions.
*
* We assume that all CPUs on the system have the same size
* ecache. We're also called very early in the game, so
* these values can be overwritten later (e.g. from /etc/system).
*/
if (ecache_size <= 524288) {
} else if (ecache_size == 1048576) {
hw_copy_limit_2 = 1024;
hw_copy_limit_4 = 1280;
hw_copy_limit_8 = 1536;
} else if (ecache_size == 2097152) {
hw_copy_limit_2 = 1536;
hw_copy_limit_4 = 2048;
hw_copy_limit_8 = 2560;
} else if (ecache_size == 4194304) {
hw_copy_limit_2 = 2048;
hw_copy_limit_4 = 2560;
hw_copy_limit_8 = 3072;
} else {
hw_copy_limit_2 = 2560;
hw_copy_limit_4 = 3072;
hw_copy_limit_8 = 3584;
}
}
/*
* Called by setcpudelay
*/
void
cpu_init_tick_freq(void)
{
/*
* Determine the cpu frequency by calling
* tod_get_cpufrequency. Use an approximate frequency
* value computed by the prom if the tod module
* is not initialized and loaded yet.
*/
} else {
#if defined(HUMMINGBIRD)
/*
* the hummingbird version of %stick is used as the basis for
* low level timing; this provides an independent constant-rate
* clock for general system use, and frees power mgmt to set
* various cpu clock speeds.
*/
if (system_clock_freq == 0)
#else /* SPITFIRE */
#endif
}
}
extern uint64_t xc_tick_limit;
extern uint64_t xc_tick_jump_limit;
#ifdef SEND_MONDO_STATS
#endif
/*
* Note: A version of this function is used by the debugger via the KDI,
* and must be kept in sync with this version. Any changes made to this
* function to support new chips or to accommodate errata must also be included
* in the KDI-specific version. See spitfire_kdi.c.
*/
void
send_one_mondo(int cpuid)
{
for (;;) {
if (idsr == 0)
break;
/*
* When we detect an irregular tick jump, we adjust
* the timer window to the current tick value.
*/
if (ticks > xc_tick_jump_limit) {
if (panic_quiesce)
return;
"send mondo timeout (target 0x%x) [%d NACK %d "
}
busy++;
continue;
}
drv_usecwait(1);
nack++;
busy = 0;
}
#ifdef SEND_MONDO_STATS
#endif
}
void
send_mondo_set(cpuset_t set)
{
int i;
for (i = 0; i < NCPU; i++)
if (CPU_IN_SET(set, i)) {
send_one_mondo(i);
CPUSET_DEL(set, i);
if (CPUSET_ISNULL(set))
break;
}
}
void
syncfpu(void)
{
}
/*
* Determine the size of the CPU module's error structure in bytes. This is
* called once during boot to initialize the error queues.
*/
int
cpu_aflt_size(void)
{
/*
* We need to determine whether this is a sabre, Hummingbird or a
* Spitfire cpu so we can set the appropriate state for the
* ecache tag manipulation. We can't do this in cpu_setup() as it is
* too early in the boot flow and the cpunodes are not initialized.
* This routine will be called once after cpunodes[] is ready, so do
* it here.
*/
isus2i = 1;
/* These states do not exist in sabre - set to 0xFF */
cpu_ec_state_shr = 0xFF;
cpu_ec_state_own = 0xFF;
isus2e = 1;
/* These states do not exist in hummingbird - set to 0xFF */
cpu_ec_state_shr = 0xFF;
cpu_ec_state_own = 0xFF;
}
return (sizeof (spitf_async_flt));
}
/*
* Correctable ecc error trap handler
*/
/*ARGSUSED*/
void
{
int queue = 1;
/*
* Note: the Spitfire data buffer error registers
* (upper and lower halves) are or'ed into the upper
* word of the afsr by ce_err().
*/
t_afsr &= S_AFSR_MASK;
/* Setup the async fault structure */
ecc->flt_in_memory =
/*
* Check for fatal conditions.
*/
/*
* Paranoid checks for valid AFSR and UDBs
*/
"** Panic due to CE bit not set in the AFSR",
" Corrected Memory Error on");
}
/*
* We want to skip logging only if ALL the following
* conditions are true:
*
* 1. There is only one error
* 2. That error is a correctable memory error
* 3. The error is caused by the memory scrubber (in which case
* the error will have occurred under on_trap protection)
* 4. The error is on a retired page
*
* Note: OT_DATA_EC is used in places other than the memory scrubber.
* However, none of those errors should occur on a retired page.
*/
queue = 0;
}
}
}
"** Panic due to CE bits not set in the UDBs",
" Corrected Memory Error on");
}
if (queue) {
}
}
if (queue) {
}
}
/*
* Re-enable all error trapping (CEEN currently cleared).
*/
clr_datapath();
}
/*
* Cpu specific CE logging routine
*/
static void
{
return;
}
" Corrected Memory Error detected by");
}
/*
* Spitfire does not perform any further CE classification refinement
*/
/*ARGSUSED*/
int
{
return (0);
}
char *
{
return (ERR_TYPE_DESC_INTERMITTENT);
return (ERR_TYPE_DESC_PERSISTENT);
return (ERR_TYPE_DESC_STICKY);
return (ERR_TYPE_DESC_UNKNOWN);
}
/*
* Called by correctable ecc error logging code to print out
* the stick/persistent/intermittent status of the error.
*/
static void
{
char *status1_str = "Memory";
char *status2_str = "Intermittent";
if (status & ECC_ECACHE)
status1_str = "Ecache";
if (status & ECC_STICKY)
status2_str = "Sticky";
else if (status & ECC_PERSISTENT)
status2_str = "Persistent";
NULL, " Corrected %s Error on %s is %s",
}
/*
* check for a valid ce syndrome, then call the
* displacement flush scrubbing code, and then check the afsr to see
* if the error was not scrubbed successfully and is therefore sticky.
*/
/*ARGSUSED1*/
void
{
/*
* It is possible that the flt_addr is not a valid
* physical address. To deal with this, we disable
* NCEEN while we scrub that address. If this causes
* a TIMEOUT or BERR, we know this is not a valid
* memory location.
*/
eer = get_error_enable();
/*
* Check whether the error detected by IO is persistent, sticky or
* intermittent.
*/
}
get_asyncflt(&afsr);
/*
* Must ensure that we don't get the TIMEOUT or BERR
* when we reenable NCEEN, so we clear the AFSR.
*/
return;
}
/*
* Check and clear any ECC errors from the scrub. If the scrub did
* not trip over the error, mark it intermittent. If the scrub did
* trip the error again and it did not scrub away, mark it sticky.
* Otherwise mark it persistent.
*/
status = ECC_STICKY;
else
} else
}
/*
* get the syndrome and unum, and then call the routines
* to check the other cpus and iobuses, and then do the error logging.
*/
/*ARGSUSED1*/
void
{
char unum[UNUM_NAMLEN];
int len = 0;
int ce_verbose = 0;
int err;
/* Get the unum string for logging purposes */
UNUM_NAMLEN, &len);
/* Call specific error logging routine */
/*
* Count errors per unum.
* Non-memory errors are all counted via a special unum string.
*/
}
ce_verbose = 1;
ce_verbose = (ce_verbose_memory > 0);
} else {
ce_verbose = 1;
}
if (ce_verbose) {
int synd_code;
if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
NULL, " ECC Data Bit %2d was in error "
"and corrected", synd_code);
} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
NULL, " ECC Check Bit %2d was in error "
} else {
/*
* These are UE errors - we shouldn't be getting CE
* traps for these; handle them in case of bad h/w.
*/
switch (synd_code) {
case M2:
" Two ECC Bits were in error");
break;
case M3:
" Three ECC Bits were in error");
break;
case M4:
" Four ECC Bits were in error");
break;
case MX:
" More than Four ECC bits were "
"in error");
break;
default:
" Unknown fault syndrome %d",
break;
}
}
}
/* Display entire cache line, if valid address */
}
/*
* We route all errors through a single switch statement.
*/
void
{
case CPU_FAULT:
break;
case BUS_FAULT:
break;
default:
break;
}
}
/* Values for action variable in cpu_async_error() */
#define ACTION_NONE 0
#define ACTION_TRAMPOLINE 1
#define ACTION_AST_FLAGS 2
/*
* Access error trap handler for asynchronous cpu errors. This routine is
* called to handle a data or instruction access error. All fatal errors are
* completely handled by this routine (by panicking). Non fatal error logging
* is queued for later processing either via AST or softint at a lower PIL.
* In case of panic, the error log queue will also be processed as part of the
* panic flow to ensure all errors are logged. This routine is called with all
* errors disabled at PIL15. The AFSR bits are cleared and the UDBL and UDBH
* error bits are also cleared. The hardware has also disabled the I and
* D-caches for us, so we must re-enable them before returning.
*
*
* _______________________________________________________________
* | Privileged tl0 | Unprivileged |
* | Protected | Unprotected | Protected | Unprotected |
* |on_trap|lofault| | | |
* -------------|-------|-------+---------------+---------------+-------------|
* | | | | | |
* | | | | | |
* | | | | | |
* WP | L,M,p | L,M,p | L,M,p | n/a | L,M,p |
* | | | | | |
* ____________________________________________________________________________
*
*
* Action codes:
*
* L - log
* M - kick off memscrubber if flt_in_memory
* P - panic
* p - panic if US-IIi or US-IIe (Sabre); overrides R and M
* R - i) if aft_panic is set, panic
* ii) otherwise, send hwerr event to contract and SIGKILL to process
* S - send SIGBUS to process
* T - trampoline
*
* Special cases:
*
* 1) if aft_testfatal is set, all faults result in a panic regardless
* of type (even WP), protection (even on_trap), or privilege.
*/
/*ARGSUSED*/
void
{
char pr_reason[28];
int action = ACTION_NONE;
int expected = DDI_FM_ERR_UNEXPECTED;
/*
* We need to look at p_flag to determine if the thread detected an
* error while dumping core. We can't grab p_lock here, but it's ok
* because we just need a consistent snapshot and we know that everyone
* else will store a consistent set of bits while holding p_lock. We
* don't have to worry about a race because SDOCORE is set once prior
* to doing i/o from the process's address space and is never cleared.
*/
pr_reason[0] = '\0';
/*
* Note: the Spitfire data buffer error registers
* (upper and lower halves) are or'ed into the upper
* word of the afsr by async_err() if P_AFSR_UE is set.
*/
/*
* Grab the ttype encoded in <63:53> of the saved
* afsr passed from async_err()
*/
t_afsr &= S_AFSR_MASK;
/*
* Initialize most of the common and CPU-specific structure. We derive
* aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit. The
* initial setting of aflt->flt_panic is based on TL: we must panic if the
* trap occurred at TL > 0. We also set flt_panic if the test/demo
* tuneable aft_testfatal is set (not the default).
*/
/*
* Set flt_status based on the trap type. If we end up here as the
* result of a UE detected by the CE handling code, leave status 0.
*/
switch (ttype) {
case T_DATA_ERROR:
break;
case T_INSTR_ERROR:
break;
}
/*
* Check for fatal async errors.
*/
/*
* If the trap occurred in privileged mode at TL=0, we need to check to
* see if we were executing in the kernel under on_trap() or t_lofault
* protection. If so, modify the saved registers so that we return
* from the trap to the appropriate trampoline routine.
*/
}
/*
* for peeks and caut_gets errors are expected
*/
if (!hp)
}
}
}
/*
* Determine if this error needs to be treated as fatal. Note that
* multiple errors detected upon entry to this trap handler does not
* necessarily warrant a panic. We only want to panic if the trap
* happened in privileged mode and not under t_ontrap or t_lofault
* protection. The exception is WP: if we *only* get WP, it is not
* fatal even if the trap occurred in privileged mode, except on Sabre.
*
* aft_panic, if set, effectively makes us treat usermode
* UE/EDP/LDP faults as if they were privileged, so we will
* panic instead of sending a contract event. A lofault-protected
* fault will normally follow the contract event; if aft_panic is
* set this will be changed to a panic.
*
* For usermode BERR/BTO errors, e.g. from processes performing device
* control through mapped device memory, we need only deliver
* a SIGBUS to the offending process.
*
* Some additional flt_panic reasons (eg, WP on Sabre) will be
* checked later; for now we implement the common reasons.
*/
/*
* Beware - multiple bits may be set in AFSR
*/
}
}
}
/*
* IO errors that may have resulted in this trap.
*/
}
/*
* Handle UE: If the UE is in memory, we need to flush the bad line from
* the E-cache. We also need to query the bus nexus for fatal errors.
* For sabre, we will panic on UEs. Attempts to do diagnostic read on
* caches may introduce more parity errors (especially when the module
* is bad) and in sabre there is no guarantee that such errors
* (if introduced) are written back as poisoned data.
*/
int i;
MMU_PAGESHIFT)) ? 1: 0;
/*
* With UE, we have the PA of the fault.
* Let's do a diagnostic read to get the ecache
* data and tag info of the bad line for logging.
*/
if (aflt->flt_in_memory) {
/* touch the line to put it in ecache */
acc_afsr |= read_and_clear_afsr();
acc_afsr |= (read_and_clear_afsr() &
~(P_AFSR_EDP | P_AFSR_UE));
for (i = 0; i < ecache_associativity; i++) {
ecache_idx = i * ec_set_size +
if ((state & cpu_ec_state_valid) &&
break;
}
/*
* Check to see if the ecache tag is valid for the
* fault PA. In the very unlikely event where the
* line could be victimized, no ecache info will be
* available. If this is the case, capture the line
* from memory instead.
*/
if ((state & cpu_ec_state_valid) == 0 ||
acc_afsr |= read_and_clear_afsr();
acc_afsr |= (read_and_clear_afsr() &
~(P_AFSR_EDP | P_AFSR_UE));
/* null afsr value */
}
/*
* Mark tag invalid to indicate mem dump
* when we print out the info.
*/
}
/*
* Flush out the bad line
*/
}
/*
* Ask our bus nexus friends if they have any fatal errors. If
* so, they will log appropriate error messages and panic as a
* result. We then queue an event for each UDB that reports a
* UE. Each UE reported in a UDB will have its own log message.
*
* Note from kbn: In the case where there are multiple UEs
* (ME bit is set) - the AFAR address is only accurate to
* the 16-byte granularity. One cannot tell whether the AFAR
* belongs to the UDBH or UDBL syndromes. In this case, we
* always report the AFAR address to be 16-byte aligned.
*
* If we're on a Sabre, there is no SDBL, but it will always
* read as zero, so the sdbl test below will safely fail.
*/
}
}
/*
* We got a UE and are panicking, save the fault PA in a known
* location so that the platform specific panic code can check
* for copyback errors.
*/
panic_aflt = *aflt;
}
}
/*
* Handle EDP and LDP: Locate the line with bad parity and enqueue an
* async error for logging. For Sabre, we panic on EDP or LDP.
*/
if (t_afsr & P_AFSR_EDP)
if (t_afsr & P_AFSR_LDP)
/*
* Here we have no PA to work with.
* Scan each line in the ecache to look for
* the one with bad parity.
*/
/*
* If we found a bad PA, update the state to indicate if it is
* memory or I/O space. This code will be important if we ever
* support cacheable frame buffers.
*/
MMU_PAGESHIFT)) ? 1 : 0;
}
}
/*
* Timeout and bus error handling. There are two cases to consider:
*
* (1) If we are in the kernel protected by ddi_peek or ddi_poke, we
* have already modified the saved registers so that we will return
* from the trap to the appropriate trampoline routine; otherwise panic.
*
* (2) In user mode, we can simply use our AST mechanism to deliver
* a SIGBUS. We do not log the occurrence - processes performing
* device control would generate lots of uninteresting messages.
*/
if (t_afsr & P_AFSR_BERR)
}
}
/*
* Handle WP: WP happens when the ecache is victimized and a parity
* error was detected on a writeback. The data in question will be
* poisoned as a UE will be written back. The PA is not logged and
* it is possible that it doesn't belong to the trapped thread. The
* WP trap is not fatal, but it could be fatal to someone that
* subsequently accesses the toxic page. We set read_all_memscrub
* to force the memscrubber to read all of memory when it awakens.
* For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
* UE back to poison the data.
*/
} else {
read_all_memscrub = 1;
}
}
/*
* Handle trapping CP error: In Sabre/Hummingbird, parity error in
* the ecache on a copyout due to a PCI DMA read is signaled as a CP.
* This is fatal.
*/
} else {
/*
* Orphan CP: Happens due to signal integrity problem
* on a CPU, where a CP is reported, without reporting
* its associated UE. This is handled by locating the
* line with bad parity and kicking off the memscrubber
* to find the UE if it is in memory or in another CPU's cache.
*/
/*
* Here we have no PA to work with.
* Scan each line in the ecache to look for
* the one with bad parity.
*/
&oafsr);
/*
* If we found a bad PA, update the state to indicate
* if it is memory or I/O space.
*/
MMU_PAGESHIFT)) ? 1 : 0;
}
read_all_memscrub = 1;
}
}
/*
* If we queued an error other than WP or CP and we are going to return
* from the trap and the error was in user mode or inside of a
* copy routine, set AST flag so the queue will be drained before
* returning to user mode.
*
* For UE/LDP/EDP, the AST processing will SIGKILL the process
* and send an event to its process contract.
*
* For BERR/BTO, the AST processing will SIGBUS the process. There
* will have been no error queued in this case.
*/
if ((t_afsr &
int pcb_flag = 0;
pcb_flag |= ASYNC_HWERR;
if (t_afsr & P_AFSR_BERR)
pcb_flag |= ASYNC_BERR;
}
/*
* In response to a deferred error, we must do one of three things:
* (1) set the AST flags, (2) trampoline, or (3) panic. action is
* set in cases (1) and (2) - check that either action is set or
* (3) is true.
*
* On II, the WP writes poisoned data back to memory, which will
* cause a UE and a panic or reboot when read. In this case, we
* don't need to panic at this time. On IIi and IIe,
* aflt->flt_panic is already set above.
*/
/*
* Make a final sanity check to make sure we did not get any more async
* errors and accumulate the afsr.
*/
/*
* Take care of a special case: If there is a UE in the ecache flush
* area, we'll see it in flush_ecache(). This will trigger the
* CPU_ADDITIONAL_ERRORS case below.
*
* This could occur if the original error was a UE in the flush area,
* or if the original error was an E$ error that was flushed out of
* the E$ in scan_ecache().
*
* If it's at the same address that we're already logging, then it's
* probably one of these cases. Clear the bit so we don't trip over
* it on the additional errors case, which could cause an unnecessary
* panic.
*/
else
/*
* Check the accumulated afsr for the important bits.
* Make sure the spf_flt.flt_type value is set, and
* enqueue an error.
*/
if (acc_afsr &
}
/*
* If aflt->flt_panic is set at this point, we need to panic as the
* result of a trap at TL > 0, or an error we determined to be fatal.
* We've already enqueued the error in one of the if-clauses above,
* and it will be dequeued and logged as part of the panic flow.
*/
"See previous message(s) for details", " %sError(s)",
}
/*
* Before returning, we must re-enable errors, and
* reset the caches to their boot-up state.
*/
}
/*
* Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
* This routine is shared by the CE and UE handling code.
*/
static void
{
/*
* The ISAP and ETP errors are supposed to cause a POR
* from the system, so in theory we never, ever see these messages.
* ISAP, ETP and IVUE are considered to be fatal.
*/
fatal_str = " System Address Parity Error on";
fatal_str = " Ecache Tag Parity Error on";
fatal_str = " Interrupt Vector Uncorrectable Error on";
}
}
/*
* Routine to convert a syndrome into a syndrome code.
*/
static int
{
if (synd_status != AFLT_STAT_VALID)
return (-1);
/*
* Use the 8-bit syndrome to index the ecc_syndrome_tab
* to get the code indicating which bit(s) is(are) bad.
*/
return (-1);
else
return (ecc_syndrome_tab[synd]);
}
/* ARGSUSED */
int
{
return (ENOTSUP);
}
/* ARGSUSED */
int
{
return (ENOTSUP);
}
/* ARGSUSED */
int
{
return (ENOTSUP);
}
/*
* Routine to return a string identifying the physical name
* associated with a memory module.
*/
/* ARGSUSED */
int
{
short synd_code;
int ret;
if (flt_in_memory) {
if (synd_code == -1) {
} else if (*lenp <= 1) {
} else {
ret = 0;
}
} else {
}
if (ret != 0) {
buf[0] = '\0';
*lenp = 0;
}
return (ret);
}
/*
* Wrapper for cpu_get_mem_unum() routine that takes an
* async_flt struct rather than explicit arguments.
*/
int
{
}
/*
* This routine is a more generic interface to cpu_get_mem_unum(),
* that may be used by other modules (e.g. mm).
*/
int
{
char unum[UNUM_NAMLEN];
/*
* Check for an invalid address.
*/
return (ENXIO);
else
!= 0)
return (ret);
return (ENAMETOOLONG);
return (0);
}
/*
* Routine to return memory information associated
* with a physical address and syndrome.
*/
/* ARGSUSED */
int
{
return (ENOTSUP);
}
/*
* Routine to return a string identifying the physical
* name associated with a cpuid.
*/
/* ARGSUSED */
int
{
return (ENOTSUP);
}
/*
* This routine returns the size of the kernel's FRU name buffer.
*/
size_t
cpu_get_name_bufsize()
{
return (UNUM_NAMLEN);
}
/*
* Cpu specific log func for UEs.
*/
static void
{
int len = 0;
#ifdef DEBUG
/*
* Paranoid Check for priv mismatch
* Only applicable for UEs
*/
/*
* The priv bits in %tstate and %afsr did not match; we expect
* this to be very rare, so flag it with a message.
*/
": PRIV bit in TSTATE and AFSR mismatched; "
/* update saved afsr to reflect the correct priv */
}
#endif /* DEBUG */
UNUM_NAMLEN, &len);
" Uncorrectable Memory Error on");
" Syndrome 0x3 indicates that this may not be a "
"memory module problem");
}
if (aflt->flt_in_memory)
}
/*
* The cpu_async_log_err() function is called via the ue_drain() function to
* handle logging for CPU events that are dequeued. As such, it can be invoked
* from softint context, from AST processing in the trap() flow, or from the
* panic flow. We decode the CPU-specific data, and log appropriate messages.
*/
static void
cpu_async_log_err(void *flt)
{
char unum[UNUM_NAMLEN];
char *space;
char *ecache_scrub_logstr = NULL;
case CPU_UE_ERR:
/*
* We want to skip logging only if ALL the following
* conditions are true:
*
* 1. We are not panicking
* 2. There is only one error
* 3. That error is a memory error
* 4. The error is caused by the memory scrubber (in
* which case the error will have occurred under
* on_trap protection)
* 5. The error is on a retired page
*
* Note 1: AFLT_PROT_EC is used in places other than the memory
* scrubber. However, none of those errors should occur
* on a retired page.
*
* Note 2: In the CE case, these errors are discarded before
* the errorq. In the UE case, we must wait until now --
* softcall() grabs a mutex, which we can't do at a high PIL.
*/
if (!panicstr &&
/* Zero the address to clear the error */
return;
}
}
/*
* Log the UE and check for causes of this UE error that
* don't cause a trap (Copyback error). cpu_async_error()
* has already checked the i/o buses for us.
*/
if (aflt->flt_in_memory)
break;
case CPU_EDP_LDP_ERR:
NULL, " EDP event on");
NULL, " LDP event on");
/* Log ecache info if exist */
if (spf_flt->flt_ec_lcnt > 0) {
NULL, " AFAR was derived from E$Tag");
} else {
NULL, " No error found in ecache (No fault "
"PA available)");
}
break;
case CPU_WP_ERR:
/*
* If the memscrub thread hasn't yet read
* all of memory, as we requested in the
* trap handler, then give it a kick to
* make sure it does.
*/
memscrub_run();
" WP event on");
return;
case CPU_BTO_BERR_ERR:
/*
* A bus timeout or error occurred that was in user mode or not
* in a protected kernel code region.
*/
" Bus Error on System Bus in %s mode from",
}
" Timeout on System Bus in %s mode from",
}
return;
case CPU_PANIC_CP_ERR:
/*
* Process the Copyback (CP) error info (if any) obtained from
* polling all the cpus in the panic flow. This case is only
* entered if we are panicking.
*/
/* See which space - this info may not exist */
space = "Data ";
space = "Instruction ";
else
space = "";
" AFAR was derived from UE report,"
" CP event on CPU%d (caused %saccess error on %s%d)",
if (spf_flt->flt_ec_lcnt > 0)
else
NULL, " No cache dump available");
return;
case CPU_TRAPPING_CP_ERR:
/*
* For sabre only. This is a copyback ecache parity error due
* to a PCI DMA read. We should be panicking if we get here.
*/
" AFAR was derived from UE report,"
" CP event on CPU%d (caused Data access error "
return;
/*
* We log the ecache lines of the following states,
* clean_bad_idle, clean_bad_busy, dirty_bad_idle and
* dirty_bad_busy if ecache_scrub_verbose is set and panic
* in addition to logging if ecache_scrub_panic is set.
*/
case CPU_BADLINE_CI_ERR:
ecache_scrub_logstr = "CBI";
/* FALLTHRU */
case CPU_BADLINE_CB_ERR:
if (ecache_scrub_logstr == NULL)
ecache_scrub_logstr = "CBB";
/* FALLTHRU */
case CPU_BADLINE_DI_ERR:
if (ecache_scrub_logstr == NULL)
ecache_scrub_logstr = "DBI";
/* FALLTHRU */
case CPU_BADLINE_DB_ERR:
if (ecache_scrub_logstr == NULL)
ecache_scrub_logstr = "DBB";
" %s event on", ecache_scrub_logstr);
return;
case CPU_ORPHAN_CP_ERR:
/*
* Orphan CPs, where the CP bit is set but the CPU
* doesn't report an associated UE.
*/
if (read_all_memscrub)
memscrub_run();
NULL, " Orphan CP event on");
/* Log ecache info if exist */
if (spf_flt->flt_ec_lcnt > 0)
else
" No error found in ecache (No fault "
"PA available");
return;
case CPU_ECACHE_ADDR_PAR_ERR:
" E$ Tag Address Parity error on");
return;
case CPU_ECACHE_STATE_ERR:
" E$ Tag State Parity error on");
return;
case CPU_ECACHE_TAG_ERR:
" E$ Tag scrub event on");
return;
case CPU_ECACHE_ETP_ETS_ERR:
" AFSR.ETP is set and AFSR.ETS is zero on");
return;
case CPU_ADDITIONAL_ERR:
" Additional errors detected during error processing on");
return;
default:
return;
}
/* ... fall through from the UE, EDP, or LDP cases */
if (!panicstr) {
} else {
/*
* Clear UEs on panic so that we don't
* get haunted by them during panic or
* after reboot
*/
}
}
/*
* Log final recovery message
*/
if (!panicstr) {
NULL, " Above Error is in User Mode"
"\n and is fatal: "
"will SIGKILL process and notify contract");
NULL, " Above Error detected while dumping core;"
"\n core file will be truncated");
NULL, " Above Error is due to Kernel access"
"\n to User space and is fatal: "
"will SIGKILL process and notify contract");
" Above Error detected by protected Kernel code"
"\n that will try to clear error from system");
}
}
}
/*
* Check all cpus for non-trapping UE-causing errors
*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
int pix;
if (CPU_XCALL_READY(pix)) {
char *space;
/* See which space - this info may not exist */
space = "Data ";
space = "Instruction ";
else
space = "";
NULL, " AFAR was derived from UE report,"
" CP event on CPU%d (caused %saccess "
if (spf_cpflt->flt_ec_lcnt > 0)
else
" No cache dump available");
}
}
}
}
#ifdef DEBUG
int test_mp_cp = 0;
#endif
/*
* Cross-call callback routine to tell a CPU to read its own %afsr to check
* for copyback errors and capture relevant information.
*/
static uint_t
{
int i;
get_asyncflt(&afsr);
*acc_afsr = 0;
}
#ifdef DEBUG
if (test_mp_cp)
#endif
/*
* Capture the UDBs
*/
/*
* Clear CP bit before capturing ecache data
* and AFSR info.
*/
/*
* See if we can capture the ecache line for the
* fault PA.
*
* Return a valid matching ecache line, if any.
* Otherwise, return the first matching ecache
* line marked invalid.
*/
spf_flt->flt_ec_lcnt = 0;
acc_afsr);
continue;
sizeof (ec_data));
if (valid)
break;
}
}
}
return (0);
}
/*
* CPU-module callback for the non-panicking CPUs. This routine is invoked
* from panic_idle() as part of the other CPUs stopping themselves when a
* panic occurs. We need to be VERY careful what we do here, since panicstr
* is NOT set yet and we cannot blow through locks. If panic_aflt is set
* (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
* CP error information.
*/
void
cpu_async_panic_callb(void)
{
if (panic_aflt.flt_id != 0) {
if (*scrub_afsr & P_AFSR_CP) {
*scrub_afsr = 0;
}
}
}
}
}
/*
* Turn off all cpu error detection, normally only used for panics.
*/
void
cpu_disable_errors(void)
{
}
/*
* Enable errors.
*/
void
cpu_enable_errors(void)
{
}
static void
{
int i, loop = 1;
if (verbose)
loop = 8;
for (i = 0; i < loop; i++) {
if (verbose) {
if (ce_err) {
"Data 0x%08x.%08x, ECC 0x%x", paddr,
ecc_0);
} else {
"Data 0x%08x.%08x", paddr,
}
}
}
}
static struct { /* sec-ded-s4ed ecc code */
} ecc_code[8] = {
{ 0xee55de23U, 0x16161161U },
{ 0x55eede93U, 0x61612212U },
{ 0xbb557b8cU, 0x49494494U },
{ 0x55bb7b6cU, 0x94948848U },
{ 0x16161161U, 0xee55de23U },
{ 0x61612212U, 0x55eede93U },
{ 0x49494494U, 0xbb557b8cU },
{ 0x94948848U, 0x55bb7b6cU }
};
static ushort_t
{
int i, j;
struct {
/* mask out bits according to sec-ded-s4ed ecc code */
for (i = 0; i < 8; i++) {
}
/*
* xor all bits in masked_data[i] to get bit_i of checker,
* where i = 0 to 7
*/
checker = 0;
for (i = 0; i < 8; i++) {
bit_mask = 1 << i;
for (j = 0; j < 32; j++) {
}
}
return (checker);
}
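/*
 * Background note (assumption, not from the original comments): XOR-ing a
 * checker recomputed from the data with the ECC bits stored alongside it
 * yields the 8-bit syndrome that indexes ecc_syndrome_tab[] above; a zero
 * syndrome means the word checked clean.
 */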
/*
* Flush the entire ecache using displacement flush by reading through a
* physical address range as large as the ecache.
*/
void
cpu_flush_ecache(void)
{
}
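/*
 * Illustrative sketch of a displacement flush, with hypothetical names
 * under a hypothetical SPITFIRE_EXAMPLES guard (the real flush is done in
 * assembly): reading one word per line across an E$-sized, E$-aligned
 * region forces every line of a direct-mapped E$ to be victimized.
 */
#ifdef SPITFIRE_EXAMPLES
static void
displacement_flush(caddr_t flushbase, size_t ec_size, int ec_lsize)
{
	caddr_t p;

	for (p = flushbase; p < flushbase + ec_size; p += ec_lsize)
		(void) *(volatile uint64_t *)p;	/* each load displaces a line */
}
#endif	/* SPITFIRE_EXAMPLES */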
/*
* read and display the data in the cache line where the
* original ce error occurred.
* This routine is mainly used for debugging new hardware.
*/
void
{
/* disable ECC error traps */
/*
* flush the ecache
* read the data
* check to see if an ECC error occurred
*/
/* enable ECC error traps */
}
/*
* If UE or CE errors are detected, the routine
* clears all the AFSR sticky bits (except CP, which
* is preserved for orphan CP processing).
*/
static int
{
int persistent = 0;
/*
* Capture the AFSR, AFAR and UDBs info
*/
t_afar &= SABRE_AFAR_PA;
/*
* Clear the errors
*/
clr_datapath();
else
/*
* determine whether to check UDBH or UDBL for persistence
*/
t_afar |= 0x8;
} else {
}
/* Package the info nicely in the spf_flt struct */
" check_ecc: Dumping captured error states ...");
}
/*
* if the fault addresses don't match, the error is not persistent
*/
return (persistent);
}
/*
* check for UE persistence
* since all DIMMs in the bank are identified for a UE,
* there's no reason to check the syndrome
*/
persistent = 1;
}
/*
* check for CE persistence
*/
if ((udb & P_DER_E_SYND) ==
persistent = 1;
}
}
}
return (persistent);
}
#ifdef HUMMINGBIRD
#define HB_FULL_DIV 1
#define HB_HALF_DIV 2
#define HB_LOWEST_DIV 8
#define HB_ECLK_INVALID 0xdeadbad
HB_ECLK_8 };
#define HB_SLOW_DOWN 0
#define HB_SPEED_UP 1
#define SET_ESTAR_MODE(mode) \
/* \
* PLL logic requires minimum of 16 clock \
* cycles to lock to the new clock speed. \
* Wait 1 usec to satisfy this requirement. \
*/ \
drv_usecwait(1);
{ \
new_count = (HB_REFRESH_INTERVAL * \
(new_count << HB_REFRESH_COUNT_SHIFT); \
/* \
* If we are slowing down the cpu and Memory \
* Self Refresh is not enabled, it is required \
* to wait for old refresh count to count-down and \
* new refresh count to go into effect (let new value \
* counts down once). \
*/ \
if ((direction) == HB_SLOW_DOWN && \
(data & HB_SELF_REFRESH_MASK) == 0) { \
/* \
* Each count takes 64 cpu clock cycles \
* to decrement. Wait for current refresh \
* count plus new refresh count at current \
* cpu speed to count down to zero. Round \
* up the delay time. \
*/ \
delay = ((HB_REFRESH_CLOCKS_PER_COUNT * \
drv_usecwait(delay); \
} \
}
#define SET_SELF_REFRESH(bit) \
{ \
((bit) << HB_SELF_REFRESH_SHIFT); \
}
#endif /* HUMMINGBIRD */
/* ARGSUSED */
void
{
#ifdef HUMMINGBIRD
int index;
return;
}
cur_divisor = index;
break;
}
}
if (cur_divisor == 0)
"can't be determined!");
/*
* If we are already at the requested divisor speed, just
* return.
*/
if (cur_divisor == new_divisor)
return;
/* LINTED: E_FALSE_LOGICAL_EXPR */
/*
* Transition to 1/2 speed first, then to
* lower speed.
*/
/*
* Transition to 1/2 speed first, then to
* full speed.
*/
/* LINTED: E_FALSE_LOGICAL_EXPR */
/* LINTED: E_FALSE_LOGICAL_EXPR */
} else if (cur_divisor < new_divisor) {
} else if (cur_divisor > new_divisor) {
/* LINTED: E_FALSE_LOGICAL_EXPR */
}
#endif
}
/*
* Read the AFSR and clear all the sticky bits. If a non-null pointer to an async fault
* structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
* info will be returned in the structure. If a non-null pointer to a
* uint64_t is passed in, this will be updated if the CP bit is set in the
* AFSR. The afsr will be returned.
*/
static uint64_t
{
get_asyncflt(&afsr);
}
clr_datapath(); /* clear udbs */
return (afsr);
}
/*
* Scan the ecache to look for bad lines. If found, the afsr, afar, e$ data
* tag of the first bad line will be returned. We also return the old-afsr
* (before clearing the sticky bits). The linecnt data will be updated to
* indicate the number of bad lines detected.
*/
static void
{
*linecnt = 0;
for (i = 0; i < ecache_sz; i += 64) {
cpu_afsr);
/*
* Scan through the whole 64 bytes line in 8 8-byte chunks
* looking for the first occurrence of an EDP error. The AFSR
* info is captured for each 8-byte chunk. Note that for
* Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
* 16-byte chunk granularity (i.e. the AFSR will be the same
* for the high and low 8-byte words within the 16-byte chunk).
* For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
* granularity and only PSYND bits [7:0] are used.
*/
for (j = 0; j < 8; j++) {
/*
* For Spitfire/Blackbird, we need to look at
* the PSYND to make sure that this 8-byte chunk
* is the right one. PSYND bits [15:8] belong
* to the upper 8-byte (even) chunk. Bits
* [7:0] belong to the lower 8-byte chunk (odd).
*/
if (j & 0x1)
else
if (!psynd)
continue; /* wrong chunk */
}
/* Construct the PA */
cpu_ec_tag_shift) | ((i | (j << 3)) %
/* clean up the cache line */
(*linecnt)++;
/*
* Capture the PA for the first bad line found.
* Return the ecache dump and tag info.
*/
if (pa == AFLT_INV_ADDR) {
int k;
for (k = 0; k < 8; k++)
ecache_data[k] = t_ecdata[k];
*ecache_tag = t_etag;
}
break;
}
}
}
}
static void
{
char linestr[30];
char *state_str;
int i;
/*
* Check the ecache tag to make sure it
* is valid. If invalid, a memory dump was
* captured instead of an ecache dump.
*/
if (estate == cpu_ec_state_shr)
state_str = "Shared";
else if (estate == cpu_ec_state_exl)
state_str = "Exclusive";
else if (estate == cpu_ec_state_own)
state_str = "Owner";
else if (estate == cpu_ec_state_mod)
state_str = "Modified";
else
state_str = "Invalid";
} else {
linestr[0] = '\0';
}
" PA=0x%08x.%08x\n E$tag 0x%08x.%08x E$State: %s "
} else {
" E$tag != PA from AFAR; E$line was victimized"
"\n dumping memory from PA 0x%08x.%08x instead",
}
/*
* Dump out all 8 8-byte ecache data captured
* For each 8-byte data captured, we check the
* captured afsr's parity syndrome to find out
* which 8-byte chunk is bad. For memory dump, the
* AFSR values were initialized to 0.
*/
for (i = 0; i < 8; i++) {
/*
* For Sabre/Hummingbird, parity synd is captured only
* in [7:0] of AFSR.PSYND for each 8-byte chunk.
* For Spitfire/Blackbird, AFSR.PSYND is captured
* in 16-byte granularity. [15:8] represent
* the upper 8 byte and [7:0] the lower 8 byte.
*/
else
" E$Data (0x%02x): 0x%08x.%08x "
"*Bad* PSYND=0x%04x", offset,
} else {
" E$Data (0x%02x): 0x%08x.%08x", offset,
}
}
}
/*
* Common logging function for all cpu async errors. This function allows the
* caller to generate a single cmn_err() call that logs the appropriate items
* from the fault structure, and implements our rules for AFT logging levels.
*
* ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
* tagnum: 0, 1, 2, .. generate the [AFT#] tag
* spflt: pointer to spitfire async fault structure
* logflags: bitflags indicating what to output
* endstr: an end string to appear at the end of this log
* fmt: a format string to appear at the beginning of the log
*
* The logflags allows the construction of predetermined output from the spflt
* structure. The individual data items always appear in a consistent order.
* Note that either or both of the spflt structure pointer and logflags may be
* NULL or zero respectively, indicating that the predetermined output
* substrings are not requested in this log. The output looks like this:
*
* [AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
* <CPU_SPACE><CPU_ERRID>
* newline+4spaces<CPU_AFSR><CPU_AFAR>
* newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
* newline+4spaces<CPU_UDBH><CPU_UDBL>
* newline+4spaces<CPU_SYND>
* newline+4spaces<endstr>
*
* Note that <endstr> may not start on a newline if we are logging <CPU_SYND>;
* it is assumed that <endstr> will be the unum string in this case. The size
* of our intermediate formatting buf[] is based on the worst case of all flags
* being enabled. We pass the caller's varargs directly to vcmn_err() for
* formatting so we don't need additional stack space to format them here.
*/
/*PRINTFLIKE6*/
static void
{
int console_log_flag;
} else {
if (!verbose)
return;
}
if (console_log_flag)
else
p += strlen(p);
}
/*
* Copy the caller's format string verbatim into buf[]. It will be
* formatted by the call to vcmn_err() at the end of this function.
*/
p += strlen(p);
}
if (logflags & CPU_FLTCPU) {
p += strlen(p);
}
" Data access");
" Instruction access");
p += strlen(p);
}
p += strlen(p);
}
", errID 0x%08x.%08x",
p += strlen(p);
}
"\n AFSR 0x%08b.%08b",
p += strlen(p);
}
p += strlen(p);
}
if (logflags & CPU_AF_PSYND) {
"\n AFSR.PSYND 0x%04x(Score %02d)",
p += strlen(p);
}
if (logflags & CPU_AF_ETS) {
p += strlen(p);
}
if (logflags & CPU_FAULTPC) {
p += strlen(p);
}
"\n UDBH 0x%04b UDBH.ESYND 0x%02x",
p += strlen(p);
}
" UDBL 0x%04b UDBL.ESYND 0x%02x",
p += strlen(p);
}
"\n %s Syndrome 0x%x Memory Module ",
p += strlen(p);
}
}
else
p += strlen(p);
}
}
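/*
 * Illustrative usage (a hedged sketch; CE_LFLAGS stands in for whichever
 * of this file's logflags masks applies, and unum/spf_flt are assumed to
 * be in scope): log a corrected memory error at [AFT0], letting
 * cpu_aflt_log() append the CPU, errID, AFSR/AFAR and syndrome substrings
 * in their fixed order, with the unum string as <endstr>:
 *
 *	cpu_aflt_log(CE_CONT, 0, spf_flt, CE_LFLAGS, unum,
 *	    " Corrected Memory Error detected by");
 */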
/*
* Ecache Scrubbing
*
* The basic idea is to prevent lines from sitting in the ecache long enough
* to build up soft errors which can lead to ecache parity errors.
*
* The following rules are observed when flushing the ecache:
*
* 1. When the system is busy, flush bad clean lines
* 2. When the system is idle, flush all clean lines
* 3. When the system is idle, flush good dirty lines
* 4. Never flush bad dirty lines.
*
* modify parity busy idle
* ----------------------------
* clean good X
* clean bad X X
* dirty good X
* dirty bad
*
* Bad or good refers to whether a line has an E$ parity error or not.
* Clean or dirty refers to the state of the modified bit. We currently
* default the scan rate to 100 (scan 10% of the cache per second).
*
* The following are E$ states and actions.
*
* We encode our state as a 3-bit number, consisting of:
* ECACHE_STATE_MODIFIED (0=clean, 1=dirty)
* ECACHE_STATE_PARITY (0=good, 1=bad)
* ECACHE_STATE_BUSY (0=idle, 1=busy)
*
* We associate a flushing and a logging action with each state.
*
* E$ actions are different for Spitfire and Sabre/Hummingbird modules.
* MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
* E$ only, in addition to value being set by ec_flush.
*/
struct {
char ec_flush; /* whether to flush or not */
char ec_log; /* ecache logging */
char ec_log_type; /* log type info */
} ec_action[] = { /* states of the E$ line in M P B */
{ ALWAYS_FLUSH, 0, 0 }, /* 0 0 0 clean_good_idle */
{ MIRROR_FLUSH, 0, 0 }, /* 0 0 1 clean_good_busy */
{ ALWAYS_FLUSH, 0, 0 }, /* 1 0 0 dirty_good_idle */
{ MIRROR_FLUSH, 0, 0 }, /* 1 0 1 dirty_good_busy */
};
/*
* Offsets into the ec_action[] that determine clean_good_busy and
* dirty_good_busy lines.
*/
/*
* We flush lines which are Clean_Good_Busy as well as lines which are
* Dirty_Good_Busy, but only for a non-mirrored E$.
*/
#define ECACHE_STATE_MODIFIED 0x4
#define ECACHE_STATE_PARITY 0x2
#define ECACHE_STATE_BUSY 0x1
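/*
 * Illustrative sketch, a hypothetical helper under a hypothetical
 * SPITFIRE_EXAMPLES guard: composing the 3-bit M/P/B state described
 * above. The resulting value indexes ec_action[]; e.g. a dirty line with
 * good parity on a busy system encodes as 0x5 (dirty_good_busy).
 */
#ifdef SPITFIRE_EXAMPLES
static int
ec_line_mpb(int dirty, int bad_parity, int busy)
{
	int mpb = 0;

	if (dirty)
		mpb |= ECACHE_STATE_MODIFIED;
	if (bad_parity)
		mpb |= ECACHE_STATE_PARITY;
	if (busy)
		mpb |= ECACHE_STATE_BUSY;

	return (mpb);	/* index into ec_action[] */
}
#endif	/* SPITFIRE_EXAMPLES */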
/*
* If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced.
*/
int ecache_calls_a_sec_mirrored = 1;
int ecache_lines_per_call_mirrored = 1;
int ecache_scrub_panic = 0; /* panics on a clean and dirty line */
/*
* Interrupt number and pil for ecache scrubber cross-trap calls.
*/
static uint64_t ecache_scrub_inum;
/*
* Kstats for the E$ scrubber.
*/
typedef struct ecache_kstat {
static ecache_kstat_t ec_kstat_template = {
{ "clean_good_idle", KSTAT_DATA_ULONG },
{ "clean_good_busy", KSTAT_DATA_ULONG },
{ "clean_bad_idle", KSTAT_DATA_ULONG },
{ "clean_bad_busy", KSTAT_DATA_ULONG },
{ "dirty_good_idle", KSTAT_DATA_ULONG },
{ "dirty_good_busy", KSTAT_DATA_ULONG },
{ "dirty_bad_idle", KSTAT_DATA_ULONG },
{ "dirty_bad_busy", KSTAT_DATA_ULONG },
{ "invalid_lines", KSTAT_DATA_ULONG },
{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
};
struct kmem_cache *sf_private_cache;
/*
* Called periodically on each CPU to scan the ecache once a second,
* adjusting the ecache line index appropriately.
*/
void
scrub_ecache_line(void)
{
int mpb; /* encode Modified, Parity, Busy for action */
switch (ec_mirror) {
default:
case ECACHE_CPU_NON_MIRROR:
/*
* The E$ scan rate is expressed in units of tenths of
* a percent. ecache_scan_rate = 1000 (100%) means the
* whole cache is scanned every second.
*/
(1000 * ecache_calls_a_sec);
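/*
 * Worked example with hypothetical sizes (not from the original
 * comments): a 1 MB E$ with 64-byte lines has 16384 lines; with
 * ecache_scan_rate = 100 (10%/sec) and ecache_calls_a_sec = 100,
 * each call works on (16384 * 100) / (1000 * 100) = 16 lines.
 */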
if (!(ssmp->ecache_busy)) {
if (ecache_idle_factor > 0) {
}
} else {
ecache_flush_clean_good_busy) / 100;
ecache_flush_dirty_good_busy) / 100;
}
ecache_calls_a_sec : 1);
break;
case ECACHE_CPU_MIRROR:
break;
}
/*
* The ecache scrubber algorithm operates by reading and
* decoding the E$ tag to determine whether the corresponding E$ line
* can be scrubbed. There is an implicit assumption in the scrubber
* logic that the E$ tag is valid. Unfortunately, this assertion is
* flawed since the E$ tag may also be corrupted and have parity errors.
* The scrubber logic is enhanced to check the validity of the E$ tag
* before scrubbing. When a parity error is detected in the E$ tag,
* it is possible to recover and scrub the tag under certain conditions
* so that an ETP error condition can be avoided.
*/
/*
* We get the old-AFSR before clearing the AFSR sticky bits
* in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
* If CP bit is set in the old-AFSR, we log an Orphan CP event.
*/
/*
* ETP is set; try to scrub the ecache tag.
*/
if (nafsr & P_AFSR_ETP) {
} else if (state & cpu_ec_state_valid) {
/*
* ETP is not set, E$ tag is valid.
* Proceed with the E$ scrubbing.
*/
if (state & cpu_ec_state_dirty)
if (tafsr & P_AFSR_EDP) {
if (ecache_scrub_verbose ||
}
}
if (ssmp->ecache_busy)
mpb |= ECACHE_STATE_BUSY;
/*
* We flush the E$ lines depending on the ec_flush;
* we additionally flush clean_good_busy and
* dirty_good_busy lines for mirrored E$.
*/
} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
}
/*
* Conditionally flush both the clean_good and
* dirty_good lines when busy.
*/
(flush_dirty_busy > 0)) {
}
tafsr);
}
} else {
}
index = 0;
}
/*
* set the ecache scrub index for the next time around
*/
*acc_afsr = 0;
}
}
/*
* Handler for ecache_scrub_inum softint. Call scrub_ecache_line until
* we decrement the outstanding request count to zero.
*/
/*ARGSUSED*/
uint_t
scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
{
int i;
int outstanding;
do {
outstanding = *countp;
ASSERT(outstanding > 0);
for (i = 0; i < outstanding; i++)
return (DDI_INTR_CLAIMED);
}
/*
* force each cpu to perform an ecache scrub, called from a timeout
*/
extern xcfunc_t ecache_scrubreq_tl1;
void
do_scrub_ecache_line(void)
{
long delta;
if (ecache_calls_a_sec > hz)
else if (ecache_calls_a_sec <= 0)
ecache_calls_a_sec = 1;
if (ecache_calls_a_sec_mirrored > hz)
else if (ecache_calls_a_sec_mirrored <= 0)
if (ecache_scrub_enable) {
} else {
}
(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
delta);
}
/*
* initialization for ecache scrubbing
* This routine is called AFTER all cpus have had cpu_init_private called
* to initialize their private data areas.
*/
void
cpu_init_cache_scrub(void)
{
if (ecache_calls_a_sec > hz) {
}
/*
* Register softint for ecache scrubbing.
*/
/*
* kick off the scrubbing using realtime timeout
*/
(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
hz / ecache_calls_a_sec);
}
/*
* Unset the busy flag for this cpu.
*/
void
{
}
}
/*
* Set the busy flag for this cpu.
*/
void
{
}
}
/*
* initialize the ecache scrubber data structures
* The global entry point cpu_init_private replaces this entry point.
*
*/
static void
{
/*
* initialize bookkeeping for cache scrubbing
*/
ssmp->ecache_flush_index = 0;
/*
* Determine whether we are running on mirrored SRAM
*/
else
/*
* initialize the kstats
*/
}
/*
* uninitialize the ecache scrubber data structures
* The global entry point cpu_uninit_private replaces this entry point.
*/
static void
{
}
/*
* un-initialize bookkeeping for cache scrubbing
*/
}
/*
* Cpu private initialization. This includes allocating the cpu_private
* data structure, initializing it, and initializing the scrubber for this
* cpu. This is called once for EVERY cpu, including CPU 0. This function
* calls cpu_init_ecache_scrub_dr to init the scrubber.
* We use kmem_cache_create for the spitfire private data structure because it
* needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
*/
void
{
/*
* If the sf_private_cache has not been created, create it.
*/
if (sf_private_cache == NULL) {
}
}
/*
* Cpu private uninitialization. Uninitialize the Ecache scrubber and
* deallocate the scrubber data structures and cpu_private data structure.
* For now, this function just calls cpu_uninit_ecache_scrub_dr to uninit
* the scrubber for the specified cpu.
*/
void
{
}
/*
* initialize the ecache kstats for each cpu
*/
static void
{
sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
KSTAT_FLAG_WRITABLE)) == NULL) {
return;
}
}
/*
* log the bad ecache information
*/
static void
{
int i;
char *class;
for (i = 0; i < 8; i++) {
}
switch (mpb) {
case CPU_ECACHE_TAG_ERR:
case CPU_ECACHE_ADDR_PAR_ERR:
case CPU_ECACHE_ETP_ETS_ERR:
case CPU_ECACHE_STATE_ERR:
break;
default:
break;
}
"line detected");
}
/*
* Process an ecache error that occurred during the E$ scrubbing.
* We do the ecache scan to find the bad line, flush the bad line
* and start the memscrubber to find any UE (in memory or in another cache)
*/
static uint64_t
{
/*
* Scan each line in the cache to look for the one
* with bad parity
*/
}
/*
* If we found a bad PA, update the state to indicate if it is
* memory or I/O space.
*/
MMU_PAGESHIFT)) ? 1 : 0;
}
/*
* We have the bad line, flush that line and start
* the memscrubber.
*/
if (spf_flt.flt_ec_lcnt > 0) {
read_all_memscrub = 1;
memscrub_run();
}
return (oafsr);
}
static void
{
(index % ec_set_size);
/*
* E$ tag state has good parity
*/
if ((afsr_ets & cpu_ec_state_parity) == 0) {
if (afsr_ets & cpu_ec_parity) {
/*
* E$ tag state bits indicate the line is clean,
* invalidate the E$ tag and continue.
*/
if (!(state & cpu_ec_state_dirty)) {
/*
* Zero the tag and mark the state invalid
* with good parity for the tag.
*/
else
/* Sync with the dual tag */
return;
} else {
" parity");
}
} else if ((afsr_ets & cpu_ec_parity) == 0) {
/*
* ETS is zero but ETP is set
*/
" AFSR.ETS is zero");
}
} else {
/*
* E$ tag state bit has bad parity
*/
}
}
static void
ecache_page_retire(void *arg)
{
}
void
sticksync_slave(void)
{}
void
sticksync_master(void)
{}
/*ARGSUSED*/
void
{}
void
{
int status;
}
/*ARGSUSED*/
void
{
}
#define MAX_SIMM 8
struct ce_info {
	char name[UNUM_NAMLEN];
	uint64_t intermittent_total;
	uint64_t persistent_total;
	uint64_t sticky_total;
	unsigned short leaky_bucket_cnt;
};
/*
* Separately-defined structure for use in reporting the ce_info
* to SunVTS without exposing the internal layout and implementation
* of struct ce_info.
*/
static struct ecc_error_info ecc_error_info_data = {
{ "version", KSTAT_DATA_UINT32 },
{ "maxcount", KSTAT_DATA_UINT32 },
{ "count", KSTAT_DATA_UINT32 }
};
sizeof (struct kstat_named);
#if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
#error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
#endif
size_t mem_ce_simm_size = 0;
/*
* Default values for the number of CE's allowed per interval.
* Interval is defined in minutes
* SOFTERR_MIN_TIMEOUT is defined in microseconds
*/
#define SOFTERR_LIMIT_DEFAULT 2
#define TIMEOUT_NONE ((timeout_id_t)0)
/*
* timeout identifier for leaky_bucket
*/
/*
* Tunables for maximum number of allowed CE's in a given time
*/
void
cpu_mp_init(void)
{
size_t i;
/*
* Initialize the CE error handling buffers.
*/
}
for (i = 0; i < mem_ce_simm_size; i++) {
struct kstat_ecc_mm_info *kceip;
KM_SLEEP);
sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
/*
* Re-declare ks_data_size to include room for the
* UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
* set.
*/
"name", KSTAT_DATA_STRING);
"intermittent_total", KSTAT_DATA_UINT64);
"persistent_total", KSTAT_DATA_UINT64);
"sticky_total", KSTAT_DATA_UINT64);
/*
* Use the default snapshot routine as it knows how to
* deal with named kstats with long strings.
*/
} else {
}
}
}
/*ARGSUSED*/
static void
leaky_bucket_timeout(void *arg)
{
int i;
for (i = 0; i < mem_ce_simm_size; i++) {
if (psimm[i].leaky_bucket_cnt > 0)
}
}
static void
add_leaky_bucket_timeout(void)
{
long timeout_in_microsecs;
/*
* create timeout for next leak.
*
* The timeout interval is calculated as follows
*
* (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
*
* ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
* in a minute), then multiply this by MICROSEC to get the interval
* in microseconds. Divide this total by ecc_softerr_limit so that
* the timeout interval is accurate to within a few microseconds.
*/
if (ecc_softerr_limit <= 0)
if (ecc_softerr_interval <= 0)
}
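/*
 * Illustrative sketch (hypothetical helper and guard): the interval
 * computation described above. With ecc_softerr_limit = 2 and
 * ecc_softerr_interval = 1440 minutes (numbers chosen only to exercise
 * the math), one count leaks every (1440 * 60 * MICROSEC) / 2 usec,
 * i.e. every 12 hours. Assumes an LP64 long, as the kernel uses here.
 */
#ifdef SPITFIRE_EXAMPLES
static long
leak_interval_usec(long interval_min, long limit)
{
	/* (minutes * 60 * MICROSEC) / limit */
	return ((interval_min * 60 * MICROSEC) / limit);
}
#endif	/* SPITFIRE_EXAMPLES */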
/*
* Legacy Correctable ECC Error Hash
*
* All of the code below this comment is used to implement a legacy array
* which counted intermittent, persistent, and sticky CE errors by unum,
* and then was later extended to publish the data as a kstat for SunVTS.
* All of this code is replaced by FMA, and remains here until such time
* that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
*
* Errors are saved in three buckets per-unum:
* (1) sticky - scrub was unsuccessful, cannot be scrubbed
* This could represent a problem, and is immediately printed out.
* (2) persistent - was successfully scrubbed
* These errors use the leaky bucket algorithm to determine
* if there is a serious problem.
* (3) intermittent - may have originated from the cpu or the bus, and
* does not necessarily indicate any problem with the dimm itself, but
* is critical information for debugging new hardware.
* Because we do not know if it came from the dimm, it would be
* inappropriate to include these in the leaky bucket counts.
*
* If the E$ line was modified before the scrub operation began, then the
* displacement flush at the beginning of scrubphys() will cause the modified
* line to be written out, which will clean up the CE. Then, any subsequent
* read will not cause an error, which will cause persistent errors to be
* identified as intermittent.
*
* If a DIMM is going bad, it will produce true persistents as well as
* false intermittents, so these intermittents can be safely ignored.
*
* If the error count is excessive for a DIMM, this function will return
* PR_MCE, and the CPU module may then decide to remove that page from use.
*/
static int
{
int i;
int page_status = PR_OK;
if (len <= 0 ||
return (page_status);
/*
* Initialize the leaky_bucket timeout
*/
for (i = 0; i < mem_ce_simm_size; i++) {
/*
* Hit the end of the valid entries, add
* a new one.
*/
if (status & ECC_STICKY) {
/*
* Sticky - the leaky bucket is used to track
* soft errors. Since a sticky error is a
* hard error and likely to be retired soon,
* we do not count it in the leaky bucket.
*/
psimm[i].leaky_bucket_cnt = 0;
psimm[i].intermittent_total = 0;
psimm[i].persistent_total = 0;
"[AFT0] Sticky Softerror encountered "
"on Memory Module %s\n", unum);
} else if (status & ECC_PERSISTENT) {
psimm[i].intermittent_total = 0;
psimm[i].sticky_total = 0;
} else {
/*
* Intermittent - Because the scrub operation
* cannot find the error in the DIMM, we will
* not count these in the leaky bucket
*/
psimm[i].leaky_bucket_cnt = 0;
psimm[i].persistent_total = 0;
psimm[i].sticky_total = 0;
}
break;
/*
* Found an existing entry for the current
* memory module, adjust the counts.
*/
if (status & ECC_STICKY) {
psimm[i].sticky_total++;
"[AFT0] Sticky Softerror encountered "
"on Memory Module %s\n", unum);
} else if (status & ECC_PERSISTENT) {
int new_value;
psimm[i].persistent_total++;
if (new_value > ecc_softerr_limit) {
" soft errors from Memory Module"
" %s exceed threshold (N=%d,"
" T=%dh:%02dm) triggering page"
ecc_softerr_interval / 60,
ecc_softerr_interval % 60);
}
} else { /* Intermittent */
psimm[i].intermittent_total++;
}
break;
}
}
if (i >= mem_ce_simm_size)
	cmn_err(CE_CONT, "Softerror: mem_ce_simm[] out of "
	    "space.\n");
return (page_status);
}
/*
* Function to support counting of IO detected CEs.
*/
void
{
int err;
}
}
static int
{
int i = ksp->ks_instance;
if (rw == KSTAT_WRITE)
return (EACCES);
ASSERT(i < mem_ce_simm_size && i >= 0);
/*
* Since we're not using locks, make sure that we don't get partial
* data. The name is always copied before the counters are incremented
* so only do this update routine if at least one of the counters is
* non-zero, which ensures that ce_count_unum() is done, and the
* string is fully copied.
*/
if (ceip[i].intermittent_total == 0 &&
ceip[i].persistent_total == 0 &&
ceip[i].sticky_total == 0) {
/*
* Uninitialized or partially initialized. Ignore.
* The ks_data buffer was allocated via kmem_zalloc,
* so no need to bzero it.
*/
return (0);
}
return (0);
}
#define VIS_BLOCKSIZE 64
int
{
if (watched)
return (ret);
}
/*ARGSUSED*/
void
{
}
/*ARGSUSED*/
void
{
}
/*ARGSUSED*/
void
{
}
size_t
mmu_get_kernel_lpsize(size_t lpsize)
{
if (lpsize == 0) {
return (MMU_PAGESIZE4M);
}
return (lpsize);
}
}