/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/plat_ecc_unum.h>
#include <sys/plat_ecc_dimm.h>
/*
* Pointer to a platform-specific function that initializes a cache of DIMM
* serial ids. The function takes no arguments and returns an int.
*
* Code in this file tests the pointer for non-NULL before using it, so a
* platform that has no such function can leave the pointer unset.
*/
int (*p2init_sid_cache)(void);
/*
* This file contains the common code used to parse ECC unum data and
* to log it in whatever way the calling platform implements.
*/
/*
* Forward declaration: file-local routine taking a single untyped pointer
* argument and returning nothing.
*/
static void plat_ecc_send_msg(void *);
/*
* CHECK_UNUM: as written here the expansion ends with "break; }", i.e. it
* leaves the enclosing switch (or loop) and closes a brace, so it can only
* be used inside such a construct.
* NOTE(review): the condition and opening brace that this closing brace
* pairs with are not part of the text below -- confirm the complete macro
* definition before relying on it.
*/
#define CHECK_UNUM \
break; \
}
/*
* See plat_ecc_unum.h for the meaning of these variables.
*/
/*
* We log all ECC errors using the function that is defined as
* plat_send_ecc_mailbox_msg(). We first parse the unum string and
* then pass the data to be logged to the plat_send_ecc_mailbox_msg
* function for logging. Each platform that uses this code needs to
* implement a suitable function for this purpose.
*
* The steps below are: classify the error by AFSR bit (unknown bits
* return early and nothing is sent), parse the unum string according to
* the error type (memory or E$), classify the syndrome code, and finally
* send the result to the SC as a mailbox message.
*/
void
{
int board_num;
int proc_position;
/*
* Classify the error by its AFSR bit. Only the bits listed here are
* reported; anything else takes the default case and returns.
*/
switch (afsr_bit) {
case C_AFSR_CE:
break;
case C_AFSR_UE:
break;
case C_AFSR_EDC:
break;
case C_AFSR_EDU:
break;
case C_AFSR_WDC:
break;
case C_AFSR_WDU:
break;
case C_AFSR_CPC:
break;
case C_AFSR_CPU:
break;
case C_AFSR_UCC:
break;
case C_AFSR_UCU:
break;
case C_AFSR_EMC:
break;
case C_AFSR_EMU:
break;
default:
/*
* Do not send messages with unknown error codes, since
* the SC will not be able to tell what type of error
* occurred.
*/
return;
}
/*
* The way the unum is parsed depends on whether the fault was in
* memory (PLAT_ECC_MEMORY) or in the E$ (PLAT_ECC_ECACHE).
*/
if (ecc->flt_in_memory)
switch (ecc_type) {
case PLAT_ECC_MEMORY: {
/*
* The unum string is expected to carry, in order: an optional
* node ('N') component, the SB number, the Proc position, the
* Memory Bank number, the Dimm number and the J-number.
* The 'N' component is present on Serengeti; as this code is
* shared with Starcat, if N is missing the node is set to 0.
* From that we will extract the bank number, dimm
* number, and Jnumber.
*/
int i;
/*
* On Serengeti we expect to find 'N' in the unum string
* however, on Starcat 'N' does not appear in the unum string.
* We do not want this code to break at this point, so the
* unum_ptr is reset to the start of unum string if we fail
* to find an 'N'.
*/
ecc_error_data.node_no = 0;
} else {
unum_ptr++;
}
/*
* Now pull out the SB number
*/
unum_ptr += 2;
/*
* Now pull out the Proc position (relative to the board)
*/
unum_ptr++;
/*
* Using the SB number and Proc position we create a FRU
* cpu id.
*/
/*
* Now pull out the Memory Bank number
*/
unum_ptr++;
/*
* Now pull out the Dimm number within the Memory Bank.
*/
unum_ptr++;
/*
* Now pull out the J-number.
*/
unum_ptr++;
for (i = PLAT_ECC_JNUMBER_LENGTH;
/*
* If we get here, we can assume the unum is valid
*/
invalid_unum = 0;
break;
}
case PLAT_ECC_ECACHE: {
/*
* The unum string is expected to carry, in order: an optional
* node ('N') component, a board component ("SB", or "IO" for
* a Maxcat board), the Proc position and the J-number.
* The 'N' component is present on Serengeti; as this code is
* shared with Starcat, if N is missing the node is set to 0.
* IO may only appear on Starcats. From that we will extract
* the bank number, dimm number, and Jnumber.
* NOTE(review): the last sentence matches the memory case
* word for word; the steps below only mention board, proc
* position and J-number -- confirm which is intended.
*/
int is_maxcat = 0;
int i;
/*
* On Serengeti we expect to find 'N' in the unum string
* however, on Starcat 'N' does not appear in the unum string.
* We do not want this code to break at this point, so the
* unum_ptr is reset to the start of unum string if we fail
* to find an 'N'.
*/
ecc_error_data.node_no = 0;
} else {
unum_ptr++;
}
/*
*/
/*
* Since this is an E$ error, it must have occurred on
* either a System Board (represented by "SB" in the
* unum string) or a Maxcat board ("IO" in the unum
* string). Since we failed the "SB" check, we'll
* assume this is a maxcat board.
*/
is_maxcat = 1;
}
unum_ptr += 2;
/*
* Now pull out the Proc position (relative to the board)
*/
unum_ptr++;
/*
* Using the board number, is_maxcat and the
* proc position, we create the cpu id.
*/
unum_ptr++;
unum_ptr++;
for (i = PLAT_ECC_JNUMBER_LENGTH;
/*
* If we get here, we can assume the unum is valid
*/
invalid_unum = 0;
break;
}
default:
/*
* Unknown error
*/
break;
}
/*
* This is where CHECK_UNUM goes when it finds an error
*/
/*
* Classify the syndrome code. The ranges that start at
* ECC_SYND_DATA_BEGIN, ECC_SYND_ECC_BEGIN, ECC_SYND_MTAG_BEGIN and
* ECC_SYND_MECC_BEGIN are tested first; the individual codes
* ECC_SYND_M2, ECC_SYND_M3, ECC_SYND_M4 and ECC_SYND_M are handled
* by the switch in the final else branch.
*/
if (ECC_SYND_DATA_BEGIN <= synd_code &&
} else if (ECC_SYND_ECC_BEGIN <= synd_code &&
} else if (ECC_SYND_MTAG_BEGIN <= synd_code &&
} else if (ECC_SYND_MECC_BEGIN <= synd_code &&
synd_code < ECC_SYND_M2) {
} else {
switch (synd_code) {
case ECC_SYND_M2:
break;
case ECC_SYND_M3:
break;
case ECC_SYND_M4:
break;
case ECC_SYND_M:
break;
default:
break;
}
}
/* The invalid-unum check below is compiled in only when DEBUG is defined */
#ifdef DEBUG
if (invalid_unum &&
#endif
/*
* Send this data off as a mailbox message to the SC.
*/
}
/*
* The unum string for memory is expected to be in this form:
* Or if the unum was generated as the result of a UE:
* From that we will extract the board number, processor position,
* bank number and jnumber.
*
* Return (0) when the unum was parsed, (1) for an invalid unum string.
* If the unum is for an individual DIMM and there is no jnumber,
* jnumber will be set to -1 and the caller can decide if the unum is
* valid. This is because Serengeti does not have jnumbers for bank
* unums which may be used to create DIMM unums (e.g. for acquiring
* DIMM serial ids).
*/
int
int *jnumber)
{
char *c;
return (1);
/*
* presumably steps over a two-character board tag such as "SB" --
* TODO confirm against the check that precedes it
*/
c += 2;
/* the processor component must be introduced by "/P" */
if (*c++ != '/' || *c++ != 'P')
return (1);
/* the bank component must be introduced by "/B" */
if (*c++ != '/' || *c++ != 'B')
return (1);
/*
* Successful parse that names no individual DIMM: dimm is reported
* as -1 and jnumber as 0.
* NOTE(review): looks like the bank-level (UE) form -- confirm.
*/
*dimm = -1;
*jnumber = 0;
return (0);
}
c++;
/* DIMM unum without a jnumber: report -1 and let the caller decide */
*jnumber = -1;
return (0);
}
c++;
return (0);
}
/*
* The unum string for ecache is expected to be in this form:
* From that we will extract the board number, processor position and
* jnumber.
*
* Return (0) when the unum was parsed, (1) for any invalid unum string.
* *maxcat is set to 1 on the Maxcat ("IO") path described below.
*/
static int
{
char *c;
/*
* Since this is an E$ error, it must have occurred on
* either a System Board (represented by "SB" in the
* unum string) or a Maxcat board ("IO" in the unum
* string).
*/
return (1);
*maxcat = 1;
}
/*
* presumably steps over the two-character board tag ("SB" or "IO") --
* TODO confirm
*/
c += 2;
/* the processor component must be introduced by "/P" */
if (*c++ != '/' || *c++ != 'P')
return (1);
return (1);
c++;
return (0);
}
/*
* The following array maps the error to its corresponding set.
* Each entry is one of the PLAT_ECC_ERROR2_SEND_* flag sets (or
* PLAT_ECC_ERROR2_NONE); the trailing comment on each line gives the
* entry's index, 0x00 through 0x19. Several indices share the same set.
* NOTE(review): presumably indexed by the error2 message type -- confirm
* against the code that reads this array.
*/
PLAT_ECC_ERROR2_NONE, /* 0x00 */
PLAT_ECC_ERROR2_SEND_L2_XXC, /* 0x01 */
PLAT_ECC_ERROR2_SEND_L2_XXU, /* 0x02 */
PLAT_ECC_ERROR2_SEND_L3_XXC, /* 0x03 */
PLAT_ECC_ERROR2_SEND_L3_XXU, /* 0x04 */
PLAT_ECC_ERROR2_SEND_MEM_ERRS, /* 0x05 */
PLAT_ECC_ERROR2_SEND_MEM_ERRS, /* 0x06 */
PLAT_ECC_ERROR2_SEND_MEM_ERRS, /* 0x07 */
PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x08 */
PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x09 */
PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x0a */
PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x0b */
PLAT_ECC_ERROR2_SEND_L2_TAG_ERRS, /* 0x0c */
PLAT_ECC_ERROR2_SEND_L2_TAG_ERRS, /* 0x0d */
PLAT_ECC_ERROR2_SEND_L3_TAG_ERRS, /* 0x0e */
PLAT_ECC_ERROR2_SEND_L3_TAG_ERRS, /* 0x0f */
PLAT_ECC_ERROR2_SEND_L1_PARITY, /* 0x10 */
PLAT_ECC_ERROR2_SEND_L1_PARITY, /* 0x11 */
PLAT_ECC_ERROR2_SEND_TLB_PARITY, /* 0x12 */
PLAT_ECC_ERROR2_SEND_TLB_PARITY, /* 0x13 */
PLAT_ECC_ERROR2_SEND_IV_ERRS, /* 0x14 */
PLAT_ECC_ERROR2_SEND_IV_ERRS, /* 0x15 */
PLAT_ECC_ERROR2_SEND_MTAG_XXC, /* 0x16 */
PLAT_ECC_ERROR2_SEND_IV_MTAG_XXC, /* 0x17 */
PLAT_ECC_ERROR2_SEND_L3_XXC, /* 0x18 */
PLAT_ECC_ERROR2_SEND_PCACHE /* 0x19 */
};
/*
* Log enhanced error information to the SC.
*
* Nothing is sent unless at least one bit of "flags" is also set in
* ecc_error2_mailbox_flags. Faults in memory and faults elsewhere are
* filled in by separate branches, and the function returns early
* (without sending) from the in-memory branch on its failure paths.
*/
void
{
int maxcat = 0;
/* Drop the message if its flag set is not enabled */
if ((ecc_error2_mailbox_flags & flags) == 0)
return;
/* Fill the header */
/* Fill the data */
if (aflt->flt_in_memory) {
return;
/*
* Using the SB number and Proc position we create a FRU
* cpu id.
*/
return;
/*
* Using the SB number and Proc position we create a FRU
* cpu id.
*/
} else {
/*
* L1 Cache
*/
}
/* Send the message to SC */
}
/*
* We log all Solaris indictments of failing hardware. We pull the system
* board number and jnumber out of the unum string, and calculate the cpuid
* from some members of the unum string. The rest of the structure is filled
* in through the other arguments. The data structure is then passed to
* plat_ecc_dispatch_task(). This function should only be loaded into memory
* or called on platforms that define a plat_send_ecc_mailbox_msg() function.
*
* Returns ECONNREFUSED when indictment sending is disabled or the message
* type is not enabled in ecc_indictment_mailbox_flags, EINVAL on the unum
* parsing failure paths, and otherwise whatever plat_ecc_dispatch_task()
* returns.
*/
static int
{
char *unum_ptr;
int is_maxcat = 0;
/*
* Global gate: only the OK and SUSPECT settings allow a send.
* NO_SEND and any unrecognized setting refuse the message.
*/
switch (ecc_indictment_mailbox_disable) {
case (PLAT_ECC_INDICTMENT_OK):
case (PLAT_ECC_INDICTMENT_SUSPECT):
break;
case (PLAT_ECC_INDICTMENT_NO_SEND):
default:
return (ECONNREFUSED);
}
/*
* Per-type gate: each message type must have its bit enabled in
* ecc_indictment_mailbox_flags; unknown types are refused.
*/
switch (msg_type) {
case (PLAT_ECC_INDICT_DIMM):
if ((ecc_indictment_mailbox_flags &
PLAT_ECC_SEND_DIMM_INDICT) == 0)
return (ECONNREFUSED);
break;
if ((ecc_indictment_mailbox_flags &
return (ECONNREFUSED);
break;
if ((ecc_indictment_mailbox_flags &
return (ECONNREFUSED);
break;
default:
return (ECONNREFUSED);
}
/* LINTED: E_TRUE_LOGICAL_EXPR */
wrapperp = (plat_ecc_message_t *)
contentsp = &(((plat_ecc_indictment_data_t *)
/*
* Find board_num, jnumber, and proc position from the unum string.
* Use the board number, is_maxcat, and proc position to calculate
* cpuid.
*/
is_maxcat = 1;
return (EINVAL);
}
}
unum_ptr += 2;
return (EINVAL);
}
unum_ptr++;
return (EINVAL);
}
unum_ptr++;
/*
* Fill in the rest of the data
*/
/*
* Build the solaris_version string:
*/
/*
* Send the data on to the queuing function
*/
return (plat_ecc_dispatch_task(wrapperp));
}
/*
* The following array maps the indictment to its corresponding set.
* Each entry is one of the PLAT_ECC_SEND_INDICT2_* flag sets (or
* PLAT_ECC_INDICT2_NONE); the trailing comment on each line gives the
* entry's index, 0x00 through 0x0c. Some indices share the same set.
* NOTE(review): presumably indexed by the indictment2 message type --
* confirm against the code that reads this array.
*/
PLAT_ECC_INDICT2_NONE, /* 0x00 */
PLAT_ECC_SEND_INDICT2_L2_XXU, /* 0x01 */
PLAT_ECC_SEND_INDICT2_L2_XXC_SERD, /* 0x02 */
PLAT_ECC_SEND_INDICT2_L2_TAG_SERD, /* 0x03 */
PLAT_ECC_SEND_INDICT2_L3_XXU, /* 0x04 */
PLAT_ECC_SEND_INDICT2_L3_XXC_SERD, /* 0x05 */
PLAT_ECC_SEND_INDICT2_L3_TAG_SERD, /* 0x06 */
PLAT_ECC_SEND_INDICT2_L1_SERD, /* 0x07 */
PLAT_ECC_SEND_INDICT2_L1_SERD, /* 0x08 */
PLAT_ECC_SEND_INDICT2_TLB_SERD, /* 0x09 */
PLAT_ECC_SEND_INDICT2_TLB_SERD, /* 0x0a */
PLAT_ECC_SEND_INDICT2_FPU, /* 0x0b */
PLAT_ECC_SEND_INDICT2_PCACHE_SERD /* 0x0c */
};
/*
* Build an indictment message gated by ecc_indictment2_mailbox_flags and
* hand it to plat_ecc_dispatch_task().
*
* Returns EINVAL when the unum is null or empty, ECONNREFUSED when sending
* is not permitted by ecc_indictment_mailbox_disable or by the flags
* check, and otherwise whatever plat_ecc_dispatch_task() returns.
*/
static int
{
int maxcat = 0;
/*
* If the unum is null or empty there is nothing to parse; fail
* with EINVAL.
*/
return (EINVAL);
}
if ((ecc_indictment_mailbox_disable != PLAT_ECC_INDICTMENT_OK) &&
return (ECONNREFUSED);
/* Refuse the message if its flag set is not enabled */
if ((ecc_indictment2_mailbox_flags & flags) == 0)
return (ECONNREFUSED);
wrapperp = (plat_ecc_message_t *)
/* Initialize the wrapper */
/* Fill the header */
/* Fill the data */
pos);
} else {
}
/*
* Send the data on to the queuing function
*/
return (plat_ecc_dispatch_task(wrapperp));
}
/*
* Build a capability message (header, default domain capability and the
* solaris_version string) and pass it to plat_ecc_dispatch_task().
* Takes no arguments; returns the value plat_ecc_dispatch_task() returns.
*/
int
plat_ecc_capability_send(void)
{
int ver_len;
/* Initialize the wrapper */
/* Fill the header */
/* Set the default domain capability */
/*
* Build the solaris_version string:
* utsname.release + " " + utsname.version
*/
/*
* Send the data on to the queuing function
*/
return (plat_ecc_dispatch_task(wrapperp));
}
/*
* Per-message-type yes/no query: returns 1 or 0 depending on "type".
* The visible conditions consult ecc_log_fruid_enable and the SC
* capability map (plat_ecc_capability_map_sc). Unknown types, and
* recognized types whose condition does not hold, yield 0.
* NOTE(review): presumably 1 means "this message type may be sent to the
* SC" -- confirm against the callers.
*/
int
{
switch (type) {
case PLAT_ECC_ERROR_MESSAGE:
if (ecc_log_fruid_enable &&
return (1);
break;
case PLAT_ECC_ERROR2_MESSAGE:
return (1);
break;
if (!(plat_ecc_capability_map_sc &
return (1);
break;
return (1);
break;
return (1);
default:
return (0);
}
/* reached by the cases that break out of the switch */
return (0);
}
/*
* NOTE(review): presumably counts how many times the SC capability has
* been set -- confirm where it is incremented.
*/
int plat_ecc_cap_sc_set_cnt = 0;
void
{
/*
* p2init_sid_cache is optional: only act on it when a platform has
* installed a function.
*/
if (p2init_sid_cache)
}
/*
* The following table represents mapping between the indictment1 reason
* to its type. Each entry pairs a reason string with a
* PLAT_ECC_INDICT_* type.
* NOTE(review): "l2cachedata" and "l3cachedata" each appear twice, once
* for the correctable and once for the uncorrectable type. A lookup that
* stops at the first matching string can never return the uncorrectable
* entries -- confirm how this table is searched.
*/
{ "l2cachedata", PLAT_ECC_INDICT_ECACHE_CORRECTABLES },
{ "l3cachedata", PLAT_ECC_INDICT_ECACHE_CORRECTABLES },
{ "l2cachedata", PLAT_ECC_INDICT_ECACHE_UNCORRECTABLE },
{ "l3cachedata", PLAT_ECC_INDICT_ECACHE_UNCORRECTABLE }
};
/*
* The following table represents mapping between the indictment2 reason
* to its type. Each entry pairs a reason string with a
* PLAT_ECC_INDICT2_* type.
* NOTE(review): "l2cachedata" and "l3cachedata" each appear twice (SERD
* and UE). A lookup that stops at the first matching string can never
* return the UE entries -- confirm how this table is searched.
*/
{ "l2cachedata", PLAT_ECC_INDICT2_L2_SERD },
{ "l3cachedata", PLAT_ECC_INDICT2_L3_SERD },
{ "l2cachedata", PLAT_ECC_INDICT2_L2_UE },
{ "l3cachedata", PLAT_ECC_INDICT2_L3_UE },
{ "l2cachetag", PLAT_ECC_INDICT2_L2_TAG_SERD },
{ "l3cachetag", PLAT_ECC_INDICT2_L3_TAG_SERD },
{ "icache", PLAT_ECC_INDICT2_ICACHE_SERD },
{ "dcache", PLAT_ECC_INDICT2_DCACHE_SERD },
{ "pcache", PLAT_ECC_INDICT2_PCACHE_SERD },
{ "itlb", PLAT_ECC_INDICT2_ITLB_SERD },
{ "dtlb", PLAT_ECC_INDICT2_DTLB_SERD },
{ "fpu", PLAT_ECC_INDICT2_FPU }
};
/*
* The following function returns the indictment type for a fault name.
* The version 1 table (plat_ecc_bl_map_v1) is searched when
* indict_version is 0, the version 2 table (plat_ecc_bl_map_v2)
* otherwise.
*
* Returns PLAT_ECC_INDICT_NONE when the name does not have the expected
* form or when no table entry matches.
*/
static int
{
int mapsz;
char *p;
int i;
/* Check if it starts with proper fault name */
return (PLAT_ECC_INDICT_NONE);
/* Skip the cpu type */
return (PLAT_ECC_INDICT_NONE);
p++; /* skip the "." */
/* mapsz is the number of entries in the table chosen by version */
if (indict_version == 0) {
mapsz = sizeof (plat_ecc_bl_map_v1) /
sizeof (plat_ecc_bl_map_t);
} else {
mapsz = sizeof (plat_ecc_bl_map_v2) /
sizeof (plat_ecc_bl_map_t);
}
/* walk every entry of the chosen table */
for (i = 0; i < mapsz; i++) {
}
}
/* no entry matched */
return (PLAT_ECC_INDICT_NONE);
}
/*
* Blacklisting
*
* Only the BLIOC_INSERT command is supported; any other cmd returns
* ENOTSUP. ENOTSUP is also returned when the fault does not map to a
* known indictment type (PLAT_ECC_INDICT_NONE), and EINVAL is returned
* on the argument and FMRI failure paths.
*/
int
{
char *unum;
return (EINVAL);
if (cmd != BLIOC_INSERT)
return (ENOTSUP);
/*
* We support both the blacklisting of CPUs via mem-schemed
* FMRIs that name E$ J-numbers, and CPUs via cpu-schemed FMRIs
* that name the cpuid.
*/
return (EINVAL);
return (EINVAL);
} else {
return (ENOTSUP);
}
/*
* If the SC cannot handle indictment2, fall back to the old one.
* Also if the domain does not support FMA, then send only the old one.
*/
if (is_old_indict)
else
if (msg_type == PLAT_ECC_INDICT_NONE)
return (ENOTSUP);
/*
* The current blacklisting interfaces are designed for a world where
* the SC is much more involved in the diagnosis and error reporting
* process than it is in the FMA world. As such, the existing
* interfaces want all kinds of information about the error that's
* triggering the blacklist. In the FMA world, we don't have access
* to any of that information by the time we're doing the blacklist,
* so we fake values.
*/
if (is_old_indict) {
unum));
else
return (ENOTSUP);
} else {
}
}
/*
* plat_ecc_dispatch_task: Dispatch the task on a taskq and wait for the
* return value. We use cv_wait_sig to wait for the return values. If a
* signal interrupts us, we return EINTR. Otherwise, we return the value
* returned by the mailbox functions.
*
* To avoid overloading the lower-level mailbox routines, we use a taskq
* to serialize all messages. Currently, it is expected that only one
* process (fmd) will use this ioctl, so the delay caused by the taskq
* should not have much of an effect.
*
* ENOMEM is returned on the early failure path below.
* NOTE(review): presumably that is the case where the task could not be
* queued -- confirm.
*/
int
{
int ret;
return (ENOMEM);
}
/*
* It's possible that the taskq function completed before we
* acquired the mutex. Check for this first. If this did not
* happen, we wait for the taskq function to signal us, or an
* interrupt. We also check ecc_msg_status to protect against
* spurious wakeups from cv_wait_sig.
*/
} else {
&plat_ecc_mutex)) != 0 &&
;
/* An interrupt was received */
} else {
}
}
return (ret);
}
/*
* Send one message to the SC as a mailbox message and report the result
* to the dispatching function, unless that function was interrupted, in
* which case the result is thrown away.
*/
static void
{
int ret;
/*
* Send this data off as a mailbox message to the SC.
*/
/*
* If the dispatching function received an interrupt, don't bother
* signalling it, and throw away the results. Otherwise, set the
* return value and signal the condvar.
*/
} else {
}
}
/*
* plat_ecc_init: takes no arguments and returns nothing.
* NOTE(review): presumably the initialization entry point for the state
* used by the routines in this file, with "bd" as a board index --
* confirm.
*/
void
plat_ecc_init(void)
{
int bd;
}
}