/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
#include <sys/privregs.h>
#include <sys/pci_impl.h>
#include <sys/x86_archext.h>
#include <sys/sysmacros.h>
#include <sys/sysevent.h>
#include <sys/cpu_module.h>
#include "ao.h"
#include "ao_mca_disp.h"
extern int x86gentopo_legacy; /* x86 generic topology support */
struct ao_ctl_init {
};
/*
* Additional NB MCA ctl initialization for revs F and G
*/
{ X86_CHIPREV_UNKNOWN, 0 }
};
typedef struct ao_bank_cfg {
};
/*
* This is quite awful but necessary to work around x86 system vendors' view of
* the world. Other operating systems (you know who you are) don't understand
* Opteron-specific error handling, so BIOS and system vendors often hide these
* conditions from them by using SMI polling to copy out any errors from the
* machine-check registers. When Solaris runs on a system with this feature,
* we want to disable the SMI polling so we can use FMA instead. Sadly, there
* isn't even a standard self-describing way to express the whole situation,
* so we have to resort to hard-coded values. This should all be changed to
* be a self-describing vendor-specific SMBIOS structure in the future.
*/
/*
* Platforms on which SMI error polling is to be disabled. Each entry holds
* three strings followed by one numeric value.
* NOTE(review): the member declarations are not shown here; judging by the
* initializers the strings look like system vendor, product name and BIOS
* vendor - confirm, along with the meaning of the trailing 0x59.
*/
static const struct ao_smi_disable {
} ao_smi_disable[] = {
{ "Sun Microsystems", "Galaxy12",
"American Megatrends", 0x59 },
{ "Sun Microsystems", "Sun Fire X4100 Server",
"American Megatrends", 0x59 },
{ "Sun Microsystems", "Sun Fire X4200 Server",
"American Megatrends", 0x59 },
};
/*
* Table pairing each MCAX86_ERRCODE_RRRR_* value (named in the trailing
* comments, in list order) with the corresponding AO_MCA_R4_BIT_* flag.
* NOTE(review): presumably the list is indexed directly by the RRRR code -
* confirm against the lookup that uses it.
*/
static int
{
AO_MCA_R4_BIT_ERR, /* MCAX86_ERRCODE_RRRR_ERR */
AO_MCA_R4_BIT_RD, /* MCAX86_ERRCODE_RRRR_RD */
AO_MCA_R4_BIT_WR, /* MCAX86_ERRCODE_RRRR_WR */
AO_MCA_R4_BIT_DRD, /* MCAX86_ERRCODE_RRRR_DRD */
AO_MCA_R4_BIT_DWR, /* MCAX86_ERRCODE_RRRR_DWR */
AO_MCA_R4_BIT_IRD, /* MCAX86_ERRCODE_RRRR_IRD */
AO_MCA_R4_BIT_PREFETCH, /* MCAX86_ERRCODE_RRRR_PREFETCH */
AO_MCA_R4_BIT_EVICT, /* MCAX86_ERRCODE_RRRR_EVICT */
AO_MCA_R4_BIT_SNOOP /* MCAX86_ERRCODE_RRRR_SNOOP */
};
}
/*
* Table pairing each MCAX86_ERRCODE_PP_* value (named in the trailing
* comments, in list order) with the corresponding AO_MCA_PP_BIT_* flag.
* NOTE(review): presumably the list is indexed directly by the PP code -
* confirm against the lookup that uses it.
*/
static int
{
AO_MCA_PP_BIT_SRC, /* MCAX86_ERRCODE_PP_SRC */
AO_MCA_PP_BIT_RES, /* MCAX86_ERRCODE_PP_RES */
AO_MCA_PP_BIT_OBS, /* MCAX86_ERRCODE_PP_OBS */
AO_MCA_PP_BIT_GEN /* MCAX86_ERRCODE_PP_GEN */
};
}
/*
* Table pairing each MCAX86_ERRCODE_II_* value (named in the trailing
* comments, in list order) with the corresponding AO_MCA_II_BIT_* flag.
* NOTE(review): presumably the list is indexed directly by the II code -
* confirm against the lookup that uses it.
*/
static int
{
AO_MCA_II_BIT_MEM, /* MCAX86_ERRCODE_II_MEM */
0, /* NOTE(review): no II code is named for this slot - confirm */
AO_MCA_II_BIT_IO, /* MCAX86_ERRCODE_II_IO */
AO_MCA_II_BIT_GEN /* MCAX86_ERRCODE_II_GEN */
};
}
static uint8_t
{
return (val);
}
/*ARGSUSED*/
static int
int bankno)
{
/*
* If the bank's status register indicates overflow, then we can no
* longer rely on the value of CECC: our experience with actual fault
* injection has shown that multiple CE's overwriting each other shows
* AMD_BANK_STAT_CECC and AMD_BANK_STAT_UECC both set to zero. This
* should be clarified in a future BKDG or by the Revision Guide.
* This behaviour is fixed in revision F.
*/
/* Only the NorthBridge bank with the overflow bit set takes this path. */
if (bankno == AMD_MCA_BANK_NB &&
status & MSR_MC_STATUS_OVER) {
}
/*
* NOTE(review): as the code reads here this return is unconditional,
* which leaves the r4/pp comparison below unreachable - confirm that
* a guarding condition has not been lost.
*/
return (0);
/*
* r4 and pp bits are stored separately, so we mask off and compare them
* for the code types that use them. Once we've taken the r4 and pp
* bits out of the equation, we can directly compare the resulting code
* with the one stored in the ao_error_disp_t.
*/
/* Memory and bus error codes are each handled by their own branch. */
if (AMD_ERRCODE_ISMEM(code)) {
return (0);
} else if (AMD_ERRCODE_ISBUS(code)) {
return (0);
}
}
/*ARGSUSED*/
{
return ((cms_cookie_t)aed);
}
return (NULL);
}
/*ARGSUSED*/
void
{
}
}
static int
{
}
/*
* This knob exists in case any platform has a problem with our default
* policy of disabling any interrupt registered in the NB MC4_MISC
* register. Setting this may cause Solaris and external entities
* who also have an interest in this register to argue over available
* telemetry (so setting it is generally not recommended).
*/
int ao_nb_cfg_mc4misc_noseize = 0;
/*
* The BIOS may have arranged to receive SMI on counter overflow. It may also
* have locked various fields or made them read-only. We will clear any
* SMI request and leave the register locked. We will also clear the
* counter and enable counting - while we don't use the counter it is nice
* to have it enabled for verification and debug work.
*/
static void
{
return;
return;
return; /* stash BIOS value, but no changes */
/*
* The Valid bit tells us whether the CtrP bit is defined; if it
* is the CtrP bit tells us whether an ErrCount field is present.
* If not then there is nothing for us to do.
*/
return;
if (locked)
if (locked)
}
}
/*
* NorthBridge (NB) MCA Configuration.
*
* We add and remove bits from the BIOS-configured value, rather than
* writing an absolute value. The variables ao_nb_cfg_{add,remove}_cmn and
* ao_nb_cfg_{add,remove}_revFG are available for modification via kmdb.
* The revision-specific adds and removes are applied after the common
* changes, and one write is made to the config register. These variables
* are not intended for watchdog configuration - use the watchdog policy
* below.
*/
/*
* Bits to be added to the NB configuration register - all revs.
*/
/*
* Bits to be cleared from the NB configuration register - all revs.
*/
/*
* Bits to be added to the NB configuration register - revs F and G.
*/
/*
* Bits to be cleared from the NB configuration register - revs F and G.
*/
struct ao_nb_cfg {
};
};
/*
* Bits to be used if we configure the NorthBridge (NB) Watchdog. The watchdog
* triggers a machine check exception when no response to an NB system access
* occurs within a specified time interval.
*/
/*
* The default watchdog policy is to enable it (at the above rate) if it
* is disabled; if it is enabled then we leave it enabled at the rate
* chosen by the BIOS.
*/
enum {
static void
{
/*
* Read the NorthBridge (NB) configuration register in PCI space,
* modify the settings accordingly, and store the new value back.
* Note that the stashed BIOS config value aos_bcfg_nb_cfg is used
* in ereport payload population to determine ECC syndrome type for
* memory errors.
*/
/* Apply the watchdog policy selected by ao_nb_watchdog_policy to val. */
switch (ao_nb_watchdog_policy) {
case AO_NB_WDOG_LEAVEALONE:
break;
case AO_NB_WDOG_DISABLE:
break;
/*
* Any other policy value is reported (see the "using default policy"
* message text) and then handled by the code that follows.
*/
default:
"using default policy", ao_nb_watchdog_policy);
/*FALLTHRU*/
if (!(val & AMD_NB_CFG_WDOGTMRDIS))
break; /* if enabled leave rate intact */
/*FALLTHRU*/
/* Clear the watchdog-disable bit and OR in the ao_nb_cfg_wdog bits. */
val &= ~AMD_NB_CFG_WDOGTMRDIS;
val |= ao_nb_cfg_wdog;
break;
}
/*
* Now apply bit adds and removes, first those common to all revs
* and then the revision-specific ones.
*/
/* Removes precede adds, so a bit named in both masks ends up set. */
val &= ~ao_nb_cfg_remove_cmn;
val |= ao_nb_cfg_add_cmn;
}
nbcp++;
}
}
static void
{
#ifdef OPTERON_ERRATUM_172
}
#endif
}
/*
* This knob exists in case any platform has a problem with our default
* policy of disabling any interrupt registered in the online spare
* control register. Setting this may cause Solaris and external entities
* who also have an interest in this register to argue over available
* telemetry (so setting it is generally not recommended).
*/
int ao_nb_cfg_sparectl_noseize = 0;
/*
* Setup the online spare control register (revs F and G). We disable
* any interrupt registered by the BIOS and zero all error counts.
*/
static void
{
return; /* stash BIOS value, but no changes */
/*
* If the BIOS has requested SMI interrupt type for ECC count
* overflow for a chip-select or channel force those off.
*/
/*
*/
}
}
}
{
int en;
if (ao_forgive_uc)
if (ao_forgive_pcc)
if (retval)
return (retval); /* a non-zero retval is handed straight back */
mslogout);
/*
* If we do not recognise the error let the cpu module apply
* the generic criteria to decide how to react.
*/
return (0);
/* AO_AED_PANIC_ALWAYS in "when" is one of the conditions tested here. */
if ((when & AO_AED_PANIC_ALWAYS) ||
/*
* The original AMD implementation would panic on a machine check
* (not a poll) if the status overflow bit was set, with an
* exception for the case of rev F or later with an NB error
* indicating CECC. This came from the perception that the
* overflow bit was not correctly managed on rev E and earlier, for
* example that repeated correctable memory errors did not set
* OVER but somehow clear CECC.
*
* We will leave the generic support to evaluate overflow errors
* and decide to panic on their individual merits, e.g., if PCC
* is set and so on. The AMD docs do say (as Intel does) that
* the status information is *all* from the higher-priority
* error in the case of an overflow, so it is at least as serious
* as the original and we can decide panic etc based on it.
*/
return (retval);
}
/*
* Will need to change for family 0x10
*/
/*
* Extract a syndrome value from a bank status word. When is_nb is set the
* value may come from AMD_NB_STAT_CKSYND(); every other visible path uses
* AMD_BANK_SYND().
* NOTE(review): the outer condition that selects between the two top-level
* branches is not shown here - confirm what it tests.
*/
static uint_t
int is_nb)
{
if (is_nb) {
return (AMD_NB_STAT_CKSYND(status));
} else {
return (AMD_BANK_SYND(status));
}
} else {
return (AMD_BANK_SYND(status));
}
}
/*
* Build and return an nvlist, or NULL on the failure paths below.
* NOTE(review): presumably the nvlist describes the resource identified by
* unump - confirm against the elided construction code.
*/
static nvlist_t *
{
return (NULL);
return (NULL);
}
unump->unum_offset);
/* x86gentopo_legacy clear: this branch uses board_list. */
if (!x86gentopo_legacy) {
/* No board list available: give up. */
if (board_list == NULL) {
return (NULL);
}
board_list, 4,
} else {
}
return (nvl);
}
static void
{
int nelems = 0;
int i;
/* First pass: count entries, examining at most MC_UNUM_NDIMM slots. */
for (i = 0; i < MC_UNUM_NDIMM; i++) {
break;
break;
nelems++;
}
/* Nothing was counted, so there is nothing further to do. */
if (nelems == 0)
return;
/* Second pass: visit each of the nelems entries counted above. */
for (i = 0; i < nelems; i++)
}
/*ARGSUSED*/
void
{
return;
banknum == AMD_MCA_BANK_NB);
/*
* Each section below is entered only when the matching
* FM_EREPORT_PAYLOAD_FLAG_* bit is set in members.
*/
if (members & FM_EREPORT_PAYLOAD_FLAG_SYND) {
}
if (members & FM_EREPORT_PAYLOAD_FLAG_SYND_TYPE) {
}
if (members & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
}
}
/*ARGSUSED*/
{
return (0);
/*
* Non-zero exactly when FM_EREPORT_PAYLOAD_FLAG_STACK is set in the
* aed_ereport_members of aed.
*/
return ((aed->aed_ereport_members &
FM_EREPORT_PAYLOAD_FLAG_STACK) != 0);
}
{
rv = CMS_SUCCESS;
return (rv);
}
/*ARGSUSED*/
{
}
{
/* Banks other than the NorthBridge bank are never skipped. */
if (banknum != AMD_MCA_BANK_NB)
return (B_FALSE);
/*
* If we are the first to atomically set the "I'll do it" bit
* then return B_FALSE (do not skip), otherwise skip with B_TRUE.
*/
}
{
return (def);
extrap++;
}
return (mcictl);
}
/*ARGSUSED*/
void
{
#ifndef __xpv
return;
if (!(hwcr & AMD_HWCR_MCI_STATUS_WREN)) {
}
#endif
}
/*ARGSUSED*/
void
{
#ifndef __xpv
if (!(hwcr & AMD_HWCR_MCI_STATUS_WREN)) {
}
#endif
}
void
{
int i;
/*
* Read the bank ctl mask MSRs, but only as many as we know
* certainly exist - don't calculate the register address.
* Also initialize the MCi_MISC register where required.
*/
maskp++);
}
}
}
/*
* Note that although this cpu module is loaded before the PSMs are
* loaded (and hence before acpica is loaded), this function is
* called from post_startup(), after PSMs are initialized and acpica
* is loaded.
*/
static int
{
/*
* AcpiGetTable works even if ACPI is disabled, so a failure
* here means we weren't able to retrieve a pointer to the FADT.
*/
return (-1); /* FADT lookup failed */
return (0);
}
/*ARGSUSED*/
void
{
/*
* Fetch the System and BIOS vendor strings from SMBIOS and see if they
* match a value in our table. If so, disable SMI error polling. This
* is grotesque and should be replaced by self-describing vendor-
* specific SMBIOS data or a specification enhancement instead.
*/
continue;
/*
* Look for the SMI_CMD port in the ACPI FADT,
* if the port is 0, this platform doesn't support
* SMM, so there is no SMI error polling to disable.
*/
asd_port != 0) {
"favor of Solaris Fault Management for "
"AMD Processors\n");
/* A negative rv means the SMI command port lookup itself failed. */
} else if (rv < 0) {
"for AMD Processors could not disable SMI "
"polling because an error occurred while "
"trying to determine the SMI command port "
"from the ACPI FADT table\n");
}
/* NOTE(review): presumably stops at the first matching entry - confirm. */
break;
}
}
}