/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* PCI ECC support
*/
#include <sys/ddi_impldefs.h>
#include <sys/machsystm.h>
#include <sys/sysmacros.h>
#include <sys/errclassify.h>
#include <sys/cpu_module.h>
/*LINTLIBRARY*/
static void ecc_disable(ecc_t *, int);
static void ecc_delayed_ce(void *);
void
{
#ifdef DEBUG
#endif
/*
* Determine the virtual addresses of the streaming cache
*/
/*
* Register routines to be called from system error handling code.
*/
}
int
{
int ret;
/*
* Install the UE and CE error interrupt handlers.
*/
return (ret);
return (ret);
return (DDI_SUCCESS);
}
void
{
/*
* Disable UE and CE ECC error interrupts.
*/
/*
* Remove the ECC interrupt handlers.
*/
/*
* Unregister our error handling functions.
*/
/*
* If a timer has been set, unset it.
*/
}
void
{
uint64_t l;
/*
* Clear any pending ECC errors.
*/
/*
* Enable ECC error detections via the control register.
*/
}
void
{
}
void
{
}
{
return (BF_NONE);
}
static void
{
}
/*
* I/O ECC error handling:
*
* Below are the generic functions that handle PCI (pcisch, pcipsy) detected
* ECC errors.
*
* The registered interrupt handler for both pcisch and pcipsy is ecc_intr();
* its function is to receive the error, capture some state, and pass that on
* to ecc_err_handler() for reporting purposes.
*
* ecc_err_handler() gathers more state (via ecc_errstate_get) and attempts
* to handle and report the error. ecc_err_handler() must determine if we need
* to panic due to this error (via pci_ecc_classify, which also decodes the
* ECC afsr), and if any side effects exist that may have caused or are due
* to this error. PBM errors related to the ECC error may exist; to report
* them we call pci_pbm_err_handler() and call ndi_fm_handler_dispatch() so
* that the child devices can log their pci errors.
*
* To report the error we must also get the syndrome and unum, which cannot
* be done in high-level interrupt context. Therefore we have an error
* queue (pci_ecc_queue) to which we dispatch errors so that they can be
* reported later (ecc_err_drain()).
*
* ecc_err_drain() will be called when either the softint is triggered
* or the system is panicking. Either way it will gather more information
* about the error from the CPU (via ecc_cpu_call(), ecc.c), attempt to
* retire the faulty page (if error is a UE), and report the detected error.
*
* ecc_delayed_ce() is called via timeout from ecc_err_handler() following
* the receipt of a CE interrupt. It will be called after 6ms and check to
* see if any new CEs are present; if so, we will log them and another
* timeout will be set (by ecc_err_handler()). If no CEs are present then it
* will re-enable CEs by clearing the previous interrupt. This is to keep the
* system going in the event of a CE storm.
*/
/*
* Function used to get ECC AFSR register
*/
static uint64_t
{
uint_t i;
if (!ecc_ii_p->ecc_errpndg_mask)
for (i = 0; i < pci_ecc_afsr_retries; i++) {
/*
* If we timeout, the logging routine will
* know because it will see the ERRPNDG bits
* set in the AFSR.
*/
break;
}
return (afsr);
}
/*
* IO detected ECC error interrupt handler, calls ecc_err_handler to post
* error reports and handle the interrupt. Re-entry into ecc_err_handler
* is protected by the per-chip mutex pci_fm_mutex.
*/
{
if (ret == DDI_FM_FATAL) {
/*
* Need delay here to allow CPUs to handle related traps,
* such as FRUs for USIIIi systems.
*/
fm_panic("Fatal PCI UE Error");
}
return (DDI_INTR_CLAIMED);
}
/*
* Function used to gather IO ECC error state.
*/
static void
{
/*
* Read the fault registers.
*/
}
/*
* ecc_pci_check: Called by ecc_err_handler(), this function is responsible
* for reporting any related PBM errors on each PCI node of the chip and for
* calling their children's error handlers (via ndi_fm_handler_dispatch()).
*/
static int
{
int i;
int ret;
/*
* Need to report any PBM errors which may have caused or
* resulted from this error.
*
* Each psycho or schizo is represented by a pair of pci nodes
* in the device tree.
*/
for (i = 0; i < 2; i++) {
/* Make sure PBM PCI node exists */
continue;
PCI_ECC_CALL) == DDI_FM_FATAL)
ret = DDI_FM_FATAL;
}
if (ret == DDI_FM_FATAL)
return (DDI_FM_FATAL);
else
return (DDI_FM_NONFATAL);
}
/*
* Function used to handle and log IO detected ECC errors, can be called by
* ecc_intr and pci_err_callback(trap callback). Protected by pci_fm_mutex.
*/
int
{
int fatal = 0;
int nonfatal = 0;
int i;
case CBNINTR_UE:
if (pri_err) {
sizeof (ecc_errstate_t),
}
if (sec_err) {
ecc_sec_err = *ecc_err_p;
ecc_sec_err.ecc_pri = 0;
/*
* Secondary errors are cumulative so we need to loop
* through to capture them all.
*/
for (i = 0; i < 3; i++) {
if (sec_tmp) {
&ecc_sec_err);
}
}
}
/*
* Check for PCI bus errors that may have resulted from or
* caused this UE.
*/
/*
* Disable all further errors since this will be
* treated as a fatal error.
*/
(void) ecc_disable_nowait(ecc_p);
fatal++;
}
break;
case CBNINTR_CE:
if (pri_err) {
nonfatal++;
}
if (sec_err) {
ecc_sec_err = *ecc_err_p;
ecc_sec_err.ecc_pri = 0;
/*
* Secondary errors are cumulative so we need to loop
* through to capture them all.
*/
for (i = 0; i < 3; i++) {
if (sec_tmp) {
&ecc_sec_err);
}
}
nonfatal++;
}
break;
default:
return (DDI_FM_OK);
}
/* Clear the errors */
/*
* Clear the interrupt if called by ecc_intr and UE error or if called
* by ecc_intr and CE error and delayed CE interrupt handling is
* turned off.
*/
return (DDI_FM_OK);
else if (fatal)
return (DDI_FM_FATAL);
return (DDI_FM_NONFATAL);
}
/*
* Called from ecc_err_drain below for CBNINTR_CE case.
*/
static int
{
return (0);
return (0);
} else {
}
}
/*
* Function used to drain pci_ecc_queue, either during panic or after softint
* is generated, to log IO detected ECC errors.
*/
/*ARGSUSED*/
void
{
/*
* Perform any additional actions that occur after the
* ecc_err_cexdiag below and post the ereport.
*/
return;
}
switch (ecc_type) {
case CBNINTR_UE:
}
break;
case CBNINTR_CE:
/*
* Setup timeout (if CE detected via interrupt) to
* re-enable CE interrupts if no more CEs are detected.
* This is to protect against CE storms.
*/
if (ecc_ce_delayed &&
MICROSEC));
}
/* ecc_err_cexdiag returns nonzero to recirculate */
return;
break;
}
}
static void
{
/*
* If no more CE errors are found then enable interrupts(by
* clearing the previous interrupt), else send in for logging
* and the timeout should be set again.
*/
} else {
(void) ecc_err_handler(&ecc_err);
}
}
/*
* Function used to post IO detected ECC ereports.
*/
static void
{
/*
* We do not use ddi_fm_ereport_post because we need to set a
* special detector here. Since we do not have a device path for
* the bridge chip we use what we think it should be to aid in
* diagnosis. This path fmri is created by pci_fmri_create()
* during initialization.
*/
return;
if (ptr)
*ptr = '\0';
int len;
int ret;
if (ret == 0) {
(void) cpu_get_mem_offset(
}
}
NULL);
} else {
NULL);
}
}