/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* PX Fault Management Architecture
*/
#include "px_obj.h"
extern uint_t px_ranges_phi_mask;
#define PX_PCIE_PANIC_BITS \
#define PX_PCIE_NO_PANIC_BITS \
/*
* Global panicking state variable used to control whether further error
* handling should occur. If the system is already panicking, or if PX itself
* has recommended panicking the system, no further error handling should
* occur, so that the system does not hang.
*/
#if defined(DEBUG)
#else /* DEBUG */
#define px_pcie_log 0 &&
#endif /* DEBUG */
/*
* Initialize px FMA support
*/
int
{
int i;
/*
* check parents' capability
*/
/*
* parents need to be ereport and error handling capable
*/
/*
* Initialize lock to synchronize fabric error handling
*/
px_p->px_pfd_idx = 0;
for (i = 0; i < 5; i++)
return (DDI_SUCCESS);
}
/*
* Deregister FMA
*/
void
{
int i;
for (i = 0; i < 5; i++)
}
/*
* register error callback in parent
*/
void
{
}
void
{
}
/*
* Function used to setup access functions depending on level of desired
* protection.
*/
void
{
switch (fflag) {
case DDI_FLAGERR_ACC:
else
break;
case DDI_CAUTIOUS_ACC :
else
break;
default:
/* Illegal state, remove the handle from cache */
break;
}
}
}
/*
* Function used to initialize FMA for our children nodes. Called
* through pci busops when child node calls ddi_fm_init.
*/
/*ARGSUSED*/
int
{
}
/*
* lock access for exclusive PCIe access
*/
void
{
/*
* Both utilize i_ddi_ontrap which, on sparcv9, implements
* similar protection as what on_trap() does, and which calls
* membar #Sync to flush out all cpu deferred errors
* membar #Sync - a difference from what's in pci_bus_enter().
*/
}
/*
* unlock access for exclusive PCIe access
*/
/* ARGSUSED */
void
{
}
static uint64_t
{
return (addr_low);
return (0);
}
/*
* PCI error callback which is registered with our parent to call
* for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors
*/
/*ARGSUSED*/
int
{
int i, acc_type = 0;
int range_len;
/*
* If the current thread already owns the px_fm_mutex, then we
* have encountered an error while processing a previous
* error. Attempting to take the mutex again will cause the
* system to deadlock.
*/
return (DDI_FM_FATAL);
return (DDI_FM_FATAL);
}
/*
* Make sure this failed load came from this PCIe port. Check by
* matching the upper 32 bits of the address with the ranges property.
*/
i = 0;
if (base_addr) {
case PCI_ADDR_CONFIG:
0xFFFF);
break;
case PCI_ADDR_IO:
case PCI_ADDR_MEM64:
case PCI_ADDR_MEM32:
break;
}
break;
}
}
/* This address doesn't belong to this leaf, just return with OK */
if (!acc_type) {
return (DDI_FM_OK);
}
bdf);
/* Update affected info, either addr or bdf is not NULL */
if (addr) {
} else if (PCIE_CHECK_VALID_BDF(bdf)) {
}
if (!px_die)
return (DDI_FM_OK);
(fab_err & PF_ERR_FATAL_FLAGS) ||
(lookup == PF_HDL_NOTFOUND))
return (DDI_FM_FATAL);
return (DDI_FM_OK);
return (DDI_FM_NONFATAL);
}
/*
* px_err_fabric_intr:
* Interrupt handler for PCIE fabric block.
* o lock
* o create derr
* o px_err_cmn_intr(leaf, with jbc)
* o send ereport(fire fmri, derr, payload = BDF)
* o dispatch (leaf)
* o unlock
* o handle error: fatal? fm_panic() : return INTR_CLAIMED)
*/
/* ARGSUSED */
{
goto done;
/* Create the derr */
if (msg_code == PCIE_MSG_CODE_ERR_COR) {
ue_source = 0;
} else {
ce_source = 0;
if (msg_code == PCIE_MSG_CODE_ERR_NONFATAL)
else {
}
}
"pcie_adv_rp_command", DATA_TYPE_UINT32, 0,
NULL);
}
/* Ensure that the rid of the fabric message will get scanned. */
/* call rootport dispatch */
done:
return (DDI_INTR_CLAIMED);
}
/*
* px_scan_fabric:
*
* Check for drain state and if there is anything to scan.
*
* Note on pfd: Different interrupts will populate the pfd's differently. The
* px driver can have a total of 5 different error sources, so it has a queue of
* 5 pfds. Each valid pfd is linked together and passed to pf_scan_fabric.
*
* Each error handling will populate the following info in the pfd
*
* Error Source        | Root Fault     | Intr Src | Affected BDF
* --------------------+----------------+----------+-------------
* Mondo 62/63 (sun4u) | decode error   | N/A      | N/A
* EPKT (sun4v)        | decode epkt    | INTERNAL | decode epkt
* Fabric Message      | fabric payload | FABRIC   | NULL
* --------------------+----------------+----------+-------------
*/
int
int fab_err = 0;
}
return (fab_err);
}
/*
* px_err_safeacc_check:
* done on a particular leaf.
*
* Safe access reads induced fire errors will be handled by cpu trap handler
* which will call px_fm_callback() which calls this function. In that
* case, the derr fields will be set by trap handler with the correct values.
*
* Safe access writes induced errors will be handled by px interrupt
* handlers, this function will fill in the derr fields.
*
* If a cpu trap does occur, it will quiesce all other interrupts allowing
* the cpu trap error handling to finish before Fire receives an interrupt.
*
* If fire does indeed have an error when a cpu trap occurs as a result of
* In which case derr will be initialized as "UNEXPECTED" by the interrupt
* handler and this function will need to find if this error occurred in the
* middle of a safe access operation.
*
* @param px_p leaf in which to check access
* @param derr fm err data structure to be updated
*/
void
{
return;
}
/* safe access checking */
switch (acctype) {
case DDI_FM_ERR_EXPECTED:
/*
* cautious access protection, protected from all err.
*/
break;
case DDI_FM_ERR_POKE:
/*
* ddi_poke protection, check nexus and children for
* expected errors.
*/
membar_sync();
break;
case DDI_FM_ERR_PEEK:
break;
}
}
/*
* Suggest panic if any EQ (except the CE queue) has overflowed.
*/
int
{
int i;
for (i = 0; i < msiq_state_p->msiq_cnt; i++) {
continue;
return (PX_PANIC);
}
return (PX_NO_PANIC);
}
/* ARGSUSED */
int
{
int i;
/*
* Set the RC s_status in PCI terms to coordinate with downstream fabric
* error analysis.
*/
if (!regs->primary_ue)
goto done;
adv_reg->pcie_adv_ctl++;
if (adv_reg->pcie_ue_tgt_trans ==
}
/*
* Normally for Poisoned Completion TLPs we can look at the
* transmit log header for the original request and the original
* address, however this doesn't seem to be working. HW BUG.
*/
}
done:
/* Return No Error here and let the pcie misc module analyse it */
return (PX_NO_ERROR);
}
#if defined(DEBUG)
static void
{
"A PCIe RC error has occured\n"
"\tCE: 0x%x UE: 0x%x Primary UE: 0x%x\n"
"\tTX Hdr: 0x%x 0x%x 0x%x 0x%x\n\tRX Hdr: 0x%x 0x%x 0x%x 0x%x\n",
}
#endif
/*
* handle lookup.
*/
static int
{
return (PX_PANIC);
goto done;
if (tlp_sts == DDI_FAILURE)
goto done;
switch (tlp_cmd) {
case PCIE_TLP_TYPE_CPL:
case PCIE_TLP_TYPE_CPLLK:
/*
* Usually a PTLP is a CPL with data. Grab the completer BDF
* from the RX TLP, and the original address from the TX TLP.
*/
if (lookup != DDI_SUCCESS)
break;
} /* FALLTHRU */
case PCIE_TLP_TYPE_IO:
case PCIE_TLP_TYPE_MEM:
case PCIE_TLP_TYPE_MEMLK:
bdf);
break;
default:
}
done:
}
/*
* px_get_pdf automatically allocates a RC pf_data_t and returns a pointer to
* it. This function should be used when an error requires a fabric scan.
*/
/* Clear Old Data */
if (idx > 0) {
} else {
}
pfd_p->pe_severity_flags = 0;
pfd_p->pe_orig_severity_flags = 0;
return (pfd_p);
}
/*
* This function appends a pf_data structure to the error q which is used later
* during PCIe fabric scan. It signifies:
*
* scan_bdf: The bdf that caused the fault, which may have error bits set.
* scan_addr: The PIO addr that caused the fault, such as failed PIO, but not
* failed DMAs.
* s_status: Secondary Status equivalent to why the fault occurred.
* Either the scan bdf or addr may be NULL, but not both.
*/
{
return (NULL);
return (pfd_p);
}
/*
* Find and Mark CFG Handles as failed associated with the given BDF. We should
* always know the BDF for CFG accesses, since it is encoded in the address of
* the TLP. Since there can be multiple cfg handles, mark them all as failed.
*/
/* ARGSUSED */
int
const void *arg2)
{
/*
* CFG and IO Acc Handlers are on the same cache list, so both types of
* hdls get called for both types of errors. This checker only marks
* the device as "Non-Fatal" if the addr == NULL and bdf !=
* NULL.
*/
return (status);
}
/*
* Find and Mark all ACC Handles associated with a given address and BDF as
* failed. If the BDF != NULL, then check to see if the device has a ACC Handle
* associated with ADDR. If the handle is not found, mark all the handles as
* failed. If the BDF == NULL, mark the handle as failed if it is associated
* with ADDR.
*/
int
const void *arg2)
{
int range_len;
/*
* Find the correct px dip. On system with a real Root Port, it's the
* node above the root port. On systems without a real Root Port the px
* dip is the bus_rp_dip.
*/
/* Normalize the base addr to the addr and strip off the HB info. */
i = 0;
if (range_addr) {
case PCI_ADDR_IO:
case PCI_ADDR_MEM64:
case PCI_ADDR_MEM32:
break;
}
break;
}
}
/*
* Mark the handle as failed if the ADDR is mapped, or if we
* know the BDF and ADDR == 0.
*/
return (status);
}
/*
* Find and Mark all DMA Handles associated with a given address and BDF as
* failed. If the BDF != NULL, then check to see if the device has a DMA Handle
* associated with ADDR. If the handle is not found, mark all the handles as
* failed. If the BDF == NULL, mark the handle as failed if it is associated
* with ADDR.
*/
int
const void *arg2)
{
/*
* Mark the handle as failed if the ADDR is mapped, or if we
* know the BDF and ADDR == 0.
*/
return (status);
}
int
return (DDI_FAILURE);
/*
* In rare cases a trap occurs in the middle of scanning the fabric and a
* PIO fails during that scan. The CPU error handling code will
* correctly panic the system, while a mondo for the failed
* PIO may also show up. Normally the mondo will try to grab the mutex
* and wait until the callback finishes. But in this rare case,
* mutex_enter actually succeeds and the mondo also continues to scan the fabric.
*
* This code below is designed specifically to check for this case. If
* we successfully grab the px_fm_mutex, the px_fm_mutex_owner better be
* NULL. If it isn't, that means we are in the rare corner case. Return
* DDI_FAILURE; this should prevent PX from doing any more error
* handling.
*/
if (px_p->px_fm_mutex_owner) {
return (DDI_FAILURE);
}
if (px_panicing) {
return (DDI_FAILURE);
}
/* Signal the PCIe error handling module error handling is starting */
return (DDI_SUCCESS);
}
static void
{
/*
* check if all devices under the root device are unassigned.
* this function should quickly return in non-IOV environment.
*/
return;
/* assume all affected devs were in the error Q */
continue;
if (PCIE_IS_BDG(bus_p)) {
while (rl) {
}
} else {
}
/* clear panic flag */
}
}
void
if (px_p->px_pfd_idx == 0) {
return;
}
/* panic the affected domains that are non-fma-capable */
/* Signal the PCIe error handling module error handling is ending */
px_p->px_pfd_idx = 0;
}
/*
* Panic if the err tunable is set and we are not already in the middle
* of panicking.
*
* rc_err = Error severity of PX specific errors
* msg = Where the error was detected
* fabric_err = Error severity of PCIe Fabric errors
* isTest = Test if error severity causes panic
*/
void
{
if (panicstr) {
return;
}
goto fabric;
if (fabric_err & PF_ERR_FATAL_FLAGS)
ferr = PX_NO_PANIC;
}
}
if (!isTest)
fm_panic("Fatal error has occured in:%s.(0x%x)(0x%x)",
}
}