/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
#include <sys/sysmacros.h>
#include <sys/pcie_impl.h>
/* PCIe Fault Fabric Error analysis table */
typedef struct pf_fab_err_tbl {
/*
* Secondary affected flag, effective when the information
* indicated by the primary flag is not available, eg.
*/
/* Functions for scanning errors */
/* Functions for gathering errors */
/* Functions for analysing errors */
static void pf_adjust_for_no_aer(pf_data_t *);
static void pf_adjust_for_no_saer(pf_data_t *);
uint32_t);
/* PCIe Fabric Handle Lookup Support Functions. */
pcie_req_id_t, ndi_fmc_t *);
static void pf_handler_exit(dev_info_t *);
static void pf_reset_pfd(pf_data_t *);
/* Inform interested parties that error handling is about to begin. */
/* ARGSUSED */
void
}
/* Inform interested parties that error handling has ended. */
void
{
/* Clear affected device info and INTR SRC */
}
}
}
/*
* caller may create a local pf_data_t with the "root fault"
* information populated to either do a precise or full scan. More
* than one pf_data_t may be linked together if there are multiple
* errors. Only a PCIe compliant Root Port device may pass in NULL
* for the root_pfd_p.
*
* "Root Complexes" such as NPE and PX should call scan_fabric using itself as
* the rdip. PCIe Root ports should call pf_scan_fabric using its parent as
* the rdip.
*
* Scan fabric initiated from RCs are likely due to a fabric message, traps or
*
* This code assumes that by the time pf_scan_fabric is
* called, pf_handler_enter has NOT been called on the rdip.
*/
int
{
if (pcie_disable_scan)
return (analyse_flag);
/* Find the head and tail of this link list */
;
/* If scan is initiated from RP then RP itself must be scanned. */
!root_pfd_p) {
if (scan_flag & PF_SCAN_DEADLOCK)
goto done;
if (scan_flag & PF_SCAN_NO_ERR_IN_CHILD)
goto done;
}
/*
* Scan the fabric using the scan_bdf and scan_addr in error q.
* scan_bdf will be valid in the following cases:
* - Fabric message
* - Poisoned TLP
* - PIO load failures
*/
if (full_scan ||
if (full_scan)
break;
}
done:
/*
* If this is due to safe access, don't analyze the errors and return
* success regardless of how scan fabric went.
*/
} else {
}
/*
* Check if any hardened driver's callback reported a panic.
* If so panic.
*/
if (scan_flag & PF_SCAN_CB_FAILURE)
/*
* If a deadlock was detected, panic the system as error analysis has
* been compromised.
*/
if (scan_flag & PF_SCAN_DEADLOCK)
return (analyse_flag);
}
void
}
/*
* pf_dispatch walks the device tree and calls the pf_default_hdl if the device
* falls in the error path.
*
* Returns PF_SCAN_* flags
*/
static int
{
/* Make sure dip is attached and ready */
continue;
if (scan_flag & PF_SCAN_DEADLOCK)
break;
/*
* Handle this device if it is a:
* o Full Scan
* o Fault BDF = Device BDF
*/
if (full_scan ||
/*
* A bridge may have detected no errors in which case
* there is no need to scan further down.
*/
if (hdl_flag & PF_SCAN_NO_ERR_IN_CHILD)
continue;
} else {
continue;
}
/* match or in bridge bus-range */
switch (bus_p->bus_dev_type) {
break;
case PCIE_PCIECAP_DEV_TYPE_UP:
{
/*
* Continue if the fault BDF != the switch or there is a
* parity error
*/
break;
}
/*
* Reached a PCIe end point so stop. Note dev_type
* PCI_DEV is just a PCIe device that requires IO Space
*/
break;
if (PCIE_IS_BDG(bus_p))
break;
default:
}
}
return (scan_flag);
}
{
/* check if given bdf falls within bridge's bus range */
if (PCIE_IS_BDG(bus_p) &&
return (B_TRUE);
else
return (B_FALSE);
}
/*
* Return whether the "addr" is in the assigned addr of a device.
*/
{
uint_t i;
return (B_TRUE);
}
return (B_FALSE);
}
/*
* "addr" is in the assigned addr of a device.
*/
static boolean_t
{
uint_t i;
if (!addr)
return (B_FALSE);
/* check if given address belongs to this device */
return (B_TRUE);
/* check if given address belongs to a child below this device */
if (!PCIE_IS_BDG(bus_p))
return (B_FALSE);
case PCI_ADDR_IO:
case PCI_ADDR_MEM32:
return (B_TRUE);
break;
case PCI_ADDR_MEM64:
return (B_TRUE);
break;
}
}
return (B_FALSE);
}
static pcie_bus_t *
{
if (!bus_p)
return (NULL);
return (NULL);
return (bus_p);
}
static void
{
if (bdg) {
} else {
}
}
static void
{
/*
* For PCI-X device PCI-X Capability only exists for Type 0 Headers.
* PCI-X Bridge Capability only exists for Type 1 Headers.
* Both capabilities do not exist at the same time.
*/
if (PCIE_IS_BDG(bus_p)) {
if (PCIX_ECC_VERSION_CHECK(bus_p)) {
/*
* PCI Express to PCI-X bridges only implement the
* secondary side of the PCI-X ECC registers, bit one is
* read-only so we make sure we do not write to it.
*/
if (!PCIE_IS_PCIE_BDG(bus_p)) {
0);
1);
}
}
} else {
if (PCIX_ECC_VERSION_CHECK(bus_p))
B_TRUE);
}
}
static void
{
if (PCIE_IS_ROOT(bus_p)) {
}
if (!PCIE_HAS_AER(bus_p))
return;
/* Gather UE AERs */
PCIE_AER_HDR_LOG + 0x4);
PCIE_AER_HDR_LOG + 0x8);
PCIE_AER_HDR_LOG + 0xc);
/* Gather CE AERs */
/*
* If pci express to pci bridge then grab the bridge
* error registers.
*/
if (PCIE_IS_PCIE_BDG(bus_p)) {
PCIE_AER_SHDR_LOG + 0x4);
PCIE_AER_SHDR_LOG + 0x8);
PCIE_AER_SHDR_LOG + 0xc);
}
/*
* If PCI Express root port then grab the root port
* error registers.
*/
if (PCIE_IS_ROOT(bus_p)) {
}
}
static void
{
/*
* Start by reading all the error registers that are available for
*/
/*
* If pci-pci bridge grab PCI bridge specific error registers.
*/
if (PCIE_IS_BDG(bus_p)) {
}
/*
* If pci express device grab pci express error registers and
* check for advanced error reporting features and grab them if
* available.
*/
if (PCIE_IS_PCIE(bus_p))
else if (PCIE_IS_PCIX(bus_p))
}
static void
{
if (PCIE_IS_BDG(bus_p)) {
if (PCIX_ECC_VERSION_CHECK(bus_p)) {
/*
* PCI Express to PCI-X bridges only implement the
* secondary side of the PCI-X ECC registers. For
* clearing, there is no need to "select" the ECC
* register, just write what was originally read.
*/
if (!PCIE_IS_PCIE_BDG(bus_p)) {
}
}
} else {
if (PCIX_ECC_VERSION_CHECK(bus_p)) {
}
}
}
static void
{
if (!PCIE_HAS_AER(bus_p))
return;
if (PCIE_IS_PCIE_BDG(bus_p)) {
}
/*
* If PCI Express root complex then clear the root complex
* error registers.
*/
if (PCIE_IS_ROOT(bus_p)) {
}
}
static void
{
if (PCIE_IS_PCIE(bus_p))
else if (PCIE_IS_PCIX(bus_p))
if (PCIE_IS_BDG(bus_p)) {
}
}
/* ARGSUSED */
void
{
}
/* Find the fault BDF, fault Addr or full scan on a PCIe Root Port. */
static void
{
int num_faults = 0;
/* Since this data structure is reused, make sure to reset it */
root_fault->scan_addr = 0;
if (!PCIE_HAS_AER(bus_p) &&
return;
}
/*
* Check to see if an error has been received that
* requires a scan of the fabric. Count the number of
* at least 2 faults, so just return with full_scan.
*/
if ((root_err & PCIE_AER_RE_STS_MUL_CE_RCVD) ||
return;
}
if (root_err & PCIE_AER_RE_STS_CE_RCVD)
num_faults++;
num_faults++;
if (ue_err & PCIE_AER_UCE_CA)
num_faults++;
if (ue_err & PCIE_AER_UCE_UR)
num_faults++;
/* If no faults just return */
if (num_faults == 0)
return;
/* If faults > 1 do full scan */
if (num_faults > 1) {
return;
}
/* By this point, there is only 1 fault detected */
if (root_err & PCIE_AER_RE_STS_CE_RCVD) {
num_faults--;
} else if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD) {
num_faults--;
DDI_SUCCESS)) {
num_faults--;
}
/*
* This means an error did occur, but we couldn't extract the fault BDF
*/
if (num_faults > 0)
}
/*
*
* Returns a scan flag.
* o PF_SCAN_SUCCESS - Error gathered and cleared successfully, data added to
* Fault Q
* o PF_SCAN_BAD_RESPONSE - Unable to talk to device, item added to fault Q
* o PF_SCAN_CB_FAILURE - A hardened device deemed that the error was fatal.
* o PF_SCAN_NO_ERR_IN_CHILD - Only applies to bridge to prevent further
* unnecessary scanning
* o PF_SCAN_IN_DQ - This device has already been scanned; it was skipped this
* time.
*/
static int
{
/* Make sure this device hasn't already been snapshotted and cleared */
goto done;
}
/*
* could very well be a device that isn't responding anymore. Just
* stop. Save the basic info in the error q for post mortem debugging
* purposes.
*/
/*
* but populate affected info and severity. Clear out any data
* that may have been saved in the last fabric scan.
*/
/* Add the snapshot to the error q */
return (PF_SCAN_BAD_RESPONSE);
}
if (PCIE_IS_RP(bus_p))
/* Add the snapshot to the error q */
done:
/*
* If a bridge does not have any error no need to scan any further down.
* For PCIe devices, check the PCIe device status and PCI secondary
* status.
* - Some non-compliant PCIe devices do not utilize PCIe
* error registers. If so rely on legacy PCI error registers.
* For PCI devices, check the PCI secondary status.
*/
if (PCIE_IS_PCIE_BDG(bus_p) &&
if (PCIE_IS_PCI_BDG(bus_p) &&
return (scan_flag);
}
/*
* Called during postattach to initialize a device's error handling
* capabilities. If the device has already been hardened, then there isn't
* much needed. Otherwise initialize the device's default FMA capabilities.
*
* In a future project where PCIe support is removed from pcifm, several
* "properties" that are setup in ddi_fm_init and pci_ereport_setup need to be
*/
void
{
if (!bus_p) {
return;
}
if (fmhdl) {
/*
* If device is only ereport capable and not callback capable
* make it callback capable. The only downside is that the
* "fm-errcb-capable" property is not created for this device
* which should be ok since it's not used anywhere.
*/
} else {
int cap;
/*
* fm-capable in driver.conf can be used to set fm_capabilities.
* If fm-capable is not defined, set the default
* DDI_FM_EREPORT_CAPABLE and DDI_FM_ERRCB_CAPABLE.
*/
DDI_PROP_DONTPASS, "fm-capable",
if (cmd == DDI_ATTACH) {
}
if (cap & DDI_FM_ERRCB_CAPABLE)
}
/* If ddi_fm_init fails for any reason RETURN */
if (!fmhdl) {
bus_p->bus_fm_flags = 0;
return;
}
if (cmd == DDI_ATTACH) {
if (need_cb_register)
}
}
/* undo FMA lock, called at predetach */
void
{
if (!bus_p)
return;
/* Don't fini anything if device isn't FM Ready */
return;
/* no other code should set the flag to false */
/*
* Grab the mutex to make sure device isn't in the middle of
* error handling. Setting the bus_fm_flag to ~PF_FM_READY
* should prevent this device from being error handled after
* the mutex has been released.
*/
/* undo non-hardened drivers */
if (cmd == DDI_DETACH) {
/*
* ddi_fini itself calls ddi_handler_unregister,
* so no need to explicitly call unregister.
*/
}
}
}
/*ARGSUSED*/
static int
{
return (DDI_FM_OK);
}
/*
* Add PFD to queue. If it is an RC add it to the beginning,
* otherwise add it to the end.
*/
static void
{
if (!head_p) {
return;
}
/* Check if this is a Root Port erpt */
if (PFD_IS_ROOT(pfd_p)) {
/* The first item must be a RP */
/* root_p is the last RP pfd. last_p is the first non-RP pfd. */
if (last_p)
else
} else {
}
}
/*
* Ignore:
* - TRAINING: as leaves do not have children
* - SD: as leaves do not have children
*/
PF_AFFECTED_PARENT, 0},
PF_AFFECTED_SELF, 0},
PF_AFFECTED_PARENT, 0},
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
0, 0},
PF_AFFECTED_PARENT, 0},
PF_AFFECTED_PARENT, 0},
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
};
0, 0},
};
PF_AFFECTED_CHILDREN, 0},
0, 0},
};
0, 0},
0, 0},
0, 0},
0, 0},
PF_AFFECTED_CHILDREN, 0},
PF_AFFECTED_CHILDREN, 0},
0, 0},
0, 0},
};
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
};
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
PF_AFFECTED_SELF, 0},
};
/*
* Analyse all the PCIe Fault Data (erpt) gathered during dispatch in the erpt
* Queue.
*/
static int
{
sts_flags = 0;
/* skip analysing error when no error info is gathered */
goto done;
if (PCIE_DEVSTS_CE_DETECTED &
break;
break;
/* no adjust_for_aer for pseudo RC */
/* keep the severity passed on from RC if any */
break;
case PCIE_PCIECAP_DEV_TYPE_UP:
if (PCIE_DEVSTS_CE_DETECTED &
break;
if (PCIE_DEVSTS_CE_DETECTED &
/*
* Some non-compliant PCIe devices do not utilize PCIe
* error registers. So fallthrough and rely on legacy
* PCI error registers.
*/
break;
/* FALLTHROUGH */
break;
}
done:
/* Have pciev_eh adjust the severity */
}
return (error_flags);
}
static int
{
int err = 0;
continue;
/*
* check if the primary flag is valid;
* if not, use the secondary flag
*/
if (flags & PF_AFFECTED_AER) {
}
} else if (flags & PF_AFFECTED_SAER) {
}
} else if (flags & PF_AFFECTED_ADDR) {
/* only Root has this flag */
}
}
}
if (!err)
return (err);
}
/*
* PCIe Completer Abort and Unsupported Request error analyser. If a PCIe device
* this error may be safely ignored. If not check the logs and see if an
* associated handler for this transaction can be found.
*/
/* ARGSUSED */
static int
{
/* If UR's are masked forgive this error */
if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
(bit == PCIE_AER_UCE_UR))
return (PF_ERR_NO_PANIC);
/*
* such as a config read or a bad DMA address.
*/
goto handle_lookup;
if (bit == PCIE_AER_UCE_UR)
else
return (PF_ERR_MATCHED_RC);
return (PF_ERR_MATCHED_DEVICE);
return (PF_ERR_PANIC);
}
/*
* PCIe-PCI Bridge Received Master Abort and Target error analyser. If a PCIe
* then this error may be safely ignored. If not check the logs and see if an
* associated handler for this transaction can be found.
*/
/* ARGSUSED */
static int
{
/* If UR's are masked forgive this error */
if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
(bit == PCIE_AER_SUCE_RCVD_MA))
return (PF_ERR_NO_PANIC);
if (bit == PCIE_AER_SUCE_RCVD_MA)
else
return (PF_ERR_MATCHED_RC);
return (PF_ERR_PANIC);
return (PF_ERR_MATCHED_DEVICE);
return (PF_ERR_PANIC);
}
/*
* Generic PCI error analyser. This function is used for Parity Errors,
* Received Master Aborts, Received Target Aborts, and Signaled Target Aborts.
* In general PCI devices do not have error logs, it is very difficult to figure
* out what transaction caused the error. Instead find the nearest PCIe-PCI
* Bridge and check to see if it has logs and if it has an error associated with
* this PCI Device.
*/
/* ARGSUSED */
static int
{
return (PF_ERR_PANIC);
/* If UR's are masked forgive this error */
if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
(bit == PCI_STAT_R_MAST_AB))
return (PF_ERR_NO_PANIC);
} else {
}
if (parent_pfd_p == NULL)
return (PF_ERR_PANIC);
/* Check if parent bridge has seen this error */
return (PF_ERR_PANIC);
/*
* If the addr or bdf from the parent PCIe bridge logs belong to this
* PCI device, assume the PCIe bridge's error handling has already taken
* care of this PCI device's error.
*/
return (PF_ERR_PANIC);
return (PF_ERR_MATCHED_PARENT);
/*
* If this device is a PCI-PCI bridge, check if the bdf in the parent
* PCIe bridge logs is in the range of this PCI-PCI Bridge's bus ranges.
* If they are, then assume the PCIe bridge's error handling has already
* taken care of this PCI-PCI bridge device's error.
*/
if (PCIE_IS_BDG(bus_p) &&
return (PF_ERR_MATCHED_PARENT);
return (PF_ERR_PANIC);
}
/*
* PCIe Bridge transactions associated with PERR.
* o Bridge received a poisoned Non-Posted Write (CFG Writes) from PCIe
* o Bridge received a poisoned Posted Write from (MEM Writes) from PCIe
* o Bridge received a poisoned Completion on a Split Transaction from PCIe
* o Bridge received a poisoned Completion on a Delayed Transaction from PCIe
*
* Check for non-poisoned PCIe transactions that got forwarded to the secondary
* side and detects a PERR#. Except for delayed read completions, a poisoned
* TLP will be forwarded to the secondary bus and PERR# will be asserted.
*/
/* ARGSUSED */
static int
{
return (PF_ERR_PANIC);
switch (cmd) {
case PCI_PCIX_CMD_IOWR:
case PCI_PCIX_CMD_MEMWR:
case PCI_PCIX_CMD_MEMWR_BL:
case PCI_PCIX_CMD_MEMWRBL:
/* Posted Writes Transactions */
B_FALSE);
break;
case PCI_PCIX_CMD_CFWR:
/*
* Check to see if it is a non-posted write. If so, a
* UR Completion would have been sent.
*/
goto done;
}
B_FALSE);
break;
case PCI_PCIX_CMD_SPL:
B_FALSE);
break;
case PCI_PCIX_CMD_DADR:
if (cmd != PCI_PCIX_CMD_DADR)
goto cmd_switch;
/* FALLTHROUGH */
default:
/* Unexpected situation, panic */
}
if (hdl_sts == PF_HDL_FOUND)
else
err = PF_ERR_PANIC;
} else {
/*
* Check to see if it is a non-posted write. If so, a UR
* Completion would have been sent.
*/
/* Check for posted writes. Transaction is lost. */
err = PF_ERR_PANIC;
/*
* All other scenarios are due to read completions. Check for
* PERR on the primary side. If found the primary side error
* handling will take care of this error.
*/
if (err == PF_ERR_NO_ERROR) {
else
err = PF_ERR_PANIC;
}
}
done:
return (err);
}
/*
* PCIe Poisoned TLP error analyser. If a PCIe device receives a Poisoned TLP,
* check the logs and see if an associated handler for this transaction can be
* found.
*/
/* ARGSUSED */
static int
{
/*
* If AERs are supported find the logs in this device, otherwise look in
* its parent's logs.
*/
/*
* Double check that the log contains a poisoned TLP.
* Some devices like PLX switch do not log poison TLP headers.
*/
return (PF_ERR_MATCHED_DEVICE);
}
/*
* If an address is found and hdl lookup failed panic.
* Otherwise check parents to see if there was enough
* information to recover.
*/
return (PF_ERR_PANIC);
}
/*
* Check to see if the rc has already handled this error or a parent has
* already handled this error.
*
* If the error info in the RC wasn't enough to find the fault device,
* such as if the faulting device lies behind a PCIe-PCI bridge from a
* poisoned completion, check to see if the PCIe-PCI bridge has enough
* info to recover. For completion TLP's, the AER header logs only
* contain the faulting BDF in the Root Port. For PCIe device the fault
* BDF is the fault device. But if the fault device is behind a
* PCIe-PCI bridge the fault BDF could turn out just to be a PCIe-PCI
* bridge's secondary bus number.
*/
if (!PFD_IS_ROOT(pfd_p)) {
return (PF_ERR_MATCHED_RC);
}
return (PF_ERR_MATCHED_PARENT);
} else {
/*
* Looking for a pcie bridge only makes sense if the BDF
*/
goto done;
goto done;
return pf_analyse_perr_assert(derr,
}
}
done:
return (PF_ERR_PANIC);
}
/*
* PCIe-PCI Bridge Received Master and Target abort error analyser on Split
* associated handler for this transaction can be found.
*/
/* ARGSUSED */
static int
{
return (PF_ERR_PANIC);
return (PF_ERR_PANIC);
if (cmd == PCI_PCIX_CMD_SPL)
if (sts == PF_HDL_FOUND)
return (PF_ERR_MATCHED_DEVICE);
return (PF_ERR_PANIC);
}
/*
* PCIe Timeout error analyser. This error can be forgiven if it is marked as
* CE Advisory. If it is marked as advisory, this means the HW can recover
*/
/* ARGSUSED */
static int
{
return (PF_ERR_NO_PANIC);
return (PF_ERR_PANIC);
}
/*
* PCIe Unexpected Completion. Check to see if this TLP was misrouted by
* matching the device BDF with the TLP Log. If misrouting panic, otherwise
* don't panic.
*/
/* ARGSUSED */
static int
{
return (PF_ERR_NO_PANIC);
/*
* This is a case of mis-routing. Any of the switches above this
* device could be at fault.
*/
return (PF_ERR_PANIC);
}
/*
* PCIe-PCI Bridge Uncorrectable Data error analyser. All Uncorrectable Data
* errors should have resulted in a PCIe Poisoned TLP to the RC, except for
* Posted Writes. Check the logs for Posted Writes and if the RC did not see a
* Poisoned TLP.
*
* Non-Posted Writes will also generate a UR in the completion status, which the
* RC should also see.
*/
/* ARGSUSED */
static int
{
return (PF_ERR_PANIC);
return (PF_ERR_MATCHED_RC);
return (PF_ERR_MATCHED_DEVICE);
return (PF_ERR_PANIC);
}
/* ARGSUSED */
static int
{
return (PF_ERR_NO_PANIC);
}
/* ARGSUSED */
static int
{
return (PF_ERR_PANIC);
}
/*
* If a PCIe device does not support AER, assume all AER statuses have been set,
* unless other registers do not indicate a certain error occurring.
*/
static void
{
return;
/* Check if the device received a PTLP */
if (!(status & PCI_STAT_PERROR))
aer_ue &= ~PCIE_AER_UCE_PTLP;
/* Check if the device signaled a CA */
if (!(status & PCI_STAT_S_TARG_AB))
aer_ue &= ~PCIE_AER_UCE_CA;
/* Check if the device sent a UR */
aer_ue &= ~PCIE_AER_UCE_UR;
/*
* Ignore ECRCs as it is optional and will manifest itself as
* another error like PTLP and MFP
*/
aer_ue &= ~PCIE_AER_UCE_ECRC;
/*
* Generally if NFE is set, SERR should also be set. Exception:
* When certain non-fatal errors are masked, and some of them
* happened to be the cause of the NFE, SERR will not be set and
* they can not be the source of this interrupt.
*
* On x86, URs are masked (NFE + UR can be set), if any other
* non-fatal errors (i.e, PTLP, CTO, CA, UC, ECRC, ACS) did
* occur, SERR should be set since they are not masked. So if
* SERR is not set, none of them occurred.
*/
if (!(status & PCI_STAT_S_SYSERR))
aer_ue &= ~PCIE_AER_UCE_TO;
}
aer_ue &= ~PCIE_AER_UCE_SD;
}
}
static void
{
return;
/* Check if the device received a UC_DATA */
if (!(status & PCI_STAT_PERROR))
if (!(status & (PCI_STAT_R_MAST_AB))) {
}
if (!(status & (PCI_STAT_R_TARG_AB))) {
}
}
}
/* Find the PCIe-PCI bridge based on secondary bus number */
static pf_data_t *
{
/* Search down for the PCIe-PCI device. */
return (bdg_pfd_p);
}
return (NULL);
}
/* Find the PCIe-PCI bridge of a PCI device */
static pf_data_t *
{
/* This only makes sense if the device is a PCI device */
return (NULL);
/*
* Search up for the PCIe-PCI device. Watch out for x86 where pci
* devices hang directly off of NPE.
*/
return (PCIE_DIP2PFD(dip));
}
return (NULL);
}
/*
* See if a leaf error was bubbled up to the Root Complex (RC) and handled.
* As of right now only RCs have enough information to have errors found in the
* fabric to be matched to the RC. Note that Root Ports (RP) do not carry
* enough information. Currently known RCs are SPARC Fire architecture and
* its equivalents, and x86's NPE.
* SPARC Fire architectures have a plethora of error registers, while currently
* NPE only has the address of a failed load.
*
* Check if either the fault address found in the rc matches the device's
* assigned address range (PIO's only) or the fault BDF in the rc matches the
*/
static boolean_t
{
/* Only root complexes have enough information to match */
continue;
/* If device and rc abort type does not match continue */
continue;
/* The Fault BDF = Device's BDF */
return (B_TRUE);
/* The Fault Addr is in device's address range */
if (pf_in_addr_range(bus_p,
return (B_TRUE);
/* The Fault BDF is from PCIe-PCI Bridge's secondary bus */
if (PCIE_IS_PCIE_BDG(bus_p) &&
return (B_TRUE);
}
return (B_FALSE);
}
/*
* it is a DMA, otherwise it's a PIO
*/
static void
pcie_req_id_t *bdf) {
/* Could be DMA or PIO. Find out by looking at error type. */
case PCIE_AER_SUCE_TA_ON_SC:
case PCIE_AER_SUCE_MA_ON_SC:
return;
case PCIE_AER_SUCE_RCVD_TA:
case PCIE_AER_SUCE_RCVD_MA:
*bdf = PCIE_INVALID_BDF;
return;
case PCIE_AER_SUCE_USC_ERR:
break;
default:
*addr = 0;
*bdf = PCIE_INVALID_BDF;
*trans_type = 0;
return;
}
*bdf = PCIE_INVALID_BDF;
if (PFD_IS_ROOT(rc_pfd_p) &&
PCI_STAT_PERROR)) {
return;
}
}
}
/*
* pf_pci_decode function decodes the secondary aer transaction logs in
* PCIe-PCI bridges.
*
* The log is 128 bits long and arranged in this manner.
* [0:35] Transaction Attribute (s_aer_h0-saer_h1)
* [36:39] Transaction lower command (saer_h1)
* [40:43] Transaction upper command (saer_h1)
* [44:63] Reserved
* [64:127] Address (saer_h2-saer_h3)
*/
/* ARGSUSED */
int
switch (*cmd) {
case PCI_PCIX_CMD_IORD:
case PCI_PCIX_CMD_IOWR:
/* IO Access should always be down stream */
break;
case PCI_PCIX_CMD_MEMRD_DW:
case PCI_PCIX_CMD_MEMRD_BL:
case PCI_PCIX_CMD_MEMRDBL:
case PCI_PCIX_CMD_MEMWR:
case PCI_PCIX_CMD_MEMWR_BL:
case PCI_PCIX_CMD_MEMWRBL:
break;
case PCI_PCIX_CMD_CFRD:
case PCI_PCIX_CMD_CFWR:
/*
* CFG Access should always be down stream. Match the BDF in
* the address phase.
*/
addr = 0;
break;
case PCI_PCIX_CMD_SPL:
/*
* Check for DMA read completions. The requesting BDF is in the
* Address phase.
*/
addr = 0;
break;
case PCI_PCIX_CMD_DADR:
/*
* For Dual Address Cycles the transaction command is in the 2nd
* address phase.
*/
if (*cmd != PCI_PCIX_CMD_DADR)
goto cmd_switch;
/* FALLTHROUGH */
default:
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND.
*/
int
{
/* If we don't know the addr or rid just return with NOTFOUND */
return (PF_HDL_NOTFOUND);
/*
* Disable DMA handle lookup until DMA errors can be handled and
* reported synchronously. When enabled again, check for the
* PF_ADDR_DMA flag
*/
return (PF_HDL_NOTFOUND);
}
}
static int
{
return (status);
}
if (!i_ddi_fm_handler_owned(dip)) {
/*
* pf_handler_enter always returns SUCCESS if the 'impl' arg is
* NULL.
*/
}
/* Check if dip and BDF match, if not recurse to its children. */
else
if (fcp)
else
if (fcp)
}
/* If we found the handler or know it's this device, we're done */
(status == PF_HDL_FOUND)))
goto done;
/*
* If the current device is a PCIe-PCI bridge, we need to check for special
* cases:
*
* If it is a PIO and we don't have an address or this is a DMA, check
* to see if the BDF = secondary bus. If so stop. The BDF isn't a real
* BDF and the fault device could have come from any device in the PCI
* bus.
*/
if (PCIE_IS_PCIE_BDG(bus_p) &&
goto done;
/* If we can't find the handler check its children */
continue;
bdf);
if (status == PF_HDL_FOUND)
goto done;
}
done:
return (status);
}
static int
int found = 0;
int status;
/*
* Compare captured error state with handle
* resources. During the comparison and
* subsequent error handling, we block
* attempts to free the cache entry.
*/
fep->fce_resource) :
fep->fce_resource);
continue;
if (status == DDI_FM_NONFATAL) {
found++;
/* Set the error for this resource handle */
if (flag == ACC_HANDLE) {
} else {
}
}
}
/*
* If a handler isn't found and we know this is the right device mark
* them all failed.
*/
if (status == PF_HDL_FOUND)
found++;
}
}
/*
* Automatically decodes AER header logs and does a handle lookup based on the
* AER header decoding.
*
* in the pfd (PCIe Fault Data) arg.
*
* Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND.
*/
/* ARGSUSED */
static int
{
/*
* Disabling this function temporarily until errors can be handled
* synchronously.
*
* This function is currently only called during the middle of a fabric
* scan. If the fabric scan is called synchronously with an error seen
* PF_ERR_MATCHED_RC error severity. pf_log_hdl_lookup code will be
* bypassed when the severity is PF_ERR_MATCHED_RC. Handle lookup would
* manner. Errors unrelated should panic, because they are being
* handled asynchronously.
*
* will return PF_HDL_NOTFOUND to ensure that the system panics.
*/
return (PF_HDL_NOTFOUND);
}
/*
* Decodes the TLP and returns the BDF of the handler, address and transaction
* type if known.
*
* Types of TLP logs seen in RC, and what to extract:
*
* Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
* Memory(PIO) - address, PF_PIO_ADDR
* CFG - Should not occur and result in UR
* Completion(DMA) - Requester BDF, PF_DMA_ADDR
* Completion(PIO) - Requester BDF, PF_PIO_ADDR
*
*
* Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
* Memory(PIO) - address, PF_PIO_ADDR
* CFG - Destined BDF, address, PF_CFG_ADDR
* Completion(DMA) - Requester BDF, PF_DMA_ADDR
* Completion(PIO) - Requester BDF, PF_PIO_ADDR
*
* The adv_reg_p must be passed in separately for use with SPARC RPs. A
* SPARC RP could have multiple AER header logs which cannot be directly
* accessed via the bus_p.
*/
int
adv_reg_p->pcie_ue_tgt_addr = 0;
adv_reg_p->pcie_ue_tgt_trans = 0;
case PCIE_TLP_TYPE_IO:
case PCIE_TLP_TYPE_MEM:
case PCIE_TLP_TYPE_MEMLK:
/* Grab the 32/64bit fault address */
} else {
}
/*
* If the req bdf >= this.bdf, then it means the request is this
* device or came from a device below it. Unless this device is
* a PCIe root port then it means it is a DMA, otherwise PIO.
*/
} else if (PCIE_IS_ROOT(bus_p) &&
} else {
}
break;
case PCIE_TLP_TYPE_CFG0:
case PCIE_TLP_TYPE_CFG1:
flt_addr = 0;
break;
case PCIE_TLP_TYPE_CPL:
case PCIE_TLP_TYPE_CPLLK:
{
/*
* If the cpl bdf < this.bdf, then it means the request is this
* device or came from a device below it. Unless this device is
* a PCIe root port then it means it is a DMA, otherwise PIO.
*/
} else {
}
break;
}
default:
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
static int
{
return (DDI_FAILURE);
}
/*
*/
if (dip == ddi_root_node()) {
device_path[0] = '/';
} else {
}
if (ena == 0)
return (DDI_SUCCESS);
}
/* ARGSUSED */
static void
{
}
static void
{
/*
* Ereports need to be sent in a top down fashion. The fabric translator
* expects the ereports from the Root first. This is needed to tell if
*/
continue;
continue;
"scan_bdf", DATA_TYPE_UINT16,
"scan_addr", DATA_TYPE_UINT64,
"intr_src", DATA_TYPE_UINT16,
NULL);
goto generic;
}
/* Generic PCI device information */
"device_id", DATA_TYPE_UINT16,
"vendor_id", DATA_TYPE_UINT16,
NULL);
/* PCI registers */
"pci_status", DATA_TYPE_UINT16,
"pci_command", DATA_TYPE_UINT16,
NULL);
/* PCI bridge registers */
if (PCIE_IS_BDG(bus_p)) {
"pci_bdg_sec_status", DATA_TYPE_UINT16,
"pci_bdg_ctrl", DATA_TYPE_UINT16,
NULL);
}
/* PCIx registers */
"pcix_status", DATA_TYPE_UINT32,
"pcix_command", DATA_TYPE_UINT16,
NULL);
}
/* PCIx ECC Registers */
if (PCIX_ECC_VERSION_CHECK(bus_p)) {
if (PCIE_IS_BDG(bus_p))
"pcix_ecc_control_0", DATA_TYPE_UINT16,
PCIE_IS_BDG(bus_p) ?
"pcix_ecc_status_0", DATA_TYPE_UINT16,
PCIE_IS_BDG(bus_p) ?
"pcix_ecc_fst_addr_0", DATA_TYPE_UINT32,
PCIE_IS_BDG(bus_p) ?
"pcix_ecc_sec_addr_0", DATA_TYPE_UINT32,
PCIE_IS_BDG(bus_p) ?
"pcix_ecc_attr_0", DATA_TYPE_UINT32,
PCIE_IS_BDG(bus_p) ?
NULL);
}
/* PCIx ECC Bridge Registers */
"pcix_ecc_control_1", DATA_TYPE_UINT16,
"pcix_ecc_status_1", DATA_TYPE_UINT16,
"pcix_ecc_fst_addr_1", DATA_TYPE_UINT32,
"pcix_ecc_sec_addr_1", DATA_TYPE_UINT32,
"pcix_ecc_attr_1", DATA_TYPE_UINT32,
NULL);
}
/* PCIx Bridge */
"pcix_bdg_status", DATA_TYPE_UINT32,
"pcix_bdg_sec_status", DATA_TYPE_UINT16,
NULL);
}
/* PCIe registers */
if (PCIE_IS_PCIE(bus_p)) {
"pcie_status", DATA_TYPE_UINT16,
"pcie_command", DATA_TYPE_UINT16,
"pcie_dev_cap", DATA_TYPE_UINT32,
NULL);
}
/* PCIe AER registers */
if (PCIE_HAS_AER(bus_p)) {
"pcie_adv_ctl", DATA_TYPE_UINT32,
"pcie_ue_status", DATA_TYPE_UINT32,
"pcie_ue_mask", DATA_TYPE_UINT32,
"pcie_ue_sev", DATA_TYPE_UINT32,
"pcie_ue_hdr0", DATA_TYPE_UINT32,
"pcie_ue_hdr1", DATA_TYPE_UINT32,
"pcie_ue_hdr2", DATA_TYPE_UINT32,
"pcie_ue_hdr3", DATA_TYPE_UINT32,
"pcie_ce_status", DATA_TYPE_UINT32,
"pcie_ce_mask", DATA_TYPE_UINT32,
NULL);
}
/* PCIe AER decoded header */
"pcie_ue_tgt_trans", DATA_TYPE_UINT32,
"pcie_ue_tgt_addr", DATA_TYPE_UINT64,
"pcie_ue_tgt_bdf", DATA_TYPE_UINT16,
NULL);
/* Clear these values as they are no longer valid */
}
/* PCIe BDG AER registers */
"pcie_sue_adv_ctl", DATA_TYPE_UINT32,
"pcie_sue_status", DATA_TYPE_UINT32,
"pcie_sue_mask", DATA_TYPE_UINT32,
"pcie_sue_sev", DATA_TYPE_UINT32,
"pcie_sue_hdr0", DATA_TYPE_UINT32,
"pcie_sue_hdr1", DATA_TYPE_UINT32,
"pcie_sue_hdr2", DATA_TYPE_UINT32,
"pcie_sue_hdr3", DATA_TYPE_UINT32,
NULL);
}
/* PCIe BDG AER decoded header */
"pcie_sue_tgt_trans", DATA_TYPE_UINT32,
"pcie_sue_tgt_addr", DATA_TYPE_UINT64,
"pcie_sue_tgt_bdf", DATA_TYPE_UINT16,
NULL);
/* Clear these values as they are no longer valid */
}
/* PCIe RP registers */
if (PCIE_IS_RP(bus_p)) {
"pcie_rp_status", DATA_TYPE_UINT32,
"pcie_rp_control", DATA_TYPE_UINT16,
NULL);
}
/* PCIe RP AER registers */
"pcie_adv_rp_status", DATA_TYPE_UINT32,
"pcie_adv_rp_command", DATA_TYPE_UINT32,
"pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16,
"pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16,
NULL);
}
/* IOV related information */
"pcie_aff_flags", DATA_TYPE_UINT16,
"pcie_aff_bdf", DATA_TYPE_UINT16,
"orig_sev", DATA_TYPE_UINT32,
NULL);
}
/* Misc ereport information */
NULL);
&eqep);
}
/* Unlock all the devices in the queue */
}
}
}
/*
* pf_handler_enter must be called to serialize access to each device's
* pf_data_t. Once error handling is finished with the device call
* pf_handler_exit to allow other threads to access it. The same thread may
* call pf_handler_enter several times without any consequences.
*
* The "impl" variable is passed in during scan fabric to double check that
* there is not a recursive algorithm and to ensure only one thread is doing a
* fabric scan at all times.
*
* In some cases "impl" is not available, such as "child lookup" being called
* from outside of scan fabric, just pass in NULL for this variable and this
* extra check will be skipped.
*/
static int
{
/*
* Check to see if the lock has already been taken by this
* thread. If so just return and don't take lock again.
*/
return (PF_SCAN_SUCCESS);
}
/* Check to see that this dip is already in the "impl" error queue */
return (PF_SCAN_SUCCESS);
}
}
return (PF_SCAN_DEADLOCK);
}
static void
{
}
/*
* This function calls the driver's callback function (if it's FMA hardened
* and callback capable). This function relies on the current thread already
* owning the driver's fmhdl lock.
*/
static int
{
break;
}
}
}
return (cb_sts);
}
static void
{
pfd_p->pe_severity_flags = 0;
pfd_p->pe_orig_severity_flags = 0;
/* pe_lock and pe_valid were reset in pf_send_ereport */
if (PCIE_IS_ROOT(bus_p)) {
}
if (PCIE_IS_BDG(bus_p)) {
}
if (PCIE_IS_PCIE(bus_p)) {
if (PCIE_IS_ROOT(bus_p)) {
sizeof (pf_pcie_rp_err_regs_t));
sizeof (pf_pcie_adv_rp_err_regs_t));
} else if (PCIE_IS_PCIE_BDG(bus_p)) {
sizeof (pf_pcie_adv_bdg_err_regs_t));
}
if (PCIX_ECC_VERSION_CHECK(bus_p)) {
sizeof (pf_pcix_ecc_regs_t));
sizeof (pf_pcix_ecc_regs_t));
}
}
PCIE_ADV_HDR(pfd_p, 0) = 0;
} else if (PCIE_IS_PCIX(bus_p)) {
if (PCIE_IS_BDG(bus_p)) {
if (PCIX_ECC_VERSION_CHECK(bus_p)) {
sizeof (pf_pcix_ecc_regs_t));
sizeof (pf_pcix_ecc_regs_t));
}
} else {
if (PCIX_ECC_VERSION_CHECK(bus_p)) {
sizeof (pf_pcix_ecc_regs_t));
}
}
}
}
{
return (temp_bus_p);
}
}
return (NULL);
}
{
return (temp_bus_p);
}
}
return (NULL);
}
{
} else if (trans_type == PF_ADDR_PIO) {
} else {
/* PF_ADDR_DMA type */
}
return (temp_bus_p);
}
{
} else if (PCIE_CHECK_VALID_BDF(bdf)) {
}
return (temp_bus_p);
}