px_err.c revision d0f40dc6a997c84bacf5f9ba83d57a95495c399b
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* sun4v Fire Error Handling
*/
#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/membar.h>
#include "px_obj.h"
#include "px_err.h"
/*
* Forward declarations for the file-local helpers used by the interrupt
* handlers and severity routines in this file.
*
* NOTE(review): px_cb_epkt_severity, px_mmu_epkt_severity,
* px_intr_epkt_severity and px_port_epkt_severity are declared here and called
* from px_err_epkt_severity(), but no definition is visible in this part of
* the file; presumably they come from the generated px_err_gen.c included
* below -- confirm.
*/
static void px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p,
px_rc_err_t *epkt);
static uint_t px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt);
static int px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr,
px_rc_err_t *epkt, pf_data_t *pfd_p);
static void px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt,
boolean_t is_block_pci, char *msg);
static void px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt,
boolean_t is_block_pci, int err, ddi_fm_error_t *derr,
boolean_t is_valid_epkt);
static int px_cb_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
px_rc_err_t *epkt, pf_data_t *pfd_p);
static int px_mmu_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
px_rc_err_t *epkt, pf_data_t *pfd_p);
static int px_intr_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
px_rc_err_t *epkt, pf_data_t *pfd_p);
static int px_port_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
px_rc_err_t *epkt, pf_data_t *pfd_p);
static int px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
px_rc_err_t *epkt, pf_data_t *pfd_p);
static int px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr,
px_rc_err_t *epkt, pf_data_t *pfd_p);
static int px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr,
px_rc_err_t *epkt, pf_data_t *pfd_p);
static void px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr,
px_rc_err_t *epkt);
static int px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr,
px_rc_err_t *epkt);
/* Include the code generated sun4v epkt checking code */
#include "px_err_gen.c"
/*
* This variable indicates if we have a hypervisor that could potentially send
* incorrect epkts. We always set this to TRUE for now until we find a way to
* tell if this HV bug has been fixed.
*
* While it is B_TRUE, px_err_epkt_severity() runs px_fix_legacy_epkt() on
* every epkt before the epkt is classified.
*/
boolean_t px_legacy_epkt = B_TRUE;
/*
 * px_err_cb_intr:
 * Interrupt handler for the Host Bus Block.
 *
 * 'arg' is the px_fault_t registered for the interrupt.  The epkt attached
 * to it as px_intr_payload is handed to px_err_intr(); when no payload is
 * present the interrupt is reported as unclaimed.
 */
uint_t
px_err_cb_intr(caddr_t arg)
{
	px_fault_t	*hb_fault = (px_fault_t *)arg;
	px_rc_err_t	*payload;

	payload = (px_rc_err_t *)hb_fault->px_intr_payload;
	if (payload == NULL)
		return (DDI_INTR_UNCLAIMED);

	return (px_err_intr(hb_fault, payload));
}
/*
 * px_err_dmc_pec_intr:
 * Interrupt handler for the DMC/PEC block.
 *
 * 'arg' is the px_fault_t registered for the interrupt.  The epkt attached
 * to it as px_intr_payload is handed to px_err_intr(); when no payload is
 * present the interrupt is reported as unclaimed.
 */
uint_t
px_err_dmc_pec_intr(caddr_t arg)
{
	px_fault_t	*leaf_fault = (px_fault_t *)arg;
	px_rc_err_t	*payload;

	payload = (px_rc_err_t *)leaf_fault->px_intr_payload;
	if (payload == NULL)
		return (DDI_INTR_UNCLAIMED);

	return (px_err_intr(leaf_fault, payload));
}
/*
 * px_err_cmn_intr:
 * Common function called by trap, mondo and fabric intr.
 * This function is more meaningful in sun4u implementation. Kept
 * to mirror sun4u call stack.
 *
 * In this implementation it only runs the safe access check on 'derr' and
 * always reports PX_NO_ERROR.
 *
 * @param px_p		leaf in which to check access
 * @param derr		fm err data structure to be updated
 * @param caller	PX_TRAP_CALL | PX_INTR_CALL (ignored)
 * @param block		which blocks to handle (ignored)
 * @return err		PX_NO_ERROR
 */
/* ARGSUSED */
int
px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block)
{
	int	severity = PX_NO_ERROR;

	px_err_safeacc_check(px_p, derr);

	return (severity);
}
/*
 * px_err_fill_pfd:
 * Fills the RC specific fault data of 'pfd_p' from the epkt.
 *
 * Three values are derived per epkt block and stored at the end:
 *   o scan_bdf   - BDF used for the fabric scan (PCIE_INVALID_BDF if unknown)
 *   o scan_addr  - fault address used for the fabric scan (0 if unknown)
 *   o pci_bdg_sec_stat - legacy PCI secondary status bits for the error
 *
 * For BLOCK_MMU with a valid header the affected device of 'pfd_p' is also
 * set to the requester BDF taken from the header.
 *
 * @param dip	root complex dip, used to decode the logged TLP header
 * @param pfd_p	fault data to fill in
 * @param epkt	epkt received from HV
 */
static void
px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p, px_rc_err_t *epkt)
{
	pf_pcie_adv_err_regs_t	adv_reg;
	pcie_req_id_t		fault_bdf = PCIE_INVALID_BDF;
	uint64_t		fault_addr = 0;
	uint16_t		s_status = 0;
	px_pec_err_t		*pec_p;
	uint32_t		dir;

	/* Add an PCIE PF_DATA Entry */
	switch (epkt->rc_descr.block) {
	case BLOCK_MMU:
		/* Only PIO Fault Addresses are valid, this is DMA */
		s_status = PCI_STAT_S_TARG_AB;
		/* fault_addr is an integer, so use 0 rather than NULL */
		fault_addr = 0;

		if (epkt->rc_descr.H) {
			fault_bdf = (pcie_req_id_t)(epkt->hdr[0] >> 16);
			PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags =
			    PF_AFFECTED_BDF;
			PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf =
			    fault_bdf;
		}
		break;
	case BLOCK_PCIE:
		pec_p = (px_pec_err_t *)epkt;
		dir = pec_p->pec_descr.dir;

		/* translate RC UR/CA to legacy secondary errors */
		if ((dir == DIR_READ || dir == DIR_WRITE) &&
		    pec_p->pec_descr.U) {
			if (pec_p->ue_reg_status & PCIE_AER_UCE_UR)
				s_status |= PCI_STAT_R_MAST_AB;
			if (pec_p->ue_reg_status & PCIE_AER_UCE_CA)
				s_status |= PCI_STAT_R_TARG_AB;
		}

		if (pec_p->ue_reg_status & PCIE_AER_UCE_PTLP)
			s_status |= PCI_STAT_PERROR;

		if (pec_p->ue_reg_status & PCIE_AER_UCE_CA)
			s_status |= PCI_STAT_S_TARG_AB;

		if (pec_p->pec_descr.H) {
			/* Split the two 64-bit header words into four DWs */
			adv_reg.pcie_ue_hdr[0] =
			    (uint32_t)(pec_p->hdr[0] >> 32);
			adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0]);
			adv_reg.pcie_ue_hdr[2] =
			    (uint32_t)(pec_p->hdr[1] >> 32);
			adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1]);

			if (pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg) ==
			    DDI_SUCCESS) {
				fault_bdf = adv_reg.pcie_ue_tgt_bdf;
				fault_addr = adv_reg.pcie_ue_tgt_addr;
				/*
				 * affected BDF is to be filled in by
				 * px_scan_fabric
				 */
			}
		}
		break;
	case BLOCK_HOSTBUS:
	case BLOCK_INTR:
	case BLOCK_PORT:
		/*
		 * If the affected device information is available then we
		 * add the affected_bdf to the pfd, so the affected device
		 * will be scanned and added to the error q. This will then
		 * go through the pciev_eh code path and forgive the error
		 * as needed.
		 */
		if (PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags ==
		    PF_AFFECTED_BDF)
			fault_bdf = PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf;
		break;
	default:
		break;
	}

	PCIE_ROOT_FAULT(pfd_p)->scan_bdf = fault_bdf;
	PCIE_ROOT_FAULT(pfd_p)->scan_addr = fault_addr;
	PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status;
}
/*
 * px_err_to_fab_sev:
 * Convert error severity from PX internal values to PCIe Fabric values.
 *
 * If '*rc_err' has any bit in common with px_die, '*rc_err' is overwritten
 * with PX_FABRIC_SCAN and PF_ERR_PANIC is reported, so that the fabric scan
 * decides the final severity (needed in case IOV code has to forgive the
 * error).  The remaining checks are made against the possibly updated
 * '*rc_err'.
 *
 * @param rc_err	in/out PX severity
 * @return		PF_ERR_* flags for the fabric scan code
 */
static int
px_err_to_fab_sev(int *rc_err)
{
	int	fabric_sev = 0;

	if ((*rc_err & px_die) != 0) {
		*rc_err = PX_FABRIC_SCAN;
		fabric_sev |= PF_ERR_PANIC;
	}

	if ((*rc_err & PX_EXPECTED) != 0 || (*rc_err & PX_NO_PANIC) != 0)
		fabric_sev |= PF_ERR_NO_PANIC;

	if ((*rc_err & PX_NO_ERROR) != 0)
		fabric_sev |= PF_ERR_NO_ERROR;

	return (fabric_sev);
}
/*
* px_err_intr:
* Common body of the epkt error interrupt handlers; called by
* px_err_cb_intr() and px_err_dmc_pec_intr().
* o px_fm_enter
* o create derr
* o check safe access
* o px_err_epkt_severity(epkt)
* o px_scan_fabric
* o Idle intr state
* o px_err_panic / px_fm_exit / px_err_panic
*
* Returns DDI_INTR_UNCLAIMED only when the interrupt state could not be set
* back to idle. Every other path, including a failed px_fm_enter(), returns
* DDI_INTR_CLAIMED.
*/
static uint_t
px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt)
{
px_t *px_p = DIP_TO_STATE(fault_p->px_fh_dip);
dev_info_t *rpdip = px_p->px_dip;
int rc_err, tmp_rc_err, fab_err, msg;
ddi_fm_error_t derr;
pf_data_t *pfd_p;
/* If the FM section cannot be entered, skip all handling */
if (px_fm_enter(px_p) != DDI_SUCCESS)
goto done;
/* Record the epkt as the source of this internal interrupt */
pfd_p = px_get_pfd(px_p);
PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_INTERNAL;
PCIE_ROOT_EH_SRC(pfd_p)->intr_data = epkt;
/* Create the derr */
bzero(&derr, sizeof (ddi_fm_error_t));
derr.fme_version = DDI_FME_VERSION;
derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1);
derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
/* Basically check for safe access */
(void) px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_ALL);
/* Check the severity of this error */
rc_err = px_err_epkt_severity(px_p, &derr, epkt, pfd_p);
/*
* Pass the 'rc_err' severity to the fabric scan code.
* px_err_to_fab_sev() may overwrite rc_err with PX_FABRIC_SCAN, so the
* original value is kept in tmp_rc_err.
*/
tmp_rc_err = rc_err;
pfd_p->pe_severity_flags = px_err_to_fab_sev(&rc_err);
/* Scan the fabric */
if (!(fab_err = px_scan_fabric(px_p, rpdip, &derr))) {
/*
* Fabric scan didn't occur because of some error condition
* such as Root Port being in drain state, so reset rc_err.
*/
rc_err = tmp_rc_err;
}
/* Set the intr state to idle for the leaf that received the mondo */
if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino,
INTR_IDLE_STATE) != DDI_SUCCESS) {
px_fm_exit(px_p);
return (DDI_INTR_UNCLAIMED);
}
/* Pick the message id passed to px_err_panic() from the epkt block */
switch (epkt->rc_descr.block) {
case BLOCK_MMU: /* FALLTHROUGH */
case BLOCK_INTR:
msg = PX_RC;
break;
case BLOCK_PCIE:
msg = PX_RP;
break;
case BLOCK_HOSTBUS: /* FALLTHROUGH */
default:
msg = PX_HB;
break;
}
/*
* px_err_panic() is called once before and once after px_fm_exit(),
* differing only in the last argument.
* NOTE(review): the meaning of that flag is defined by px_err_panic(),
* which is not visible here -- confirm before reordering.
*/
px_err_panic(rc_err, msg, fab_err, B_TRUE);
px_fm_exit(px_p);
px_err_panic(rc_err, msg, fab_err, B_FALSE);
done:
return (DDI_INTR_CLAIMED);
}
/*
* px_err_epkt_severity:
* Check the severity of the fire error based on the epkt received.
*
* The epkt is first corrected for known legacy hypervisor problems (when
* px_legacy_epkt is set), then classified by the handler for its block, the
* RC fault data in 'pfd_p' is filled in, the result is optionally logged,
* and an ereport is posted. An epkt for which the block handler returns 0
* is treated as unrecognized and its severity becomes PX_PANIC.
*
* @param px_p leaf in which to take the snap shot.
* @param derr fm err in which the ereport is to be based on
* @param epkt epkt received from HV
* @param pfd_p fault data for this error; affected device and RC fault
* fields are updated
* @return PX_* severity of the error
*/
static int
px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt,
pf_data_t *pfd_p)
{
px_pec_t *pec_p = px_p->px_pec_p;
dev_info_t *dip = px_p->px_dip;
boolean_t is_safeacc = B_FALSE;
boolean_t is_block_pci = B_FALSE;
boolean_t is_valid_epkt = B_FALSE;
int err = 0;
/* Cautious access error handling */
switch (derr->fme_flag) {
case DDI_FM_ERR_EXPECTED:
/*
* For ddi_caut_put treat all events as nonfatal. Here
* we have the handle and can call ndi_fm_acc_err_set().
*/
derr->fme_status = DDI_FM_NONFATAL;
ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr);
is_safeacc = B_TRUE;
break;
case DDI_FM_ERR_PEEK:
case DDI_FM_ERR_POKE:
/*
* For ddi_peek/poke treat all events as nonfatal.
*/
is_safeacc = B_TRUE;
break;
default:
is_safeacc = B_FALSE;
}
/*
* Older hypervisors in some cases send epkts with incorrect fields.
* We have to handle these "special" epkts correctly.
*/
if (px_legacy_epkt)
px_fix_legacy_epkt(dip, derr, epkt);
/*
* The affected device by default is set to 'SELF'. The 'block'
* specific error handling below will update this as needed.
*/
PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;
/* Dispatch to the severity routine for the block that reported */
switch (epkt->rc_descr.block) {
case BLOCK_HOSTBUS:
err = px_cb_epkt_severity(dip, derr, epkt, pfd_p);
break;
case BLOCK_MMU:
err = px_mmu_epkt_severity(dip, derr, epkt, pfd_p);
break;
case BLOCK_INTR:
err = px_intr_epkt_severity(dip, derr, epkt, pfd_p);
break;
case BLOCK_PORT:
err = px_port_epkt_severity(dip, derr, epkt, pfd_p);
break;
case BLOCK_PCIE:
is_block_pci = B_TRUE;
err = px_pcie_epkt_severity(dip, derr, epkt, pfd_p);
break;
default:
/* Unknown block: handled as an unrecognized epkt below */
err = 0;
}
/* Must follow the block handler, which may set the affected device */
px_err_fill_pfd(dip, pfd_p, epkt);
/*
* Log according to the most severe bit set in 'err'; each class is
* only logged when the matching bit is enabled in px_log. An
* unrecognized epkt is always logged.
*/
if ((err & PX_HW_RESET) || (err & PX_PANIC)) {
if (px_log & PX_PANIC)
px_err_log_handle(dip, epkt, is_block_pci, "PANIC");
is_valid_epkt = B_TRUE;
} else if (err & PX_PROTECTED) {
if (px_log & PX_PROTECTED)
px_err_log_handle(dip, epkt, is_block_pci, "PROTECTED");
is_valid_epkt = B_TRUE;
} else if (err & PX_NO_PANIC) {
if (px_log & PX_NO_PANIC)
px_err_log_handle(dip, epkt, is_block_pci, "NO PANIC");
is_valid_epkt = B_TRUE;
} else if (err & PX_NO_ERROR) {
if (px_log & PX_NO_ERROR)
px_err_log_handle(dip, epkt, is_block_pci, "NO ERROR");
is_valid_epkt = B_TRUE;
} else if (err == 0) {
px_err_log_handle(dip, epkt, is_block_pci, "UNRECOGNIZED");
is_valid_epkt = B_FALSE;
/* Panic on a unrecognized epkt */
err = PX_PANIC;
}
px_err_send_epkt_erpt(dip, epkt, is_block_pci, err, derr,
is_valid_epkt);
/*
* Readjust the severity as a result of safe access: downgrade to
* PX_NO_PANIC unless the error is a panic or px_die includes
* PX_PROTECTED.
*/
if (is_safeacc && !(err & PX_PANIC) && !(px_die & PX_PROTECTED))
err = PX_NO_PANIC;
return (err);
}
/*
 * px_err_send_epkt_erpt:
 * Post a PX_FM_RC_UNRECOG class ereport describing the epkt, for debug
 * purposes.
 *
 * The payload carries the sysino, ehdl and stick of the epkt (reported as 0
 * when the epkt is not valid), 64-bit words 3 through 7 of the raw epkt, and
 * a human readable description string.  The PCIe block layout (px_pec_err_t)
 * is used for the description when 'is_block_pci' is set, the generic RC
 * layout otherwise.
 *
 * @param dip		root complex dip the ereport is posted against
 * @param epkt		epkt received from HV
 * @param is_block_pci	B_TRUE if the epkt came from BLOCK_PCIE
 * @param err		severity computed for the epkt
 * @param derr		fm err supplying the ENA for the ereport
 * @param is_valid_epkt	B_FALSE if the epkt was not recognized
 */
static void
px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt,
    boolean_t is_block_pci, int err, ddi_fm_error_t *derr,
    boolean_t is_valid_epkt)
{
	char buf[FM_MAX_CLASS], descr_buf[1024];

	/* send ereport for debug purposes */
	(void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_RC_UNRECOG);

	if (is_block_pci) {
		px_pec_err_t *pec = (px_pec_err_t *)epkt;

		(void) snprintf(descr_buf, sizeof (descr_buf),
		    "%s Epkt contents:\n"
		    "Block: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d\n"
		    "I=%d, H=%d, C=%d, U=%d, E=%d, P=%d\n"
		    "PCI Err Status: 0x%x, PCIe Err Status: 0x%x\n"
		    "CE Status Reg: 0x%x, UE Status Reg: 0x%x\n"
		    "HDR1: 0x%lx, HDR2: 0x%lx\n"
		    "Err Src Reg: 0x%x, Root Err Status: 0x%x\n"
		    "Err Severity: 0x%x\n",
		    is_valid_epkt ? "Valid" : "Invalid",
		    pec->pec_descr.block, pec->pec_descr.dir,
		    pec->pec_descr.Z, pec->pec_descr.S,
		    pec->pec_descr.R, pec->pec_descr.I,
		    pec->pec_descr.H, pec->pec_descr.C,
		    pec->pec_descr.U, pec->pec_descr.E,
		    pec->pec_descr.P, pec->pci_err_status,
		    pec->pcie_err_status, pec->ce_reg_status,
		    pec->ue_reg_status, pec->hdr[0],
		    pec->hdr[1], pec->err_src_reg,
		    pec->root_err_status, err);

		/* The name/type/value list must be terminated by NULL */
		ddi_fm_ereport_post(dip, buf, derr->fme_ena,
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
		    EPKT_SYSINO, DATA_TYPE_UINT64,
		    is_valid_epkt ? pec->sysino : 0,
		    EPKT_EHDL, DATA_TYPE_UINT64,
		    is_valid_epkt ? pec->ehdl : 0,
		    EPKT_STICK, DATA_TYPE_UINT64,
		    is_valid_epkt ? pec->stick : 0,
		    EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)pec)[3],
		    EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)pec)[4],
		    EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)pec)[5],
		    EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)pec)[6],
		    EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)pec)[7],
		    EPKT_PEC_DESCR, DATA_TYPE_STRING, descr_buf,
		    NULL);
	} else {
		(void) snprintf(descr_buf, sizeof (descr_buf),
		    "%s Epkt contents:\n"
		    "Block: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
		    "Dir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d\n"
		    "M=%d, S=%d, Size: 0x%x, Addr: 0x%lx\n"
		    "Hdr1: 0x%lx, Hdr2: 0x%lx, Res: 0x%lx\n"
		    "Err Severity: 0x%x\n",
		    is_valid_epkt ? "Valid" : "Invalid",
		    epkt->rc_descr.block, epkt->rc_descr.op,
		    epkt->rc_descr.phase, epkt->rc_descr.cond,
		    epkt->rc_descr.dir, epkt->rc_descr.STOP,
		    epkt->rc_descr.H, epkt->rc_descr.R,
		    epkt->rc_descr.D, epkt->rc_descr.M,
		    epkt->rc_descr.S, epkt->size, epkt->addr,
		    epkt->hdr[0], epkt->hdr[1], epkt->reserved,
		    err);

		/* The name/type/value list must be terminated by NULL */
		ddi_fm_ereport_post(dip, buf, derr->fme_ena,
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
		    EPKT_SYSINO, DATA_TYPE_UINT64,
		    is_valid_epkt ? epkt->sysino : 0,
		    EPKT_EHDL, DATA_TYPE_UINT64,
		    is_valid_epkt ? epkt->ehdl : 0,
		    EPKT_STICK, DATA_TYPE_UINT64,
		    is_valid_epkt ? epkt->stick : 0,
		    EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)epkt)[3],
		    EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)epkt)[4],
		    EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)epkt)[5],
		    EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)epkt)[6],
		    EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)epkt)[7],
		    EPKT_RC_DESCR, DATA_TYPE_STRING, descr_buf,
		    NULL);
	}
}
/*
 * px_err_log_handle:
 * Emit a DBG_ERR_INTR debug message with the decoded contents of the epkt.
 *
 * @param dip		dip the message is logged against
 * @param epkt		epkt received from HV
 * @param is_block_pci	B_TRUE to decode the epkt with the PCIe root port
 *			layout (px_pec_err_t), otherwise the generic root
 *			complex layout is used
 * @param msg		severity label to include in the message
 */
static void
px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, boolean_t is_block_pci,
    char *msg)
{
	px_pec_err_t	*pec;

	if (!is_block_pci) {
		/* Generic root complex epkt layout */
		DBG(DBG_ERR_INTR, dip,
		    "A PCIe root complex error has occured with a severity"
		    " \"%s\"\n"
		    "\tBlock: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
		    "\tDir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d, M=%d\n"
		    "\tS=%d, Size: 0x%x, Addr: 0x%p\n"
		    "\tHdr1: 0x%p, Hdr2: 0x%p, Res: 0x%p\n",
		    msg, epkt->rc_descr.block, epkt->rc_descr.op,
		    epkt->rc_descr.phase, epkt->rc_descr.cond,
		    epkt->rc_descr.dir, epkt->rc_descr.STOP, epkt->rc_descr.H,
		    epkt->rc_descr.R, epkt->rc_descr.D, epkt->rc_descr.M,
		    epkt->rc_descr.S, epkt->size, epkt->addr, epkt->hdr[0],
		    epkt->hdr[1], epkt->reserved);
		return;
	}

	/* PCIe root port epkt layout */
	pec = (px_pec_err_t *)epkt;
	DBG(DBG_ERR_INTR, dip,
	    "A PCIe root port error has occured with a severity"
	    " \"%s\"\n"
	    "\tBlock: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d, I=%d\n"
	    "\tH=%d, C=%d, U=%d, E=%d, P=%d\n"
	    "\tpci_err: 0x%x, pcie_err=0x%x, ce_reg: 0x%x\n"
	    "\tue_reg: 0x%x, Hdr1: 0x%p, Hdr2: 0x%p\n"
	    "\terr_src: 0x%x, root_err: 0x%x\n",
	    msg, pec->pec_descr.block, pec->pec_descr.dir,
	    pec->pec_descr.Z, pec->pec_descr.S, pec->pec_descr.R,
	    pec->pec_descr.I, pec->pec_descr.H, pec->pec_descr.C,
	    pec->pec_descr.U, pec->pec_descr.E, pec->pec_descr.P,
	    pec->pci_err_status, pec->pcie_err_status,
	    pec->ce_reg_status, pec->ue_reg_status, pec->hdr[0],
	    pec->hdr[1], pec->err_src_reg, pec->root_err_status);
}
/*
* px_fix_legacy_epkt:
* Rewrite, in place, the descriptor fields of epkts that match one of a
* fixed set of (block, op, phase, cond, dir) combinations. Called from
* px_err_epkt_severity() when px_legacy_epkt is set, to correct epkts sent
* with incorrect fields by older hypervisors. Epkts that match none of the
* combinations are left untouched. 'dip' and 'derr' are unused.
*/
/* ARGSUSED */
static void
px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
{
/*
* We don't have a default case for any of the below switch statements
* since we are ok with the code falling through.
*/
switch (epkt->rc_descr.block) {
case BLOCK_HOSTBUS:
/* DMA / PH_UNKNOWN / CND_UNKNOWN / DIR_RESERVED: dir becomes READ */
switch (epkt->rc_descr.op) {
case OP_DMA:
switch (epkt->rc_descr.phase) {
case PH_UNKNOWN:
switch (epkt->rc_descr.cond) {
case CND_UNKNOWN:
switch (epkt->rc_descr.dir) {
case DIR_RESERVED:
epkt->rc_descr.dir = DIR_READ;
break;
} /* DIR */
} /* CND */
} /* PH */
} /* OP */
break;
case BLOCK_MMU:
switch (epkt->rc_descr.op) {
case OP_XLAT:
switch (epkt->rc_descr.phase) {
case PH_DATA:
/* XLAT / PH_DATA / CND_PROT / DIR_UNKNOWN: dir becomes WRITE */
switch (epkt->rc_descr.cond) {
case CND_PROT:
switch (epkt->rc_descr.dir) {
case DIR_UNKNOWN:
epkt->rc_descr.dir = DIR_WRITE;
break;
} /* DIR */
} /* CND */
break;
case PH_IRR:
/* XLAT / PH_IRR / CND_RESERVED / DIR_IRR: becomes PH_ADDR / CND_IRR */
switch (epkt->rc_descr.cond) {
case CND_RESERVED:
switch (epkt->rc_descr.dir) {
case DIR_IRR:
epkt->rc_descr.phase = PH_ADDR;
epkt->rc_descr.cond = CND_IRR;
} /* DIR */
} /* CND */
} /* PH */
} /* OP */
break;
case BLOCK_INTR:
switch (epkt->rc_descr.op) {
case OP_MSIQ:
switch (epkt->rc_descr.phase) {
case PH_UNKNOWN:
switch (epkt->rc_descr.cond) {
case CND_ILL:
/* MSIQ / PH_UNKNOWN / CND_ILL / DIR_RESERVED: dir becomes IRR */
switch (epkt->rc_descr.dir) {
case DIR_RESERVED:
epkt->rc_descr.dir = DIR_IRR;
break;
} /* DIR */
break;
case CND_IRR:
/* MSIQ / PH_UNKNOWN / CND_IRR / DIR_IRR: cond becomes OV */
switch (epkt->rc_descr.dir) {
case DIR_IRR:
epkt->rc_descr.cond = CND_OV;
break;
} /* DIR */
} /* CND */
} /* PH */
break;
case OP_RESERVED:
switch (epkt->rc_descr.phase) {
case PH_UNKNOWN:
/* RESERVED / PH_UNKNOWN / CND_ILL / DIR_IRR: becomes MSI32 / PH_DATA */
switch (epkt->rc_descr.cond) {
case CND_ILL:
switch (epkt->rc_descr.dir) {
case DIR_IRR:
epkt->rc_descr.op = OP_MSI32;
epkt->rc_descr.phase = PH_DATA;
break;
} /* DIR */
} /* CND */
break;
case PH_DATA:
/* RESERVED / PH_DATA / CND_INT / DIR_UNKNOWN: op becomes MSI32 */
switch (epkt->rc_descr.cond) {
case CND_INT:
switch (epkt->rc_descr.dir) {
case DIR_UNKNOWN:
epkt->rc_descr.op = OP_MSI32;
break;
} /* DIR */
} /* CND */
} /* PH */
} /* OP */
} /* BLOCK */
}
/*
 * px_intr_handle_errors:
 * Severity handler for interrupt block errors.  The severity is whatever
 * px_err_check_eq() reports for 'dip'; the other arguments are unused.
 */
/* ARGSUSED */
static int
px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt,
    pf_data_t *pfd_p)
{
	int	eq_sev = px_err_check_eq(dip);

	return (eq_sev);
}
/*
 * px_port_handle_errors:
 * Severity handler for port block errors.
 *
 * Only failed PIO transactions in phase PH_IRR are examined; anything else
 * is PX_PANIC.  Without a logged header the result is PX_NO_PANIC.  Otherwise
 * the logged TLP header is decoded and looked up against the registered
 * access handles: PX_NO_PANIC if a handle is found, PX_PANIC if not.  In that
 * last case the fabric scan data and affected device of 'pfd_p' are filled
 * in as well.
 *
 * @param dip	root complex dip
 * @param derr	fm err supplying the ENA for the handle lookup
 * @param epkt	epkt received from HV
 * @param pfd_p	fault data updated with the scan bdf/addr and affected bdf
 * @return	PX_PANIC | PX_NO_PANIC
 */
/* ARGSUSED */
static int
px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt,
    pf_data_t *pfd_p)
{
	pf_pcie_adv_err_regs_t	adv_reg;
	uint16_t		s_status = 0;
	int			sts = PX_PANIC;

	/*
	 * Check for failed non-posted writes, which are errors that are not
	 * defined in the PCIe spec.  If not return panic.
	 */
	if (!((epkt->rc_descr.op == OP_PIO) &&
	    (epkt->rc_descr.phase == PH_IRR))) {
		sts = (PX_PANIC);
		goto done;
	}

	/*
	 * Gather the error logs, if they do not exist just return with no panic
	 * and let the fabric message take care of the error.
	 */
	if (!epkt->rc_descr.H) {
		sts = (PX_NO_PANIC);
		goto done;
	}

	/*
	 * Start from a zeroed register set: when the decode below fails but
	 * the M flag is set, the target trans/bdf fields are still consumed
	 * and must not contain stack garbage.
	 */
	bzero(&adv_reg, sizeof (adv_reg));

	adv_reg.pcie_ue_hdr[0] = (uint32_t)(epkt->hdr[0] >> 32);
	adv_reg.pcie_ue_hdr[1] = (uint32_t)(epkt->hdr[0]);
	adv_reg.pcie_ue_hdr[2] = (uint32_t)(epkt->hdr[1] >> 32);
	adv_reg.pcie_ue_hdr[3] = (uint32_t)(epkt->hdr[1]);

	sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);

	/* The address reported in the epkt overrides the decoded one */
	if (epkt->rc_descr.M)
		adv_reg.pcie_ue_tgt_addr = epkt->addr;

	if (!((sts == DDI_SUCCESS) || (epkt->rc_descr.M))) {
		/* Let the fabric message take care of error */
		sts = PX_NO_PANIC;
		goto done;
	}

	/* See if the failed transaction belonged to a hardened driver */
	if (pf_hdl_lookup(dip, derr->fme_ena,
	    adv_reg.pcie_ue_tgt_trans, adv_reg.pcie_ue_tgt_addr,
	    adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND)
		sts = (PX_NO_PANIC);
	else
		sts = (PX_PANIC);

	/* Add pfd to cause a fabric scan */
	switch (epkt->rc_descr.cond) {
	case CND_RCA:
		s_status = PCI_STAT_R_TARG_AB;
		break;
	case CND_RUR:
		s_status = PCI_STAT_R_MAST_AB;
		break;
	default:
		/* No legacy secondary status bit for other conditions */
		break;
	}
	PCIE_ROOT_FAULT(pfd_p)->scan_bdf = adv_reg.pcie_ue_tgt_bdf;
	PCIE_ROOT_FAULT(pfd_p)->scan_addr = adv_reg.pcie_ue_tgt_addr;
	PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status;

	PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_BDF;
	PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = adv_reg.pcie_ue_tgt_bdf;

done:
	return (sts);
}
/*
 * px_pcie_epkt_severity:
 * Determine the severity of an epkt from the PCIe block.
 *
 * Failed PIO reads/writes (UR or CA with the U flag set) are resolved here
 * by decoding the logged TLP header and looking the transaction up against
 * the registered access handles.  Every other epkt is normalized (fields
 * whose valid flag is clear are zeroed, primary_ue and the tx/rx headers are
 * derived) and handed to px_err_check_pcie().
 *
 * Note that 'epkt' is modified in place and is accessed through two
 * overlaid views, px_pec_err_t and px_err_pcie_t.
 *
 * @param dip	root complex dip
 * @param derr	fm err supplying the ENA for the handle lookup
 * @param epkt	epkt received from HV
 * @param pfd_p	fault data (unused)
 * @return	PX_* severity
 */
/* ARGSUSED */
static int
px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt,
    pf_data_t *pfd_p)
{
	px_pec_err_t	*pec_p = (px_pec_err_t *)epkt;
	px_err_pcie_t	*pcie = (px_err_pcie_t *)epkt;
	pf_pcie_adv_err_regs_t adv_reg;
	int		sts;
	uint32_t	temp;

	/*
	 * Check for failed PIO Read/Writes, which are errors that are not
	 * defined in the PCIe spec.
	 */
	temp = PCIE_AER_UCE_UR | PCIE_AER_UCE_CA;
	if (((pec_p->pec_descr.dir == DIR_READ) ||
	    (pec_p->pec_descr.dir == DIR_WRITE)) &&
	    pec_p->pec_descr.U && (pec_p->ue_reg_status & temp)) {
		adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0] >> 32);
		adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0]);
		adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1] >> 32);
		adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1]);

		sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);

		if (sts == DDI_SUCCESS &&
		    pf_hdl_lookup(dip, derr->fme_ena,
		    adv_reg.pcie_ue_tgt_trans,
		    adv_reg.pcie_ue_tgt_addr,
		    adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND)
			return (PX_NO_PANIC);
		else
			return (PX_PANIC);
	}

	/* Clear the fields whose valid flag is not set in the descriptor */
	if (!pec_p->pec_descr.C)
		pec_p->ce_reg_status = 0;

	if (!pec_p->pec_descr.U)
		pec_p->ue_reg_status = 0;

	if (!pec_p->pec_descr.H)
		pec_p->hdr[0] = 0;

	if (!pec_p->pec_descr.I)
		pec_p->hdr[1] = 0;

	/*
	 * According to the PCIe spec, there is a first error pointer.  If there
	 * are header logs recorded and there are more than one error, the log
	 * will belong to the error that the first error pointer points to.
	 *
	 * The sun4v epkt spec does not define this value, so the lowest set
	 * bit of the ue register is given priority.  (temp & -temp) isolates
	 * that bit and yields 0 when no bit is set; the arithmetic is done
	 * unsigned so that bit 31 is handled without shifting into the sign
	 * bit of an int.
	 */
	temp = pcie->ue_reg;
	pcie->primary_ue = temp & (~temp + 1);

	/* Sun4v doesn't log the TX hdr except for CTOs */
	if (pcie->primary_ue == PCIE_AER_UCE_TO) {
		pcie->tx_hdr1 = pcie->rx_hdr1;
		pcie->tx_hdr2 = pcie->rx_hdr2;
		pcie->tx_hdr3 = pcie->rx_hdr3;
		pcie->tx_hdr4 = pcie->rx_hdr4;
		pcie->rx_hdr1 = 0;
		pcie->rx_hdr2 = 0;
		pcie->rx_hdr3 = 0;
		pcie->rx_hdr4 = 0;
	} else {
		pcie->tx_hdr1 = 0;
		pcie->tx_hdr2 = 0;
		pcie->tx_hdr3 = 0;
		pcie->tx_hdr4 = 0;
	}

	return (px_err_check_pcie(dip, derr, pcie, PF_INTR_TYPE_INTERNAL));
}
/*
 * px_mmu_handle_lookup:
 * Look up the DMA handle that owns the faulting MMU transaction.
 *
 * The lookup uses the address from the epkt and, when the epkt carries a
 * valid header (H flag), the 16-bit requester BDF held in bits 31:16 of
 * hdr[0]; otherwise PCIE_INVALID_BDF is used.
 *
 * @param dip	root complex dip
 * @param derr	fm err supplying the ENA for the handle lookup
 * @param epkt	epkt received from HV
 * @return	result of pf_hdl_lookup()
 */
static int
px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
{
	uint64_t	addr = (uint64_t)epkt->addr;
	pcie_req_id_t	bdf = PCIE_INVALID_BDF;

	if (epkt->rc_descr.H) {
		/* Bitwise mask: a logical && would collapse the BDF to 0/1 */
		bdf = (pcie_req_id_t)((epkt->hdr[0] >> 16) & 0xFFFF);
	}

	return (pf_hdl_lookup(dip, derr->fme_ena, PF_ADDR_DMA, addr,
	    bdf));
}