fire.esc revision 261a51afbf7133d9f7c89f1388050677f56b7d1a
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#pragma dictionary "SUN4"
/*
* Eversholt rules for the Fire nexus driver
*/
/*
 * FIT rates assigned to the fault events declared in this file.
 */
#define	SW_FIT			5000	/* No real FIT rate, software */
#define	HV_FIT			5000	/* No real FIT rate, hypervisor */
#define	SW_HV_MISMATCH_FIT	20000	/* No real FIT rate, SW/HV mismatch */
#define	HB_FIT			400
#define	HBUS_FIT		400
#define	CPU_FIT			500
#define	PCI_DEV_FIT		1000
#define	PCIEX_DEV_FIT		1000
#define	EBUS_FIT		1000

/*
 * SERD engine parameters: N events within time T.
 */
#define	LINK_EVENTS_COUNT	10
#define	LINK_EVENTS_TIME	1h
#define	CE_EVENTS_COUNT		10
#define	CE_EVENTS_TIME		1h
/*
 * Macros for CE fabric ereports.
 * MATCH_CE is true when the ereport's "severity" payload member is
 * exactly PF_CE.
 */
#define	PF_CE		(1 << 1)
#define	MATCH_CE	(payloadprop("severity") == PF_CE)

/*
 * Tests for primary or secondary ereports, keyed off the "primary"
 * payload member.
 */
#define	IS_PRIMARY	(payloadprop("primary"))
#define	IS_SECONDARY	(!payloadprop("primary"))
/*
 * payload: imu-rds
 *
 * Extract the request id (the BDF) from the value of the IMU RDS log
 * register and compare it with bus b, device d, function f.  The id is
 * the 16-bit field in bits 47:32 of the payload value.
 * Example:
 *   0x7766554433221100
 *         ^^^^
 *
 * Only primary ereports can match.  Every macro argument is parenthesized
 * so that an expression passed as b, d or f cannot be regrouped by
 * operator precedence after expansion.
 */
#define	IMU_MATCH_BDF(b, d, f) \
	(IS_PRIMARY && ((payloadprop("imu-rds") >> 32) & 0xFFFF) == \
	(((b) << 8) | ((d) << 3) | (f)))
/***************
 * JBC ereports
 * -------------
 * All are declared with a 5 second "within" window.  ebus_to is declared
 * at two paths: hostbridge/pciexrc and hostbridge.
 ***************/
event ereport.io.fire.jbc.ape@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.bjc@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.ce_asyn@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.cpe@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.ebus_to@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.ebus_to@hostbridge { within(5s) };
event ereport.io.fire.jbc.icise@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.ijp@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.ill_acc@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.ill_acc_rd@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.ill_bmr@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.ill_bmw@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.jbe@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.jtceei@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.jtceer@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.jtceew@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.jte@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.jue@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.mb_pea@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.mb_per@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.mb_pew@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.pio_cpe@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.pio_dpe@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.pio_unmap@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.pio_unmap_rd@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.rd_dpe@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.ue_asyn@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.unsol_intr@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.unsol_rd@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.jbc.wr_dpe@hostbridge/pciexrc { within(5s) };
/***************
 * DMC ereports
 * -------------
 * All are declared at hostbridge/pciexrc with a 5 second "within" window.
 ***************/
event ereport.io.fire.dmc.byp_err@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.byp_oor@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.cor_not_en@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.eq_not_en@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.eq_over@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.fatal_not_en@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.msi_mal_err@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.msi_not_en@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.msi_par_err@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.nonfatal_not_en@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.pmeack_not_en@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.pmpme_not_en@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.tbw_dme@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.tbw_dpe@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.tbw_err@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.tbw_ude@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.trn_err@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.trn_oor@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.ttc_cae@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.ttc_dpe@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.tte_inv@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.dmc.tte_prt@hostbridge/pciexrc { within(5s) };
/***************
 * TLU Other Event ereports
 * -------------
 * All are declared at hostbridge/pciexrc with a 5 second "within" window.
 ***************/
event ereport.io.fire.pec.crs@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.edp@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.ehp@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.eip@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.emp@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.epe@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.ero@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.erp@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.eru@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.ihb_pe@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.iip@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.ldn@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.lin@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.lrs@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.lup@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.mrc@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.ruc@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.wuc@hostbridge/pciexrc { within(5s) };
/***************
 * TLU Uncorrectable and Correctable ereports
 * -------------
 * All are declared at hostbridge/pciexrc with a 5 second "within" window.
 ***************/
event ereport.io.fire.pec.ur@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.uc@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.cto@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.rof@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.mfc@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.mfp@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.pois@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.fcp@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.dlp@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.te@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.re@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.bdp@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.btp@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.rnr@hostbridge/pciexrc { within(5s) };
event ereport.io.fire.pec.rto@hostbridge/pciexrc { within(5s) };
/***************
 * Fire Fabric ereport
 * -------------
 * Whenever a leaf device sends an error message (fatal, non-fatal, or CE)
 * to the root complex, the nexus driver publishes this ereport to log it.
 *
 * The ereport is declared for both PCI-E and PCI functions with a
 * 1 second "within" window.  The matching error events, and the
 * fabric-sib error at the root complex, are declared here as well.
 ***************/
event ereport.io.fire.fabric@pciexbus/pciexdev/pciexfn { within(1s) };
event ereport.io.fire.fabric@pcibus/pcidev/pcifn { within(1s) };

event error.io.fire.fabric-sib@hostbridge/pciexrc;
event error.io.fire.fabric@pciexbus/pciexdev/pciexfn;
event error.io.fire.fabric@pcibus/pcidev/pcifn;
/***************
 * sun4v unknown error packets
 * -------------
 * In a sun4v system, when the HV sends an epkt to the guest, the
 * following things can go wrong:
 *   o HV sends a malformed epkt
 *   o Guest has a coding error and doesn't understand the epkt
 *   o HV and Guest are out of sync
 ***************/
event ereport.io.fire.epkt@hostbridge/pciexrc { within(5s) };
/******************************
 * Generic Rules Begin Here   *
 ******************************/

/***************
 * Fire ASIC HW error
 * -------------
 * Errors caused by parity or unexpected behavior in the ASIC.
 * The fault propagates to the asic error of each block (JBC, DMC, PEC).
 ***************/
fru hostbridge/pciexrc;
asru hostbridge/pciexrc;

event fault.io.fire.asic@hostbridge/pciexrc,
    FITrate=HB_FIT, ASRU=hostbridge/pciexrc, FRU=hostbridge/pciexrc;

event error.io.fire.jbc.asic@hostbridge/pciexrc;
event error.io.fire.dmc.asic@hostbridge/pciexrc;
event error.io.fire.pec.asic@hostbridge/pciexrc;

prop fault.io.fire.asic@hostbridge/pciexrc (0)->
    error.io.fire.jbc.asic@hostbridge/pciexrc,
    error.io.fire.dmc.asic@hostbridge/pciexrc,
    error.io.fire.pec.asic@hostbridge/pciexrc;
/***************
 * Fire PX SW/HV error
 * -------------
 * Errors caused by bad SW or HV.  These faults carry only a FIT rate;
 * no FRU or ASRU is attached to them.
 ***************/
event fault.io.fire.sw-epkt@hostbridge/pciexrc, FITrate=SW_FIT;
event fault.io.fire.fw-epkt@hostbridge/pciexrc, FITrate=HV_FIT;
event fault.io.fire.sw-fw-mismatch@hostbridge/pciexrc,
    FITrate=SW_HV_MISMATCH_FIT;
event fault.io.fire.hb.sw-config@hostbridge/pciexrc, FITrate=SW_FIT;
event fault.io.fire.dmc.sw-algorithm@hostbridge/pciexrc, FITrate=SW_FIT;
event fault.io.fire.dmc.sw-state@hostbridge/pciexrc, FITrate=SW_FIT;
event fault.io.fire.pec.sw-algorithm@hostbridge/pciexrc, FITrate=SW_FIT;

event error.io.fire.jbc.driver@hostbridge/pciexrc;
event error.io.fire.dmc.driver@hostbridge/pciexrc;
event error.io.fire.dmc.bad_state@hostbridge/pciexrc;

/* The epkt ereport is tied to all three epkt-related faults. */
prop fault.io.fire.sw-epkt@hostbridge/pciexrc,
    fault.io.fire.fw-epkt@hostbridge/pciexrc,
    fault.io.fire.sw-fw-mismatch@hostbridge/pciexrc ->(A)
    ereport.io.fire.epkt@hostbridge/pciexrc;

/* Each software fault propagates to one intermediate error event. */
prop fault.io.fire.hb.sw-config@hostbridge/pciexrc (0)->
    error.io.fire.jbc.driver@hostbridge/pciexrc;

prop fault.io.fire.dmc.sw-algorithm@hostbridge/pciexrc (0)->
    error.io.fire.dmc.driver@hostbridge/pciexrc;

prop fault.io.fire.dmc.sw-state@hostbridge/pciexrc (0)->
    error.io.fire.dmc.bad_state@hostbridge/pciexrc;
/***************
 * PCI-E/PCI device fault and SW defects
 * -------------
 * For both bus types the FRU is the device node and the ASRU is the
 * function node.
 ***************/
fru pciexbus/pciexdev;
asru pciexbus/pciexdev/pciexfn;

event fault.io.fire.pciex.device@pciexbus/pciexdev/pciexfn,
    FRU=pciexbus/pciexdev, ASRU=pciexbus/pciexdev/pciexfn,
    FITrate=PCIEX_DEV_FIT;

fru pcibus/pcidev;
asru pcibus/pcidev/pcifn;

event fault.io.fire.pci.device@pcibus/pcidev/pcifn,
    FITrate=PCI_DEV_FIT, FRU=pcibus/pcidev, ASRU=pcibus/pcidev/pcifn;
/******************************
 * JBC Rules Begin Here       *
 ******************************/

/***************
 * EBUS fault
 * -------------
 * Errors involving the ebus
 ***************/
fru hostbridge/pciexrc;
asru hostbridge/pciexrc;

event fault.io.ebus@hostbridge/pciexrc,
    FITrate=EBUS_FIT, FRU=hostbridge/pciexrc, ASRU=hostbridge/pciexrc;

/*
 * A faulty ebus can cause an ebus timeout (ebus_to).
 * The fmri of the ebus_to ereport differs by platform:
 *   sun4v: ioboard/hostbridge
 *   sun4u: pciexrc
 * so both forms of the ereport appear on the right-hand side.
 */
prop fault.io.ebus@hostbridge/pciexrc (1)->
    ereport.io.fire.jbc.ebus_to@hostbridge,
    ereport.io.fire.jbc.ebus_to@hostbridge/pciexrc;
/***************
 * Fire ASIC HW error (JBC)
 * -------------
 * Errors caused by parity or unexpected behavior in the ASIC.
 * These JBC ereports are explained by the JBC asic error.
 ***************/
prop error.io.fire.jbc.asic@hostbridge/pciexrc (1)->
    ereport.io.fire.jbc.cpe@hostbridge/pciexrc,
    ereport.io.fire.jbc.mb_pea@hostbridge/pciexrc,
    ereport.io.fire.jbc.mb_per@hostbridge/pciexrc,
    ereport.io.fire.jbc.mb_pew@hostbridge/pciexrc,
    ereport.io.fire.jbc.pio_cpe@hostbridge/pciexrc,
    ereport.io.fire.jbc.pio_dpe@hostbridge/pciexrc;
/***************
 * JBC Hostbus Link Errors
 * -------------
 * Possible parity errors caused by bad link traces or cables.
 * For instance, on Ontarios there is a flex cable.  For Chicagos
 * it could be the link trace between the CPU and Fire.
 ***************/
event error.io.fire.jbc.bad_link@hostbridge/pciexrc;

prop error.io.fire.jbc.bad_link@hostbridge/pciexrc (1)->
    ereport.io.fire.jbc.ape@hostbridge/pciexrc,
    ereport.io.fire.jbc.bjc@hostbridge/pciexrc,
    ereport.io.fire.jbc.rd_dpe@hostbridge/pciexrc,
    ereport.io.fire.jbc.wr_dpe@hostbridge/pciexrc;
/***************
 * JBC Hostbus Errors
 * -------------
 * Errors returned from the hostbus side and detected by the Fire ASIC.
 * The fault propagates to the bad_link error and directly to the
 * ereports listed below.
 ***************/
fru hostbridge/pciexrc;
asru hostbridge/pciexrc;

event fault.io.fire.hbus@hostbridge/pciexrc,
    FITrate=HBUS_FIT, FRU=hostbridge/pciexrc, ASRU=hostbridge/pciexrc;

prop fault.io.fire.hbus@hostbridge/pciexrc (0)->
    error.io.fire.jbc.bad_link@hostbridge/pciexrc,
    ereport.io.fire.jbc.icise@hostbridge/pciexrc,
    ereport.io.fire.jbc.ill_bmr@hostbridge/pciexrc,
    ereport.io.fire.jbc.jtceei@hostbridge/pciexrc,
    ereport.io.fire.jbc.jtceer@hostbridge/pciexrc,
    ereport.io.fire.jbc.jtceew@hostbridge/pciexrc;
/***************
 * JBC Datapath Errors
 * -------------
 * Errors resulting from the datapath of the hostbus and detected by the
 * Fire ASIC.  This fault is declared against the cmp node.
 ***************/
fru cmp;
asru cmp;

event fault.io.fire.datapath@cmp,
    FITrate=CPU_FIT, FRU=cmp, ASRU=cmp;

/*
 * The right-hand side must stay identical to the cpumodule/cpu copy of
 * this rule, which lists the wr_dpe ereport directly in addition to the
 * bad_link error.
 */
prop fault.io.fire.datapath@cmp (0)->
    error.io.fire.jbc.bad_link@hostbridge/pciexrc,
    ereport.io.fire.jbc.wr_dpe@hostbridge/pciexrc;
/*
 * Datapath fault for the cpumodule/cpu node; the counterpart of the
 * fault.io.fire.datapath@cmp declaration and rule.
 */
fru cpumodule/cpu;
asru cpumodule/cpu;

event fault.io.fire.datapath@cpumodule/cpu,
    FITrate=CPU_FIT, FRU=cpumodule/cpu, ASRU=cpumodule/cpu;

prop fault.io.fire.datapath@cpumodule/cpu (0)->
    error.io.fire.jbc.bad_link@hostbridge/pciexrc,
    ereport.io.fire.jbc.wr_dpe@hostbridge/pciexrc;
/***************
 * Fire driver is at fault.
 * -------------
 * The px driver should not have been in this state.  Defect the px driver.
 ***************/
prop error.io.fire.jbc.driver@hostbridge/pciexrc (1)->
    ereport.io.fire.jbc.ijp@hostbridge/pciexrc,
    ereport.io.fire.jbc.ill_acc@hostbridge/pciexrc,
    ereport.io.fire.jbc.ill_acc_rd@hostbridge/pciexrc,
    ereport.io.fire.jbc.ill_bmw@hostbridge/pciexrc,
    ereport.io.fire.jbc.jue@hostbridge/pciexrc,
    ereport.io.fire.jbc.pio_unmap@hostbridge/pciexrc,
    ereport.io.fire.jbc.pio_unmap_rd@hostbridge/pciexrc;
/******************************
 * DMC Rules Begin Here       *
 ******************************/

/***************
 * Fire PX SW error
 * -------------
 * Errors caused by bad SW
 ***************/
prop error.io.fire.dmc.driver@hostbridge/pciexrc (1)->
    ereport.io.fire.dmc.ttc_cae@hostbridge/pciexrc;
/***************
 * Unexpected Fire State
 * -------------
 * The px driver should not have been in this state.  Defect the px driver.
 *
 * bad_state propagates to the *_not_en ereports or to bad_state-no_fab.
 * bad_state-no_fab has two separate (1)-> rules: one to bad_state-mmu
 * and one to the fabric-sib error.
 ***************/
event error.io.fire.dmc.bad_state-mmu@hostbridge/pciexrc;
event error.io.fire.dmc.bad_state-no_fab@hostbridge/pciexrc;

prop error.io.fire.dmc.bad_state@hostbridge/pciexrc (1)->
    ereport.io.fire.dmc.cor_not_en@hostbridge/pciexrc,
    ereport.io.fire.dmc.eq_not_en@hostbridge/pciexrc,
    ereport.io.fire.dmc.fatal_not_en@hostbridge/pciexrc,
    ereport.io.fire.dmc.msi_not_en@hostbridge/pciexrc,
    ereport.io.fire.dmc.nonfatal_not_en@hostbridge/pciexrc,
    ereport.io.fire.dmc.pmeack_not_en@hostbridge/pciexrc,
    ereport.io.fire.dmc.pmpme_not_en@hostbridge/pciexrc,
    error.io.fire.dmc.bad_state-no_fab@hostbridge/pciexrc;

prop error.io.fire.dmc.bad_state-no_fab@hostbridge/pciexrc (1)->
    error.io.fire.dmc.bad_state-mmu@hostbridge/pciexrc;

prop error.io.fire.dmc.bad_state-no_fab@hostbridge/pciexrc (1)->
    error.io.fire.fabric-sib@hostbridge/pciexrc;

prop error.io.fire.dmc.bad_state-mmu@hostbridge/pciexrc (1)->
    ereport.io.fire.dmc.tbw_dme@hostbridge/pciexrc,
    ereport.io.fire.dmc.trn_err@hostbridge/pciexrc;
/***************
 * Fire ASIC HW error (DMC)
 * -------------
 * Errors caused by parity or unexpected behavior in the ASIC.
 ***************/
event error.io.fire.dmc.bad_parity@hostbridge/pciexrc;

prop error.io.fire.dmc.asic@hostbridge/pciexrc (1)->
    error.io.fire.dmc.bad_parity@hostbridge/pciexrc,
    ereport.io.fire.dmc.eq_over@hostbridge/pciexrc;
/***************
 * Parity errors caused by dmc
 * -------------
 * Fire ASIC error.
 *
 * bad_parity propagates to msi_par_err or to bad_parity-no_fab.
 * bad_parity-no_fab has two separate (1)-> rules: one to bad_parity-mmu
 * and one to the fabric-sib error.
 ***************/
event error.io.fire.dmc.bad_parity-no_fab@hostbridge/pciexrc;
event error.io.fire.dmc.bad_parity-mmu@hostbridge/pciexrc;

prop error.io.fire.dmc.bad_parity@hostbridge/pciexrc (1)->
    ereport.io.fire.dmc.msi_par_err@hostbridge/pciexrc,
    error.io.fire.dmc.bad_parity-no_fab@hostbridge/pciexrc;

prop error.io.fire.dmc.bad_parity-no_fab@hostbridge/pciexrc (1)->
    error.io.fire.dmc.bad_parity-mmu@hostbridge/pciexrc;

prop error.io.fire.dmc.bad_parity-no_fab@hostbridge/pciexrc (1)->
    error.io.fire.fabric-sib@hostbridge/pciexrc;

prop error.io.fire.dmc.bad_parity-mmu@hostbridge/pciexrc (1)->
    ereport.io.fire.dmc.tbw_dpe@hostbridge/pciexrc,
    ereport.io.fire.dmc.tbw_ude@hostbridge/pciexrc,
    ereport.io.fire.dmc.ttc_dpe@hostbridge/pciexrc;
/***************
 * Bad DMA requests
 * -------------
 * Errors caused by bad leaf drivers, which will end up sending a
 * fire.fabric ereport that is diagnosed on its own.  Do not diagnose the
 * bad DMA requests; rely on the fire.fabric error being there.  Propagate
 * to a fault.io.fire.pciex.device.
 *
 * At least one PCIe device will be faulted.  PCI devices will be faulted
 * as well if the DMA came from a PCI device.
 *
 * Only primary ereports take part in this rule.
 ***************/
event error.io.fire.dmc.fabric_diag@hostbridge/pciexrc;

prop error.io.fire.dmc.fabric_diag@hostbridge/pciexrc (0)->
    ereport.io.fire.dmc.byp_err@hostbridge/pciexrc { IS_PRIMARY },
    ereport.io.fire.dmc.byp_oor@hostbridge/pciexrc { IS_PRIMARY },
    ereport.io.fire.dmc.trn_oor@hostbridge/pciexrc { IS_PRIMARY },
    ereport.io.fire.dmc.tte_inv@hostbridge/pciexrc { IS_PRIMARY },
    ereport.io.fire.dmc.tte_prt@hostbridge/pciexrc { IS_PRIMARY };

/* Tie the fabric error of a PCI-E function to its own root complex. */
prop error.io.fire.fabric@pciexbus/pciexdev/pciexfn (0)->
    error.io.fire.dmc.fabric_diag@hostbridge/pciexrc {
	is_under(hostbridge/pciexrc, pciexbus/pciexdev/pciexfn)
    };
/***************
 * Malformed MSI
 * -------------
 * A non-compliant PCIe/PCI device sent a malformed MSI.
 *
 * The device is selected by matching its bus/device/function numbers
 * against the ereport payload (IMU_MATCH_BDF); it must also sit under
 * the reporting root complex.
 ***************/
prop fault.io.fire.pciex.device@pciexbus[b]/pciexdev[d]/pciexfn[f] (0)->
    ereport.io.fire.dmc.msi_mal_err@hostbridge/pciexrc {
	IMU_MATCH_BDF(b, d, f) &&
	is_under(hostbridge/pciexrc, pciexbus[b]/pciexdev[d]/pciexfn[f])
    };

prop fault.io.fire.pci.device@pcibus[b]/pcidev[d]/pcifn[f] (0)->
    ereport.io.fire.dmc.msi_mal_err@hostbridge/pciexrc {
	IMU_MATCH_BDF(b, d, f) &&
	is_under(hostbridge/pciexrc, pcibus[b]/pcidev[d]/pcifn[f])
    };
/***************
 * Event queue overflow
 * -------------
 * Since we don't know which devices are sending too many EQ's, we must
 * indict the Fire ASIC and all PCIe/PCI devices.
 *
 * The GET_*_FRU macros read the "FRU" configuration property of the
 * ASRU of the root complex and of function 0 of a device.  The two
 * device macros rely on the iterators b and d of the enclosing rule.
 ***************/
#define	PROP_PLAT_FRU	"FRU"
#define	GET_HB_FRU	(confprop(asru(hostbridge/pciexrc), PROP_PLAT_FRU))
#define	GET_PCIE_FRU	\
	(confprop(asru(pciexbus[b]/pciexdev[d]/pciexfn[0]), PROP_PLAT_FRU))
#define	GET_PCI_FRU	\
	(confprop(asru(pcibus[b]/pcidev[d]/pcifn[0]), PROP_PLAT_FRU))

prop fault.io.fire.pciex.device@pciexbus[b]/pciexdev[d]/pciexfn[0] (0)->
    ereport.io.fire.dmc.eq_over@hostbridge/pciexrc {
	/*
	 * Indict the PCI-E FRU(s) under this root complex, excluding
	 * the one that the Fire ASIC resides on.
	 */
	is_under(hostbridge/pciexrc, pciexbus[b]/pciexdev[d]/pciexfn[0]) &&
	(GET_HB_FRU != GET_PCIE_FRU)
    };

prop fault.io.fire.pci.device@pcibus[b]/pcidev[d]/pcifn[0] (0)->
    ereport.io.fire.dmc.eq_over@hostbridge/pciexrc {
	/*
	 * Indict the PCI FRU(s) under this root complex, excluding
	 * the one that the Fire ASIC resides on.
	 */
	is_under(hostbridge/pciexrc, pcibus[b]/pcidev[d]/pcifn[0]) &&
	(GET_HB_FRU != GET_PCI_FRU)
    };
/***************
 * Secondary errors
 * -------------
 * These are errors that require logs to be diagnosable.  Secondary
 * errors do not have logs, so just propagate them to no-diag.
 ***************/
event error.io.fire.dmc.secondary@hostbridge/pciexrc;

prop error.io.fire.dmc.secondary@hostbridge/pciexrc (0)->
    ereport.io.fire.dmc.byp_err@hostbridge/pciexrc { IS_SECONDARY },
    ereport.io.fire.dmc.byp_oor@hostbridge/pciexrc { IS_SECONDARY },
    ereport.io.fire.dmc.msi_mal_err@hostbridge/pciexrc { IS_SECONDARY },
    ereport.io.fire.dmc.trn_oor@hostbridge/pciexrc { IS_SECONDARY },
    ereport.io.fire.dmc.tte_inv@hostbridge/pciexrc { IS_SECONDARY },
    ereport.io.fire.dmc.tte_prt@hostbridge/pciexrc { IS_SECONDARY };
/******************************
 * PEC Rules Begin Here       *
 ******************************/

/* Intermediate PEC error events used by the rules that follow. */
event error.io.fire.pec.buffer-parity@hostbridge/pciexrc;
event error.io.fire.pec.misc-egress@hostbridge/pciexrc;
event error.io.fire.pec.adjacentnode@hostbridge/pciexrc;
event error.io.fire.pec.fabric_error@hostbridge/pciexrc;
/***************
 * Fire PX SW error (PEC)
 * -------------
 * Errors caused by bad SW
 ***************/
prop fault.io.fire.pec.sw-algorithm@hostbridge/pciexrc (0)->
    ereport.io.fire.pec.crs@hostbridge/pciexrc,
    ereport.io.fire.pec.mrc@hostbridge/pciexrc;
/***************
 * Fire ASIC HW error (PEC)
 * -------------
 * Errors caused by parity or unexpected behavior in the ASIC.
 * The PEC asic error fans out to three intermediate errors; the
 * buffer-parity and misc-egress errors map to ereports here.
 ***************/
prop error.io.fire.pec.asic@hostbridge/pciexrc (1)->
    error.io.fire.pec.buffer-parity@hostbridge/pciexrc,
    error.io.fire.pec.misc-egress@hostbridge/pciexrc,
    error.io.fire.pec.adjacentnode@hostbridge/pciexrc;

prop error.io.fire.pec.buffer-parity@hostbridge/pciexrc (1)->
    ereport.io.fire.pec.edp@hostbridge/pciexrc,
    ereport.io.fire.pec.ehp@hostbridge/pciexrc,
    ereport.io.fire.pec.eip@hostbridge/pciexrc,
    ereport.io.fire.pec.erp@hostbridge/pciexrc,
    ereport.io.fire.pec.ihb_pe@hostbridge/pciexrc,
    ereport.io.fire.pec.iip@hostbridge/pciexrc;

prop error.io.fire.pec.misc-egress@hostbridge/pciexrc (1)->
    ereport.io.fire.pec.emp@hostbridge/pciexrc,
    ereport.io.fire.pec.epe@hostbridge/pciexrc,
    ereport.io.fire.pec.ero@hostbridge/pciexrc,
    ereport.io.fire.pec.eru@hostbridge/pciexrc;
/***************
 * Bad IO requests
 * -------------
 * Errors caused by bad leaf drivers, which will end up sending a
 * fire.fabric ereport that is diagnosed on its own.  Do not diagnose the
 * bad IO requests; rely on the fire.fabric error being there.  Propagate
 * to a fault.io.fire.pciex.device.
 *
 * At least one PCIe device will be faulted.  A PCI device will be faulted
 * as well if the DMA came from a PCI device.
 *
 * Only primary ereports take part in this rule.
 ***************/
event error.io.fire.pec.fabric_diag@hostbridge/pciexrc;

prop error.io.fire.pec.fabric_diag@hostbridge/pciexrc (1)->
    ereport.io.fire.pec.wuc@hostbridge/pciexrc { IS_PRIMARY },
    ereport.io.fire.pec.ruc@hostbridge/pciexrc { IS_PRIMARY },
    ereport.io.fire.pec.pois@hostbridge/pciexrc { IS_PRIMARY },
    ereport.io.fire.pec.ur@hostbridge/pciexrc { IS_PRIMARY };

/* Tie the fabric error of a PCI-E function to its own root complex. */
prop error.io.fire.fabric@pciexbus/pciexdev/pciexfn (0)->
    error.io.fire.pec.fabric_diag@hostbridge/pciexrc {
	is_under(hostbridge/pciexrc, pciexbus/pciexdev/pciexfn)
    };
/***************
 * Failed Links
 * -------------
 * They will cause the fabric to be scanned, and a fire.fabric ereport
 * will be sent for the suspected devices.  Do not diagnose these
 * ereports; let the fire.fabric ereport be diagnosed.
 *
 * The lrs and ldn ereports are counted by a SERD engine
 * (LINK_EVENTS_COUNT events within LINK_EVENTS_TIME) whose trip event
 * is ereport.io.fire.link-events-trip.
 ***************/
event ereport.io.fire.link-events-trip@hostbridge/pciexrc;

engine serd.io.fire.link-events@hostbridge/pciexrc,
    N=LINK_EVENTS_COUNT, T=LINK_EVENTS_TIME, method=persistent,
    trip=ereport.io.fire.link-events-trip@hostbridge/pciexrc;

event upset.io.fire.link-events@hostbridge/pciexrc,
    engine=serd.io.fire.link-events@hostbridge/pciexrc;

event error.io.fire.link-events@hostbridge/pciexrc;

prop upset.io.fire.link-events@hostbridge/pciexrc (0)->
    error.io.fire.link-events@hostbridge/pciexrc;

prop error.io.fire.link-events@hostbridge/pciexrc (1)->
    ereport.io.fire.pec.lrs@hostbridge/pciexrc,
    ereport.io.fire.pec.ldn@hostbridge/pciexrc;
/*
 * Fault at the adjacent node, which is right below the Fire ASIC.
 *
 * The link-events-trip ereport is explained by the adjacentnode error,
 * whose suspects are the Fire ASIC and the PCI-E device directly under
 * the root complex.
 *
 * NOTE(review): this pciex.device declaration uses HB_FIT, whereas the
 * generic fault.io.fire.pciex.device@pciexbus/pciexdev/pciexfn
 * declaration uses PCIEX_DEV_FIT -- confirm that this is intended.
 */
fru hostbridge/pciexrc/pciexbus/pciexdev;
asru hostbridge/pciexrc/pciexbus/pciexdev/pciexfn;

event fault.io.fire.pciex.device@hostbridge/pciexrc/pciexbus/pciexdev/pciexfn,
    FITrate=HB_FIT,
    FRU=hostbridge/pciexrc/pciexbus/pciexdev,
    ASRU=hostbridge/pciexrc/pciexbus/pciexdev/pciexfn;

prop fault.io.fire.asic@hostbridge/pciexrc (0)->
    error.io.fire.pec.adjacentnode@hostbridge/pciexrc;

prop fault.io.fire.pciex.device@hostbridge/pciexrc/pciexbus/pciexdev/pciexfn
    (0)->
    error.io.fire.pec.adjacentnode@hostbridge/pciexrc {
	is_under(hostbridge/pciexrc,
	    hostbridge/pciexrc/pciexbus/pciexdev/pciexfn)
    };

prop error.io.fire.pec.adjacentnode@hostbridge/pciexrc (0)->
    ereport.io.fire.link-events-trip@hostbridge/pciexrc;
/***************
 * Undiagnosable Secondary Errors
 * -------------
 * Secondary occurrences of the ereports for which the device is at
 * fault.  The secondary errors are left undiagnosed because their
 * payload is invalid.
 ***************/
event error.io.fire.pec.secondary@hostbridge/pciexrc;

prop error.io.fire.pec.secondary@hostbridge/pciexrc (1)->
    ereport.io.fire.pec.ruc@hostbridge/pciexrc { IS_SECONDARY },
    ereport.io.fire.pec.wuc@hostbridge/pciexrc { IS_SECONDARY },
    ereport.io.fire.pec.pois@hostbridge/pciexrc { IS_SECONDARY },
    ereport.io.fire.pec.ur@hostbridge/pciexrc { IS_SECONDARY };
/*
 * For logging purposes only.
 * The Fire nexus driver generates equivalent pciex ereports, which the
 * common pciex rules diagnose.
 */
prop error.io.fire.pec.fabric_error@hostbridge/pciexrc (0)->
    ereport.io.fire.pec.bdp@hostbridge/pciexrc,
    ereport.io.fire.pec.btp@hostbridge/pciexrc,
    ereport.io.fire.pec.cto@hostbridge/pciexrc,
    ereport.io.fire.pec.dlp@hostbridge/pciexrc,
    ereport.io.fire.pec.fcp@hostbridge/pciexrc,
    ereport.io.fire.pec.mfc@hostbridge/pciexrc,
    ereport.io.fire.pec.mfp@hostbridge/pciexrc,
    ereport.io.fire.pec.re@hostbridge/pciexrc,
    ereport.io.fire.pec.rnr@hostbridge/pciexrc,
    ereport.io.fire.pec.rof@hostbridge/pciexrc,
    ereport.io.fire.pec.rto@hostbridge/pciexrc,
    ereport.io.fire.pec.te@hostbridge/pciexrc,
    ereport.io.fire.pec.uc@hostbridge/pciexrc;
/******************************
 * Fabric Rules Begin Here    *
 ******************************/

/***************
 * fire.fabric rules
 * -------------
 * The rules below exist so that we get a single suspect list in one
 * fault, with the indictment percentage equal among all the suspect FRUs.
 *
 * For PCI-E functions, fabric ereports that match MATCH_CE are excluded
 * here.
 ***************/

/* Device faults propagate to the per-function fabric error. */
prop fault.io.fire.pciex.device@pciexbus/pciexdev/pciexfn (0)->
    error.io.fire.fabric@pciexbus/pciexdev/pciexfn;

prop fault.io.fire.pci.device@pcibus/pcidev/pcifn (0)->
    error.io.fire.fabric@pcibus/pcidev/pcifn;

/* The per-function fabric error propagates to its own fabric ereport. */
prop error.io.fire.fabric@pciexbus/pciexdev/pciexfn (1)->
    ereport.io.fire.fabric@pciexbus/pciexdev/pciexfn { !MATCH_CE };

prop error.io.fire.fabric@pcibus/pcidev/pcifn (1)->
    ereport.io.fire.fabric@pcibus/pcidev/pcifn;

/* fabric-sib covers fabric ereports from functions under the root complex. */
prop error.io.fire.fabric-sib@hostbridge/pciexrc (0)->
    ereport.io.fire.fabric@pciexbus/pciexdev/pciexfn {
	is_under(hostbridge/pciexrc, pciexbus/pciexdev/pciexfn) && !MATCH_CE
    };

prop error.io.fire.fabric-sib@hostbridge/pciexrc (0)->
    ereport.io.fire.fabric@pcibus/pcidev/pcifn {
	is_under(hostbridge/pciexrc, pcibus/pcidev/pcifn)
    };

/* The per-function fabric error also propagates to fabric-sib. */
prop error.io.fire.fabric@pciexbus/pciexdev/pciexfn (1)->
    error.io.fire.fabric-sib@hostbridge/pciexrc {
	is_under(hostbridge/pciexrc, pciexbus/pciexdev/pciexfn)
    };

prop error.io.fire.fabric@pcibus/pcidev/pcifn (1)->
    error.io.fire.fabric-sib@hostbridge/pciexrc {
	is_under(hostbridge/pciexrc, pcibus/pcidev/pcifn)
    };
/*
 * SERD CEs
 *
 * Fabric ereports that match MATCH_CE are counted per PCI-E function by
 * a SERD engine (CE_EVENTS_COUNT events within CE_EVENTS_TIME).  The
 * engine's trip event, ereport.io.fire.pciex.ce, is explained by a
 * device fault on that function.
 *
 * The upset event is declared twice: once with its "within" window and
 * once to attach the SERD engine.
 */
event upset.io.fire.fabric@pciexbus/pciexdev/pciexfn { within(1s) };
event ereport.io.fire.pciex.ce@pciexbus/pciexdev/pciexfn { within(1s) };

prop upset.io.fire.fabric@pciexbus[b]/pciexdev[d]/pciexfn[f] (0)->
    ereport.io.fire.fabric@pciexbus[b]/pciexdev[d]/pciexfn[f] { MATCH_CE };

event upset.io.fire.fabric@pciexbus/pciexdev/pciexfn,
    engine=serd.io.fire.fabric@pciexbus/pciexdev/pciexfn;

engine serd.io.fire.fabric@pciexbus/pciexdev/pciexfn,
    N=CE_EVENTS_COUNT, T=CE_EVENTS_TIME, method=persistent,
    trip=ereport.io.fire.pciex.ce@pciexbus/pciexdev/pciexfn;

prop fault.io.fire.pciex.device@pciexbus/pciexdev/pciexfn (0)->
    ereport.io.fire.pciex.ce@pciexbus/pciexdev/pciexfn;
/***************
 * Upsets
 * -------------
 * Used to hide ereports that are not currently diagnosed or should not
 * be diagnosed.
 *
 * The dmc nodiag error has two separate (1)-> rules: one to the tbw_err
 * ereport and one to the fabric-sib error.
 ***************/
event upset.io.fire.nodiag@hostbridge/pciexrc;
event error.io.fire.dmc.nodiag@hostbridge/pciexrc;

prop error.io.fire.dmc.nodiag@hostbridge/pciexrc (1)->
    ereport.io.fire.dmc.tbw_err@hostbridge/pciexrc;

prop error.io.fire.dmc.nodiag@hostbridge/pciexrc (1)->
    error.io.fire.fabric-sib@hostbridge/pciexrc;

prop upset.io.fire.nodiag@hostbridge/pciexrc (0)->
    ereport.io.fire.jbc.ce_asyn@hostbridge/pciexrc,	/* CPU */
    ereport.io.fire.jbc.jbe@hostbridge/pciexrc,		/* CPU */
    ereport.io.fire.jbc.jte@hostbridge/pciexrc,		/* CPU */
    ereport.io.fire.jbc.ue_asyn@hostbridge/pciexrc,	/* CPU */
    ereport.io.fire.jbc.unsol_intr@hostbridge/pciexrc,	/* CPU */
    ereport.io.fire.jbc.unsol_rd@hostbridge/pciexrc,	/* CPU */
    ereport.io.fire.pec.lin@hostbridge/pciexrc,
    ereport.io.fire.pec.lup@hostbridge/pciexrc,
    error.io.fire.pec.fabric_error@hostbridge/pciexrc,
    error.io.fire.pec.secondary@hostbridge/pciexrc,
    error.io.fire.dmc.nodiag@hostbridge/pciexrc,
    error.io.fire.dmc.fabric_diag@hostbridge/pciexrc,
    error.io.fire.dmc.secondary@hostbridge/pciexrc;