px_err.c revision d0f40dc6a997c84bacf5f9ba83d57a95495c399b
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* sun4v Fire Error Handling
*/
#include "px_obj.h"
#include "px_err.h"
px_rc_err_t *epkt);
px_rc_err_t *epkt);
px_rc_err_t *epkt);
/* Include the generated sun4v epkt checking code */
#include "px_err_gen.c"
/*
* This variable indicates if we have a hypervisor that could potentially send
* incorrect epkts. We always set this to TRUE for now until we find a way to
* tell if this HV bug has been fixed.
*/
/*
* px_err_cb_intr:
* Interrupt handler for the Host Bus Block.
*/
{
}
return (DDI_INTR_UNCLAIMED);
}
/*
* px_err_dmc_pec_intr:
*/
{
}
return (DDI_INTR_UNCLAIMED);
}
/*
* px_err_cmn_intr:
* Common function called by trap, mondo and fabric intr.
* This function is more meaningful in sun4u implementation. Kept
* to mirror sun4u call stack.
* o check for safe access
* o create and queue RC info for later use in fabric scan.
*
* @param px_p leaf in which to check access
* @param derr fm err data structure to be updated
* @param caller PX_TRAP_CALL | PX_INTR_CALL
* @param chkjbc whether to handle hostbus registers (ignored)
* @return err PX_NO_PANIC | PX_PROTECTED |
* PX_PANIC | PX_HW_RESET | PX_EXPECTED
*/
/* ARGSUSED */
int
{
return (PX_NO_ERROR);
}
/*
* fills RC specific fault data
*/
static void
uint64_t fault_addr = 0;
/* Add a PCIe PF_DATA entry */
case BLOCK_MMU:
/* Only PIO Fault Addresses are valid, this is DMA */
fault_addr = NULL;
}
break;
case BLOCK_PCIE:
}
DDI_SUCCESS) {
/*
* affected BDF is to be filled in by
* px_scan_fabric
*/
}
}
break;
case BLOCK_HOSTBUS:
case BLOCK_INTR:
case BLOCK_PORT:
/*
* If the affected device information is available then we
* add the affected_bdf to the pfd, so the affected device
* will be scanned and added to the error q. This will then
* go through the pciev_eh code path and forgive the error
* as needed.
*/
break;
default:
break;
}
}
/*
 * Convert error severity from PX internal values to PCIe Fabric values. Most
 * are self-explanatory, except PX_PROTECTED. PX_PROTECTED will never be
 * returned as is if forgivable.
 *
 * @param rc_err PX severity, passed by reference; it may be overwritten
 * with PX_FABRIC_SCAN
 * @return fabric severity bits accumulated in fab_err
 */
static int
px_err_to_fab_sev(int *rc_err) {
int fab_err = 0;
/*
 * Let fabric scan decide the final severity of the error.
 * This is needed in case IOV code needs to forgive the error.
 */
*rc_err = PX_FABRIC_SCAN;
fab_err |= PF_ERR_PANIC;
}
if (*rc_err & PX_NO_ERROR)
return (fab_err);
}
/*
* px_err_intr:
* o lock
* o create derr
* o check safe access
* o px_err_check_severity(epkt)
* o pcie_scan_fabric
* o Idle intr state
* o unlock
* o handle error: fatal? fm_panic() : return INTR_CLAIMED
*/
static uint_t
{
goto done;
/* Create the derr */
/* Basically check for safe access */
/* Check the severity of this error */
/* Pass the 'rc_err' severity to the fabric scan code. */
tmp_rc_err = rc_err;
/* Scan the fabric */
/*
* Fabric scan didn't occur because of some error condition
* such as Root Port being in drain state, so reset rc_err.
*/
rc_err = tmp_rc_err;
}
/* Set the intr state to idle for the leaf that received the mondo */
INTR_IDLE_STATE) != DDI_SUCCESS) {
return (DDI_INTR_UNCLAIMED);
}
case BLOCK_MMU: /* FALLTHROUGH */
case BLOCK_INTR:
break;
case BLOCK_PCIE:
break;
case BLOCK_HOSTBUS: /* FALLTHROUGH */
default:
break;
}
done:
return (DDI_INTR_CLAIMED);
}
/*
* px_err_epkt_severity:
* Check the severity of the fire error based on the epkt received
*
* @param px_p leaf in which to take the snap shot.
* @param derr fm err in which the ereport is to be based on
* @param epkt epkt received from HV
*/
static int
{
int err = 0;
/* Cautious access error handling */
case DDI_FM_ERR_EXPECTED:
/*
* For ddi_caut_put treat all events as nonfatal. Here
* we have the handle and can call ndi_fm_acc_err_set().
*/
is_safeacc = B_TRUE;
break;
case DDI_FM_ERR_PEEK:
case DDI_FM_ERR_POKE:
/*
*/
is_safeacc = B_TRUE;
break;
default:
}
/*
* Older hypervisors in some cases send epkts with incorrect fields.
* We have to handle these "special" epkts correctly.
*/
if (px_legacy_epkt)
/*
* The affected device by default is set to 'SELF'. The 'block'
* specific error handling below will update this as needed.
*/
case BLOCK_HOSTBUS:
break;
case BLOCK_MMU:
break;
case BLOCK_INTR:
break;
case BLOCK_PORT:
break;
case BLOCK_PCIE:
break;
default:
err = 0;
}
} else if (err & PX_PROTECTED) {
if (px_log & PX_PROTECTED)
} else if (err & PX_NO_PANIC) {
if (px_log & PX_NO_PANIC)
} else if (err & PX_NO_ERROR) {
if (px_log & PX_NO_ERROR)
} else if (err == 0) {
/* Panic on an unrecognized epkt */
}
/* Readjust the severity as a result of safe access */
err = PX_NO_PANIC;
return (err);
}
static void
{
/* send ereport for debug purposes */
if (is_block_pci) {
"%s Epkt contents:\n"
"Block: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d\n"
"I=%d, H=%d, C=%d, U=%d, E=%d, P=%d\n"
"PCI Err Status: 0x%x, PCIe Err Status: 0x%x\n"
"CE Status Reg: 0x%x, UE Status Reg: 0x%x\n"
"HDR1: 0x%lx, HDR2: 0x%lx\n"
"Err Src Reg: 0x%x, Root Err Status: 0x%x\n"
"Err Severity: 0x%x\n",
} else {
"%s Epkt contents:\n"
"Block: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
"Dir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d\n"
"M=%d, S=%d, Size: 0x%x, Addr: 0x%lx\n"
"Hdr1: 0x%lx, Hdr2: 0x%lx, Res: 0x%lx\n"
"Err Severity: 0x%x\n",
err);
}
}
static void
char *msg)
{
if (is_block_pci) {
"A PCIe root port error has occured with a severity"
" \"%s\"\n"
"\tBlock: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d, I=%d\n"
"\tH=%d, C=%d, U=%d, E=%d, P=%d\n"
"\tpci_err: 0x%x, pcie_err=0x%x, ce_reg: 0x%x\n"
"\tue_reg: 0x%x, Hdr1: 0x%p, Hdr2: 0x%p\n"
"\terr_src: 0x%x, root_err: 0x%x\n",
} else {
"A PCIe root complex error has occured with a severity"
" \"%s\"\n"
"\tBlock: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
"\tDir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d, M=%d\n"
"\tS=%d, Size: 0x%x, Addr: 0x%p\n"
"\tHdr1: 0x%p, Hdr2: 0x%p, Res: 0x%p\n",
}
}
/* ARGSUSED */
static void
{
/*
* We don't have a default case for any of the below switch statements
* since we are ok with the code falling through.
*/
case BLOCK_HOSTBUS:
case OP_DMA:
case PH_UNKNOWN:
case CND_UNKNOWN:
case DIR_RESERVED:
break;
} /* DIR */
} /* CND */
} /* PH */
} /* OP */
break;
case BLOCK_MMU:
case OP_XLAT:
case PH_DATA:
case CND_PROT:
case DIR_UNKNOWN:
break;
} /* DIR */
} /* CND */
break;
case PH_IRR:
case CND_RESERVED:
case DIR_IRR:
} /* DIR */
} /* CND */
} /* PH */
} /* OP */
break;
case BLOCK_INTR:
case OP_MSIQ:
case PH_UNKNOWN:
case CND_ILL:
case DIR_RESERVED:
break;
} /* DIR */
break;
case CND_IRR:
case DIR_IRR:
break;
} /* DIR */
} /* CND */
} /* PH */
break;
case OP_RESERVED:
case PH_UNKNOWN:
case CND_ILL:
case DIR_IRR:
break;
} /* DIR */
} /* CND */
break;
case PH_DATA:
case CND_INT:
case DIR_UNKNOWN:
break;
} /* DIR */
} /* CND */
} /* PH */
} /* OP */
} /* BLOCK */
}
/* ARGSUSED */
static int
{
return (px_err_check_eq(dip));
}
/* ARGSUSED */
static int
{
/*
* Check for failed non-posted writes, which are errors that are not
* defined in the PCIe spec. If not return panic.
*/
goto done;
}
/*
* Gather the error logs; if they do not exist, just return with no panic
* and let the fabric message take care of the error.
*/
sts = (PX_NO_PANIC);
goto done;
}
/* Let the fabric message take care of error */
sts = PX_NO_PANIC;
goto done;
}
/* See if the failed transaction belonged to a hardened driver */
sts = (PX_NO_PANIC);
else
/* Add pfd to cause a fabric scan */
case CND_RCA:
break;
case CND_RUR:
break;
}
done:
return (sts);
}
/* ARGSUSED */
static int
{
int sts;
/*
* defined in the PCIe spec.
*/
if (sts == DDI_SUCCESS &&
return (PX_NO_PANIC);
else
return (PX_PANIC);
}
pec_p->ce_reg_status = 0;
pec_p->ue_reg_status = 0;
/*
* According to the PCIe spec, there is a first error pointer. If there
* are header logs recorded and there is more than one error, the log
* will belong to the error that the first error pointer points to.
*
* The regs.primary_ue expects a bit number, go through the ue register
* and find the first error that occurred. Because the sun4v epkt spec
* does not define this value, the algorithm below gives the lower bit
* priority.
*/
if (temp) {
int x;
for (x = 0; !(temp & 0x1); x++) {
}
} else {
pcie->primary_ue = 0;
}
/* Sun4v doesn't log the TX hdr except for CTOs */
} else {
}
}
static int
{
}
bdf));
}