cmd_memerr.c revision 90aa43b4bcbe505624f27a74dc87b9314cb1c698
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Ereport-handling routines for memory errors
*/
#include <cmd_mem.h>
#include <cmd_dimm.h>
#include <cmd_bank.h>
#include <cmd_page.h>
#include <cmd_cpu.h>
#ifdef sun4u
#include <cmd_dp.h>
#include <cmd_dp_page.h>
#endif
#include <cmd.h>
#include <strings.h>
#include <string.h>
#include <errno.h>
#include <sys/errclassify.h>
/*
* Entry type for the name tables declared later in this file.
*
* NOTE(review): the table initializers in this file supply two values per
* entry (a CE_DISP_DESC_* name followed by a CE_DISP_* code), but only the
* name member is visible in this definition -- confirm whether a second
* member is missing here.
*/
struct ce_name2type {
/* Name for the entry; the tables use a NULL name as their last entry */
const char *name;
};
/*
* NOTE(review): the return type, name and parameter list of this function
* are not visible in this view. The visible body declares two static name
* tables and returns CE_DISP_UNKNOWN; any lookup loop between the
* declarations and that return is not shown here.
*/
{
/* Only the NULL-named final entry of this table is visible */
static const struct ce_name2type old[] = {
{ NULL }
};
/*
* Pairs each CE_DISP_DESC_* name with a CE_DISP_* code and ends with a
* NULL-named entry.
*/
static const struct ce_name2type new[] = {
{ CE_DISP_DESC_P, CE_DISP_PERS },
{ CE_DISP_DESC_L, CE_DISP_LEAKY },
{ CE_DISP_DESC_S, CE_DISP_STICKY },
{ NULL }
};
/* presumably the table cursor; no use of it is visible -- TODO confirm */
const struct ce_name2type *tp;
/* The only return visible in this body */
return (CE_DISP_UNKNOWN);
}
/*
* Check the retirements recorded against a DIMM and, when another DIMM also
* qualifies, flag the DIMMs as faulting.
*
* NOTE(review): the function name, its parameter list, the loop headers and
* several guarding conditions are not visible in this view; the comments
* below describe only the statements that are shown. The declarations of
* dret, dimm and hdl are likewise not visible.
*/
static void
{
fmd_case_t *cp;
cmd_dimm_t *d;
/*
* Set to 1 by the scan below when a DIMM passes both skip checks.
* NOTE(review): no initialization of foundrw is visible here -- confirm
* it is cleared before the scan.
*/
int foundrw;
/* We've already complained about this DIMM */
return;
}
return; /* Don't warn until over specified % of system memory */
/* Look for CEs on DIMMs in other banks */
d != NULL; d = cmd_list_next(d)) {
if (d == dimm) {
/* This is the DIMM named by dimm: add its retirements and move on */
dret += d->dimm_nretired;
continue;
}
/* NOTE(review): the condition guarding this skip is not visible */
continue;
/* Not skipped: remember the hit and add this DIMM's retirements */
foundrw = 1;
dret += d->dimm_nretired;
}
}
if (foundrw) {
/*
* Found a DIMM in another bank with a significant number of
* retirements. Something strange is going on, perhaps in the
* datapath or with a bad CPU. A real person will need to
* figure out what's really happening. Emit a fault designed
* to trigger just that.
*/
d = cmd_list_next(d)) {
/* NOTE(review): the conditions guarding these two skips are not visible */
continue;
continue;
/* Set the FAULTING flag only once, then call cmd_dimm_dirty() */
if (!(d->dimm_flags & CMD_MEM_F_FAULTING)) {
d->dimm_flags |= CMD_MEM_F_FAULTING;
cmd_dimm_dirty(hdl, d);
}
"fault.memory.datapath",
}
return;
}
}
/*
* CE event handling path.
*
* NOTE(review): the return type, name and parameter list of this function
* are not visible in this view, and several guarding conditions are
* missing. From the visible statements: early exits return CMD_EVD_UNUSED,
* CMD_EVD_REDUND or CMD_EVD_BAD; the disposition held in type is examined
* by two switch statements; the final path increments dimm->dimm_nretired
* and returns CMD_EVD_OK.
*/
/*ARGSUSED*/
{
const char *uuid;
/*
* An AFAR status other than AFLT_STAT_VALID is one of the conditions
* that returns CMD_EVD_UNUSED (the rest of the condition is not
* visible).
*/
if (afar_status != AFLT_STAT_VALID ||
return (CMD_EVD_UNUSED);
return (CMD_EVD_REDUND);
#ifdef sun4u
return (CMD_EVD_UNUSED);
}
#endif /* sun4u */
/*
* NOTE(review): this path returns NULL while every other visible return
* in this function yields a CMD_EVD_* code -- confirm the intended value.
*/
return (NULL);
}
return (CMD_EVD_UNUSED);
/*
* First switch on the disposition: UNKNOWN and INTERMITTENT return
* CMD_EVD_UNUSED; POSS_PERS, PERS, LEAKY and STICKY break out of the
* switch; the visible POSS_STICKY paths return CMD_EVD_BAD or
* CMD_EVD_UNUSED; any other value returns CMD_EVD_BAD.
*/
switch (type) {
case CE_DISP_UNKNOWN:
return (CMD_EVD_UNUSED);
case CE_DISP_INTERMITTENT:
return (CMD_EVD_UNUSED);
case CE_DISP_POSS_PERS:
break;
case CE_DISP_PERS:
break;
case CE_DISP_LEAKY:
break;
case CE_DISP_POSS_STICKY:
{
if (CE_XDIAG_TESTVALID(ptnrinfo)) {
/* Should have been CE_DISP_STICKY */
return (CMD_EVD_BAD);
} else if (ce1) {
/* Partner could see and could fix CE */
} else {
/* Partner could not see ce1 (ignore ce2) */
}
} else {
}
return (CMD_EVD_UNUSED);
}
case CE_DISP_STICKY:
break;
default:
return (CMD_EVD_BAD);
}
}
/*
* Second switch on the disposition. Per the trailing comments below,
* POSS_PERS and PERS return CMD_EVD_OK while the engine has not fired
* and otherwise break out to retire; LEAKY and STICKY break out to
* retire.
*/
switch (type) {
case CE_DISP_POSS_PERS:
case CE_DISP_PERS:
}
return (CMD_EVD_OK); /* engine hasn't fired */
break; /* to retire */
case CE_DISP_LEAKY:
case CE_DISP_STICKY:
break; /* to retire */
}
/* Retire path: count one more retirement against this DIMM */
dimm->dimm_nretired++;
return (CMD_EVD_OK);
}
/*
* Solve a bank case with suspect "fault.memory.bank". The caller must
* have populated bank->bank_case.cc_cp and is also responsible for adding
* associated ereport(s) to that case.
*
* NOTE(review): the function name, its parameter list and most of its body
* are not visible in this view; the only statement shown is the early
* return taken when the bank has already been complained about.
*/
void
{
return; /* Only complain once per bank */
}
/*
* UE event handling path.
*
* NOTE(review): the return type, name and parameter list of this function
* are not visible in this view, and several guarding conditions are
* missing. From the visible statements: early exits return CMD_EVD_UNUSED
* or CMD_EVD_REDUND; the final path increments bank->bank_nretired and
* returns CMD_EVD_OK.
*/
/*ARGSUSED*/
{
return (CMD_EVD_UNUSED);
}
/* A syndrome status other than AFLT_STAT_VALID returns CMD_EVD_UNUSED */
if (synd_status != AFLT_STAT_VALID) {
return (CMD_EVD_UNUSED);
}
cpu) == CMD_EVD_UNUSED)
return (CMD_EVD_UNUSED);
/*
* The following code applies only to sun4u, because sun4u does
* not poison data in L2 cache resulting from the fetch of a
* memory UE.
*/
#ifdef sun4u
if (afar_status != AFLT_STAT_VALID) {
/*
* Had this report's AFAR been valid, it would have
* contributed an address to the UE cache. We don't
* know what the AFAR would have been, and thus we can't
* add anything to the cache. If a xxU is caused by
* this UE, we won't be able to detect it, and will thus
* erroneously offline the CPU. To prevent this
* situation, we need to assume that all xxUs generated
* through the next E$ flush are attributable to the UE.
*/
} else {
}
#endif /* sun4u */
/* On all platforms, an AFAR status that is not valid returns UNUSED */
if (afar_status != AFLT_STAT_VALID)
return (CMD_EVD_UNUSED);
return (CMD_EVD_REDUND);
/*
* NOTE(review): this path returns NULL while every other visible return
* in this function yields a CMD_EVD_* code -- confirm the intended value.
*/
return (NULL);
}
return (CMD_EVD_UNUSED);
const char *uuid;
}
#ifdef sun4u
/*
* sun4u only: when cmd_dp_error() is true, both visible statements in
* this block return, so the bank retirement count below is not reached.
*/
if (cmd_dp_error(hdl)) {
return (CMD_EVD_OK);
return (CMD_EVD_UNUSED);
}
#endif /* sun4u */
/* Count one more retirement against this bank */
bank->bank_nretired++;
return (CMD_EVD_OK);
}
/*
* NOTE(review): the name, parameter list and body of this function are not
* visible in this view; only the return type and an empty brace pair are
* shown, so its behavior cannot be documented from here.
*/
void
{
}
/*
* NOTE(review): the name, parameter list and body of this function are not
* visible in this view; only the return type and an empty brace pair are
* shown, so its behavior cannot be documented from here.
*/
void
{
}