/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Support routines for managing per-Lxcache state.
*/
#include <cmd_Lxcache.h>
#include <cmd_mem.h>
#include <cmd_cpu.h>
#include <cmd.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <strings.h>
#include <fm/fmd_api.h>
#include <sys/fm/protocol.h>
#include <sys/cheetahregs.h>
#include <sys/mem_cache.h>
#define PN_ECSTATE_NA 5
/*
* These values are our threshold values for SERDing CPU's based on the
* the # of times we have retired a cache line for each category.
*/
#define CMD_CPU_SERD_AGG_1 64
#define CMD_CPU_SERD_AGG_2 64
static int8_t cmd_lowest_way[16] = {
/* 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf */
-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
static int cmd_num_of_bits[16] = {
/* 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf */
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
void
cmd_Lxcache_write(fmd_hdl_t *hdl, cmd_Lxcache_t *Lxcache)
{
fmd_buf_write(hdl, NULL, Lxcache->Lxcache_bufname, Lxcache,
sizeof (cmd_Lxcache_pers_t));
}
const char *
cmd_type_to_str(cmd_ptrsubtype_t pstype)
{
switch (pstype) {
case CMD_PTR_CPU_L2DATA:
return ("l2data");
break;
case CMD_PTR_CPU_L3DATA:
return ("l3data");
break;
case CMD_PTR_CPU_L2TAG:
return ("l2tag");
break;
case CMD_PTR_CPU_L3TAG:
return ("l3tag");
break;
default:
return ("unknown");
break;
}
}
const char *
cmd_flags_to_str(int flags)
{
switch (flags) {
case CMD_LxCACHE_F_ACTIVE:
return ("ACTIVE");
case CMD_LxCACHE_F_FAULTING:
return ("FAULTING");
case CMD_LxCACHE_F_RETIRED:
return ("RETIRED");
case CMD_LxCACHE_F_UNRETIRED:
return ("UNRETIRED");
case CMD_LxCACHE_F_RERETIRED:
return ("RERETIRED");
default:
return ("Unknown_flags");
}
}
const char *
cmd_reason_to_str(int reason)
{
switch (reason) {
case CMD_LXSUSPECT_DATA:
return ("SUSPECT_DATA");
case CMD_LXSUSPECT_0_TAG:
return ("SUSPECT_0_TAG");
case CMD_LXSUSPECT_1_TAG:
return ("SUSPECT_1_TAG");
case CMD_LXCONVICTED:
return ("CONVICTED");
case CMD_LXFUNCTIONING:
return ("FUNCTIONING");
default:
return ("Unknown_reason");
}
}
static void
cmd_pretty_print_Lxcache(fmd_hdl_t *hdl, cmd_Lxcache_t *Lxcache)
{
fmd_hdl_debug(hdl,
"\n"
" cpu = %s\n"
" type = %s\n"
" index = %d\n"
" way = %d\n"
" bit = %d\n"
" reason = %s\n"
" flags = %s\n",
Lxcache->Lxcache_cpu_bufname,
cmd_type_to_str(Lxcache->Lxcache_type),
Lxcache->Lxcache_index,
Lxcache->Lxcache_way,
Lxcache->Lxcache_bit,
cmd_reason_to_str(Lxcache->Lxcache_reason),
cmd_flags_to_str(Lxcache->Lxcache_flags));
}
void
cmd_Lxcache_free(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache,
int destroy)
{
cmd_case_t *cc = &Lxcache->Lxcache_case;
fmd_hdl_debug(hdl, "Entering cmd_Lxcache_free for %s destroy = %d\n",
Lxcache->Lxcache_bufname, destroy);
if (cc->cc_cp != NULL)
cmd_case_fini(hdl, cc->cc_cp, destroy);
if (cc->cc_serdnm != NULL) {
if (fmd_serd_exists(hdl, cc->cc_serdnm) && destroy) {
fmd_serd_destroy(hdl, cc->cc_serdnm);
fmd_hdl_strfree(hdl, cc->cc_serdnm);
cc->cc_serdnm = NULL;
}
}
if (Lxcache->Lxcache_nvl) {
nvlist_free(Lxcache->Lxcache_nvl);
Lxcache->Lxcache_nvl = NULL;
}
/*
* Clean up the SERD engine created to handle recheck of TAGS.
* This SERD engine was created to save the event pointer.
*/
if (Lxcache->Lxcache_serdnm != NULL) {
if (fmd_serd_exists(hdl, Lxcache->Lxcache_serdnm) && destroy) {
fmd_serd_destroy(hdl, Lxcache->Lxcache_serdnm);
fmd_hdl_strfree(hdl, Lxcache->Lxcache_serdnm);
Lxcache->Lxcache_serdnm = NULL;
}
}
Lxcache->Lxcache_timeout_id = -1;
Lxcache->Lxcache_ep = NULL;
Lxcache->Lxcache_retry_count = 0;
if (destroy)
fmd_buf_destroy(hdl, NULL, Lxcache->Lxcache_bufname);
cmd_fmri_fini(hdl, &Lxcache->Lxcache_asru, destroy);
cmd_list_delete(&cpu->cpu_Lxcaches, Lxcache);
fmd_hdl_free(hdl, Lxcache, sizeof (cmd_Lxcache_t));
}
void
cmd_Lxcache_destroy(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache)
{
cmd_Lxcache_free(hdl, cpu, Lxcache, FMD_B_TRUE);
}
cmd_Lxcache_t *
cmd_Lxcache_lookup_by_type_index_way_bit(cmd_cpu_t *cpu,
cmd_ptrsubtype_t pstype, int32_t index, int8_t way, int16_t bit)
{
cmd_Lxcache_t *Lxcache;
for (Lxcache = cmd_list_next(&cpu->cpu_Lxcaches); Lxcache != NULL;
Lxcache = cmd_list_next(Lxcache)) {
if ((Lxcache->Lxcache_type == pstype) &&
(Lxcache->Lxcache_index == (uint32_t)index) &&
(Lxcache->Lxcache_way == (uint32_t)way) &&
(Lxcache->Lxcache_bit == (uint16_t)bit))
return (Lxcache);
}
return (NULL);
}
cmd_Lxcache_t *
cmd_Lxcache_create(fmd_hdl_t *hdl, cmd_xr_t *xr, cmd_cpu_t *cpu,
nvlist_t *modasru, cmd_ptrsubtype_t pstype, int32_t index,
int8_t way, int16_t bit)
{
cmd_Lxcache_t *Lxcache;
nvlist_t *asru;
const char *pstype_name;
uint8_t fmri_Lxcache_type;
pstype_name = cmd_type_to_str(pstype);
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d:Creating new Lxcache for index=%d way=%d bit=%d\n",
pstype_name, cpu->cpu_cpuid, index, way, bit);
CMD_CPU_STAT_BUMP(cpu, Lxcache_creat);
Lxcache = fmd_hdl_zalloc(hdl, sizeof (cmd_Lxcache_t), FMD_SLEEP);
(void) strncpy(Lxcache->Lxcache_cpu_bufname,
cpu->cpu_bufname, CMD_BUFNMLEN);
Lxcache->Lxcache_nodetype = CMD_NT_LxCACHE;
Lxcache->Lxcache_version = CMD_LxCACHE_VERSION;
Lxcache->Lxcache_type = pstype;
Lxcache->Lxcache_index = (uint32_t)index;
Lxcache->Lxcache_way = (uint32_t)way;
Lxcache->Lxcache_bit = (uint16_t)bit;
Lxcache->Lxcache_reason = CMD_LXFUNCTIONING;
Lxcache->Lxcache_flags = CMD_LxCACHE_F_ACTIVE;
Lxcache->Lxcache_timeout_id = -1;
Lxcache->Lxcache_retry_count = 0;
Lxcache->Lxcache_nvl = NULL;
Lxcache->Lxcache_ep = NULL;
Lxcache->Lxcache_serdnm = NULL;
Lxcache->Lxcache_clcode = 0;
Lxcache->xr = xr;
Lxcache->Lxcache_retired_fmri[0] = '\0';
switch (pstype) {
case CMD_PTR_CPU_L2DATA:
fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L2;
break;
case CMD_PTR_CPU_L3DATA:
fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L3;
break;
case CMD_PTR_CPU_L2TAG:
fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L2;
break;
case CMD_PTR_CPU_L3TAG:
fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L3;
break;
default:
break;
}
cmd_bufname(Lxcache->Lxcache_bufname, sizeof (Lxcache->Lxcache_bufname),
"Lxcache_%s_%d_%d_%d_%d", pstype_name, cpu->cpu_cpuid,
index, way, bit);
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d: new Lxcache name is %s\n",
pstype_name, cpu->cpu_cpuid, Lxcache->Lxcache_bufname);
if ((errno = nvlist_dup(modasru, &asru, 0)) != 0 ||
(errno = nvlist_add_uint32(asru, FM_FMRI_CPU_CACHE_INDEX,
index)) != 0 ||
(errno = nvlist_add_uint32(asru, FM_FMRI_CPU_CACHE_WAY,
(uint32_t)way)) != 0 ||
(errno = nvlist_add_uint16(asru, FM_FMRI_CPU_CACHE_BIT,
bit)) != 0 ||
(errno = nvlist_add_uint8(asru, FM_FMRI_CPU_CACHE_TYPE,
fmri_Lxcache_type)) != 0 ||
(errno = fmd_nvl_fmri_expand(hdl, asru)) != 0)
fmd_hdl_abort(hdl, "failed to build Lxcache fmri");
asru->nvl_nvflag |= NV_UNIQUE_NAME_TYPE;
cmd_fmri_init(hdl, &Lxcache->Lxcache_asru, asru,
"%s_asru_%d_%d_%d", pstype_name, index, way, bit);
nvlist_free(asru);
cmd_list_append(&cpu->cpu_Lxcaches, Lxcache);
cmd_Lxcache_write(hdl, Lxcache);
return (Lxcache);
}
cmd_Lxcache_t *
cmd_Lxcache_lookup_by_index_way(cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype,
int32_t index, int8_t way)
{
cmd_Lxcache_t *cache;
for (cache = cmd_list_next(&cpu->cpu_Lxcaches); cache != NULL;
cache = cmd_list_next(cache)) {
if ((cache->Lxcache_index == (uint32_t)index) &&
(cache->Lxcache_way == (uint32_t)way) &&
(cache->Lxcache_type == pstype)) {
return (cache);
}
}
return (NULL);
}
static cmd_Lxcache_t *
Lxcache_wrapv1(fmd_hdl_t *hdl, cmd_Lxcache_pers_t *pers, size_t psz)
{
cmd_Lxcache_t *Lxcache;
if (psz != sizeof (cmd_Lxcache_pers_t)) {
fmd_hdl_abort(hdl, "size of state doesn't match size of "
"version 1 state (%u bytes).\n",
sizeof (cmd_Lxcache_pers_t));
}
Lxcache = fmd_hdl_zalloc(hdl, sizeof (cmd_Lxcache_t), FMD_SLEEP);
bcopy(pers, Lxcache, sizeof (cmd_Lxcache_pers_t));
fmd_hdl_free(hdl, pers, psz);
return (Lxcache);
}
void *
cmd_Lxcache_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
{
cmd_Lxcache_t *Lxcache;
cmd_Lxcache_t *recovered_Lxcache;
cmd_cpu_t *cpu;
size_t Lxcachesz;
char *serdnm;
/*
* We need to first extract the cpu name by reading directly
* from fmd buffers in order to begin our search for Lxcache in
* the appropriate cpu list.
* After we identify the cpu list using buf name we look
* in cpu list for our Lxcache states.
*/
fmd_hdl_debug(hdl, "restoring Lxcache from %s\n", ptr->ptr_name);
if ((Lxcachesz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
fmd_hdl_abort(hdl, "Lxcache referenced by case %s does "
"not exist in saved state\n",
fmd_case_uuid(hdl, cp));
} else if (Lxcachesz != sizeof (cmd_Lxcache_pers_t)) {
fmd_hdl_abort(hdl, "Lxcache buffer referenced by case %s "
"is %d bytes. Expected size is %d bytes\n",
fmd_case_uuid(hdl, cp), Lxcachesz,
sizeof (cmd_Lxcache_pers_t));
}
if ((Lxcache = cmd_buf_read(hdl, NULL, ptr->ptr_name,
Lxcachesz)) == NULL) {
fmd_hdl_abort(hdl, "failed to read Lxcache buf %s",
ptr->ptr_name);
}
cmd_pretty_print_Lxcache(hdl, Lxcache);
fmd_hdl_debug(hdl, "found %d in version field\n",
Lxcache->Lxcache_version);
cpu = cmd_restore_cpu_only(hdl, cp, Lxcache->Lxcache_cpu_bufname);
if (cpu == NULL) {
fmd_hdl_debug(hdl,
"\nCould not restore cpu %s\n",
Lxcache->Lxcache_cpu_bufname);
return (NULL);
}
recovered_Lxcache = Lxcache; /* save the recovered Lxcache */
for (Lxcache = cmd_list_next(&cpu->cpu_Lxcaches); Lxcache != NULL;
Lxcache = cmd_list_next(Lxcache)) {
if (strcmp(Lxcache->Lxcache_bufname, ptr->ptr_name) == 0)
break;
}
if (Lxcache == NULL) {
switch (recovered_Lxcache->Lxcache_version) {
case CMD_LxCACHE_VERSION_1:
Lxcache = Lxcache_wrapv1(hdl,
(cmd_Lxcache_pers_t *)recovered_Lxcache,
Lxcachesz);
break;
default:
fmd_hdl_abort(hdl, "unknown version (found %d) "
"for Lxcache state referenced by case %s.\n",
recovered_Lxcache->Lxcache_version,
fmd_case_uuid(hdl, cp));
break;
}
cmd_fmri_restore(hdl, &Lxcache->Lxcache_asru);
/*
* We need to cleanup the information associated with
* the timeout routine because these are not checkpointed
* and cannot be retored.
*/
Lxcache->Lxcache_timeout_id = -1;
Lxcache->Lxcache_retry_count = 0;
Lxcache->Lxcache_nvl = NULL;
Lxcache->Lxcache_ep = NULL;
Lxcache->Lxcache_serdnm = NULL;
cmd_list_append(&cpu->cpu_Lxcaches, Lxcache);
}
serdnm = cmd_Lxcache_serdnm_create(hdl, cpu->cpu_cpuid,
Lxcache->Lxcache_type, Lxcache->Lxcache_index,
Lxcache->Lxcache_way, Lxcache->Lxcache_bit);
fmd_hdl_debug(hdl,
"cpu_id %d: serdname for the case is %s\n",
cpu->cpu_cpuid, serdnm);
fmd_hdl_debug(hdl,
"cpu_id %d: restoring the case for index %d way %d bit %d\n",
cpu->cpu_cpuid, Lxcache->Lxcache_index,
Lxcache->Lxcache_way, Lxcache->Lxcache_bit);
cmd_case_restore(hdl, &Lxcache->Lxcache_case, cp, serdnm);
return (Lxcache);
}
/*ARGSUSED*/
void
cmd_Lxcache_validate(fmd_hdl_t *hdl, cmd_cpu_t *cpu)
{
cmd_Lxcache_t *Lxcache, *next;
for (Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
Lxcache != NULL; Lxcache = next) {
next = cmd_list_next(Lxcache);
if (fmd_nvl_fmri_unusable(hdl, Lxcache->Lxcache_asru_nvl)) {
cmd_Lxcache_destroy(hdl, cpu, Lxcache);
}
}
}
void
cmd_Lxcache_dirty(fmd_hdl_t *hdl, cmd_Lxcache_t *Lxcache)
{
if (fmd_buf_size(hdl, NULL, Lxcache->Lxcache_bufname) !=
sizeof (cmd_Lxcache_pers_t))
fmd_buf_destroy(hdl, NULL, Lxcache->Lxcache_bufname);
/* No need to rewrite the FMRIs in the Lxcache - they don't change */
fmd_buf_write(hdl, NULL,
Lxcache->Lxcache_bufname, &Lxcache->Lxcache_pers,
sizeof (cmd_Lxcache_pers_t));
}
void
cmd_Lxcache_fini(fmd_hdl_t *hdl, cmd_cpu_t *cpu)
{
cmd_Lxcache_t *Lxcache;
while ((Lxcache = cmd_list_next(&cpu->cpu_Lxcaches)) != NULL)
cmd_Lxcache_free(hdl, cpu, Lxcache, FMD_B_FALSE);
}
char *
cmd_Lxcache_serdnm_create(fmd_hdl_t *hdl, uint32_t cpu_id,
cmd_ptrsubtype_t pstype,
int32_t index, int8_t way, int16_t bit)
{
const char *fmt = "cpu_%d:%s_%d_%d_%d_serd";
const char *serdbase;
size_t sz;
char *nm;
serdbase = cmd_type_to_str(pstype);
sz = (snprintf(NULL, 0, fmt, cpu_id, serdbase, index, way, bit) + 1);
nm = fmd_hdl_alloc(hdl, sz, FMD_SLEEP);
(void) snprintf(nm, sz, fmt, cpu_id, serdbase, index, way, bit);
return (nm);
}
char *
cmd_Lxcache_anonymous_serdnm_create(fmd_hdl_t *hdl, uint32_t cpu_id,
cmd_ptrsubtype_t pstype,
int32_t index, int8_t way, int16_t bit)
{
const char *fmt = "cpu_%d:%s_%d_%d_%d_anonymous_serd";
const char *serdbase;
size_t sz;
char *nm;
serdbase = cmd_type_to_str(pstype);
sz = (snprintf(NULL, 0, fmt, cpu_id, serdbase, index, way, bit) + 1);
nm = fmd_hdl_alloc(hdl, sz, FMD_SLEEP);
(void) snprintf(nm, sz, fmt, cpu_id, serdbase, index, way, bit);
return (nm);
}
/*
* Count the number of SERD type 2 ways retired for a given cpu
* These are defined to be L3 Cache data retirements
*/
uint32_t
cmd_Lx_index_count_type2_ways(cmd_cpu_t *cpu)
{
cmd_Lxcache_t *cache = NULL;
uint32_t ret_count = 0;
for (cache = cmd_list_next(&cpu->cpu_Lxcaches); cache != NULL;
cache = cmd_list_next(cache)) {
if ((cache->Lxcache_flags & CMD_LxCACHE_F_RETIRED) &&
(cache->Lxcache_type == CMD_PTR_CPU_L3DATA)) {
ret_count++;
}
}
return (ret_count);
}
/*
* Count the number of SERD type 1 ways retired for a given cpu
* These are defined to be L2 Data, tag and L3 Tag retirements
*/
uint32_t
cmd_Lx_index_count_type1_ways(cmd_cpu_t *cpu)
{
cmd_Lxcache_t *cache = NULL;
uint32_t ret_count = 0;
for (cache = cmd_list_next(&cpu->cpu_Lxcaches); cache != NULL;
cache = cmd_list_next(cache)) {
if ((cache->Lxcache_flags & CMD_LxCACHE_F_RETIRED) &&
((cache->Lxcache_type == CMD_PTR_CPU_L2DATA) ||
IS_TAG(cache->Lxcache_type))) {
ret_count++;
}
}
return (ret_count);
}
void
cmd_fault_the_cpu(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype,
const char *fltnm)
{
fmd_case_t *cp;
const char *uuid;
cp = cmd_case_create(hdl, &cpu->cpu_header, pstype,
&uuid);
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d Created case %s to retire CPU\n",
fltnm, cpu->cpu_cpuid);
if ((errno = fmd_nvl_fmri_expand(hdl, cpu->cpu_asru_nvl)) != 0)
fmd_hdl_abort(hdl, "failed to build CPU fmri");
cmd_cpu_create_faultlist(hdl, cp, cpu, fltnm, NULL, HUNDRED_PERCENT);
fmd_case_solve(hdl, cp);
}
void
cmd_retire_cpu_if_limits_exceeded(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
cmd_ptrsubtype_t pstype, const char *fltnm)
{
int cpu_retired_1, cpu_retired_2;
/* Retrieve the number of retired ways for each category */
cpu_retired_1 = cmd_Lx_index_count_type1_ways(cpu);
cpu_retired_2 = cmd_Lx_index_count_type2_ways(cpu);
fmd_hdl_debug(hdl,
"\n%s:CPU %d retired Type 1 way count is: %d\n",
fltnm, cpu->cpu_cpuid, cpu_retired_1);
fmd_hdl_debug(hdl, "\n%s:CPU %d retired Type 2 way count is: %d\n",
fltnm, cpu->cpu_cpuid, cpu_retired_2);
if (((cpu_retired_1 > CMD_CPU_SERD_AGG_1) ||
(cpu_retired_2 > CMD_CPU_SERD_AGG_2)) &&
(cpu->cpu_faulting != FMD_B_TRUE)) {
cmd_fault_the_cpu(hdl, cpu, pstype, fltnm);
}
}
void
cmd_Lxcache_fault(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache,
const char *fltnm, nvlist_t *rsrc, uint_t cert)
{
char fltmsg[64];
nvlist_t *flt;
(void) snprintf(fltmsg, sizeof (fltmsg), "fault.cpu.%s.%s-line",
cmd_cpu_type2name(hdl, cpu->cpu_type), fltnm);
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d: fltmsg = %s\n",
fltnm, cpu->cpu_cpuid, fltmsg);
if (Lxcache->Lxcache_flags & CMD_LxCACHE_F_FAULTING) {
return;
}
Lxcache->Lxcache_flags |= CMD_LxCACHE_F_FAULTING;
flt = fmd_nvl_create_fault(hdl, fltmsg, cert,
Lxcache->Lxcache_asru.fmri_nvl, cpu->cpu_fru_nvl, rsrc);
if (nvlist_add_boolean_value(flt, FM_SUSPECT_MESSAGE, B_FALSE) != 0)
fmd_hdl_abort(hdl, "failed to add no-message member to fault");
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d: adding suspect list to case %s\n",
fltnm, cpu->cpu_cpuid,
fmd_case_uuid(hdl, Lxcache->Lxcache_case.cc_cp));
fmd_case_add_suspect(hdl, Lxcache->Lxcache_case.cc_cp, flt);
fmd_case_solve(hdl, Lxcache->Lxcache_case.cc_cp);
if (Lxcache->Lxcache_retired_fmri[0] == 0) {
if (cmd_fmri_nvl2str(hdl, Lxcache->Lxcache_asru.fmri_nvl,
Lxcache->Lxcache_retired_fmri,
sizeof (Lxcache->Lxcache_retired_fmri)) == -1)
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d: Failed to save the"
" retired fmri string\n",
fltnm, cpu->cpu_cpuid);
else
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d:Saved the retired fmri string %s\n",
fltnm, cpu->cpu_cpuid,
Lxcache->Lxcache_retired_fmri);
}
Lxcache->Lxcache_flags &= ~(CMD_LxCACHE_F_FAULTING);
}
void
cmd_Lxcache_close(fmd_hdl_t *hdl, void *arg)
{
cmd_cpu_t *cpu;
cmd_Lxcache_t *Lxcache;
cmd_case_t *cc;
Lxcache = (cmd_Lxcache_t *)arg;
fmd_hdl_debug(hdl, "cmd_Lxcache_close called for %s\n",
Lxcache->Lxcache_bufname);
cc = &Lxcache->Lxcache_case;
for (cpu = cmd_list_next(&cmd.cmd_cpus); cpu != NULL;
cpu = cmd_list_next(cpu)) {
if (strcmp(cpu->cpu_bufname,
Lxcache->Lxcache_cpu_bufname) == 0)
break;
}
if (cpu == NULL)
fmd_hdl_abort(hdl, "failed to find the cpu %s for %s\n",
Lxcache->Lxcache_cpu_bufname,
Lxcache->Lxcache_bufname);
/*
* We will destroy the case and serd engine.
* The rest will be destroyed when we retire the CPU
* until then we keep the Lxcache strutures alive.
*/
if (cc->cc_cp != NULL) {
cmd_case_fini(hdl, cc->cc_cp, FMD_B_TRUE);
cc->cc_cp = NULL;
}
if (cc->cc_serdnm != NULL) {
if (fmd_serd_exists(hdl, cc->cc_serdnm))
fmd_serd_destroy(hdl, cc->cc_serdnm);
fmd_hdl_strfree(hdl, cc->cc_serdnm);
cc->cc_serdnm = NULL;
}
}
cmd_Lxcache_t *
cmd_Lxcache_lookup_by_timeout_id(id_t id)
{
cmd_cpu_t *cpu;
cmd_Lxcache_t *cmd_Lxcache;
for (cpu = cmd_list_next(&cmd.cmd_cpus); cpu != NULL;
cpu = cmd_list_next(cpu)) {
for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
cmd_Lxcache != NULL;
cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
if (cmd_Lxcache->Lxcache_timeout_id == id)
return (cmd_Lxcache);
}
}
return (NULL);
}
void
cmd_Lxcache_gc(fmd_hdl_t *hdl)
{
cmd_cpu_t *cpu;
for (cpu = cmd_list_next(&cmd.cmd_cpus); cpu != NULL;
cpu = cmd_list_next(cpu))
cmd_Lxcache_validate(hdl, cpu);
}
cmd_evdisp_t
get_tagdata(cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype,
int32_t index, uint64_t *tag_data)
{
int fd;
cache_info_t cache_info;
fd = open(mem_cache_device, O_RDONLY);
if (fd == -1) {
(void) printf(
"cpu_id = %d could not open %s to read tag info.\n",
cpu->cpu_cpuid, mem_cache_device);
return (CMD_EVD_BAD);
}
switch (pstype) {
case CMD_PTR_CPU_L2TAG:
case CMD_PTR_CPU_L2DATA:
cache_info.cache = L2_CACHE_TAG;
break;
case CMD_PTR_CPU_L3TAG:
case CMD_PTR_CPU_L3DATA:
cache_info.cache = L3_CACHE_TAG;
break;
}
cache_info.cpu_id = cpu->cpu_cpuid;
cache_info.index = index;
cache_info.datap = tag_data;
cache_info.way = 0;
if (test_mode) {
if (ioctl(fd, MEM_CACHE_READ_ERROR_INJECTED_TAGS, &cache_info)
== -1) {
(void) printf("cpu_id = %d ioctl"
" MEM_CACHE_READ_ERROR_INJECTED_TAGS failed"
" errno = %d\n",
cpu->cpu_cpuid, errno);
(void) close(fd);
return (CMD_EVD_BAD);
}
} else {
if (ioctl(fd, MEM_CACHE_READ_TAGS, &cache_info)
== -1) {
(void) printf("cpu_id = %d ioctl"
" MEM_CACHE_READ_TAGS failed"
" errno = %d\n",
cpu->cpu_cpuid, errno);
(void) close(fd);
return (CMD_EVD_BAD);
}
}
(void) close(fd);
return (CMD_EVD_OK);
}
int
get_index_retired_ways(cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype, int32_t index)
{
int i, retired_ways;
uint64_t tag_data[PN_CACHE_NWAYS];
if (get_tagdata(cpu, pstype, index, tag_data) != 0) {
return (-1);
}
retired_ways = 0;
for (i = 0; i < PN_CACHE_NWAYS; i++) {
if ((tag_data[i] & CH_ECSTATE_MASK) ==
PN_ECSTATE_NA)
retired_ways++;
}
return (retired_ways);
}
boolean_t
cmd_cache_way_retire(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache)
{
const char *fltnm;
cache_info_t cache_info;
int ret, fd;
fltnm = cmd_type_to_str(Lxcache->Lxcache_type);
fd = open(mem_cache_device, O_RDWR);
if (fd == -1) {
fmd_hdl_debug(hdl,
"fltnm:cpu_id %d open of %s failed\n",
fltnm, cpu->cpu_cpuid, mem_cache_device);
return (B_FALSE);
}
cache_info.cpu_id = cpu->cpu_cpuid;
cache_info.way = Lxcache->Lxcache_way;
cache_info.bit = Lxcache->Lxcache_bit;
cache_info.index = Lxcache->Lxcache_index;
switch (Lxcache->Lxcache_type) {
case CMD_PTR_CPU_L2TAG:
cache_info.cache = L2_CACHE_TAG;
break;
case CMD_PTR_CPU_L2DATA:
cache_info.cache = L2_CACHE_DATA;
break;
case CMD_PTR_CPU_L3TAG:
cache_info.cache = L3_CACHE_TAG;
break;
case CMD_PTR_CPU_L3DATA:
cache_info.cache = L3_CACHE_DATA;
break;
}
fmd_hdl_debug(hdl,
"\n%s:cpu %d: Retiring index %d, way %d bit %d\n",
fltnm, cpu->cpu_cpuid, cache_info.index, cache_info.way,
(int16_t)cache_info.bit);
ret = ioctl(fd, MEM_CACHE_RETIRE, &cache_info);
(void) close(fd);
if (ret == -1) {
fmd_hdl_debug(hdl,
"fltnm:cpu_id %d MEM_CACHE_RETIRE ioctl failed\n",
fltnm, cpu->cpu_cpuid);
return (B_FALSE);
}
return (B_TRUE);
}
boolean_t
cmd_cache_way_unretire(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache)
{
const char *fltnm;
cache_info_t cache_info;
int ret, fd;
fltnm = cmd_type_to_str(Lxcache->Lxcache_type);
fd = open(mem_cache_device, O_RDWR);
if (fd == -1) {
fmd_hdl_debug(hdl,
"fltnm:cpu_id %d open of %s failed\n",
fltnm, cpu->cpu_cpuid, mem_cache_device);
return (B_FALSE);
}
cache_info.cpu_id = cpu->cpu_cpuid;
cache_info.way = Lxcache->Lxcache_way;
cache_info.bit = Lxcache->Lxcache_bit;
cache_info.index = Lxcache->Lxcache_index;
switch (Lxcache->Lxcache_type) {
case CMD_PTR_CPU_L2TAG:
cache_info.cache = L2_CACHE_TAG;
break;
case CMD_PTR_CPU_L2DATA:
cache_info.cache = L2_CACHE_DATA;
break;
case CMD_PTR_CPU_L3TAG:
cache_info.cache = L3_CACHE_TAG;
break;
case CMD_PTR_CPU_L3DATA:
cache_info.cache = L3_CACHE_DATA;
break;
}
fmd_hdl_debug(hdl,
"\n%s:cpu %d: Unretiring index %d, way %d bit %d\n",
fltnm, cpu->cpu_cpuid, cache_info.index, cache_info.way,
(int16_t)cache_info.bit);
ret = ioctl(fd, MEM_CACHE_UNRETIRE, &cache_info);
(void) close(fd);
if (ret == -1) {
fmd_hdl_debug(hdl,
"fltnm:cpu_id %d MEM_CACHE_UNRETIRE ioctl failed\n",
fltnm, cpu->cpu_cpuid);
return (B_FALSE);
}
return (B_TRUE);
}
static cmd_Lxcache_t *
cmd_Lxcache_lookup_by_type_index_way_flags(cmd_cpu_t *cpu,
cmd_ptrsubtype_t type, int32_t index, int8_t way, int32_t flags)
{
cmd_Lxcache_t *cmd_Lxcache;
for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
cmd_Lxcache != NULL;
cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
if ((cmd_Lxcache->Lxcache_index == index) &&
(cmd_Lxcache->Lxcache_way == way) &&
(cmd_Lxcache->Lxcache_type == type) &&
(cmd_Lxcache->Lxcache_flags & flags))
return (cmd_Lxcache);
}
return (NULL);
}
static int8_t
cmd_Lxcache_get_bit_array_of_available_ways(cmd_cpu_t *cpu,
cmd_ptrsubtype_t type, int32_t index)
{
uint8_t bit_array_of_unavailable_ways;
uint8_t bit_array_of_available_ways;
cmd_ptrsubtype_t match_type;
cmd_Lxcache_t *cmd_Lxcache;
uint8_t bit_array_of_retired_ways;
/*
* We scan the Lxcache structures for this CPU and collect
* the following 2 information.
* - bit_array_of_retired_ways
* - bit_array_of_unavailable_ways
* If type is Lx_TAG then unavailable_ways will not include ways that
* were retired due to DATA faults, because these ways can still be
* re-retired for TAG faults.
* If 3 ways have been retired then we protect the only remaining
* unretired way by marking it as unavailable.
*/
bit_array_of_unavailable_ways = 0;
bit_array_of_retired_ways = 0;
switch (type) {
case CMD_PTR_CPU_L2TAG:
match_type = CMD_PTR_CPU_L2DATA;
break;
case CMD_PTR_CPU_L2DATA:
match_type = CMD_PTR_CPU_L2TAG;
break;
case CMD_PTR_CPU_L3TAG:
match_type = CMD_PTR_CPU_L3DATA;
break;
case CMD_PTR_CPU_L3DATA:
match_type = CMD_PTR_CPU_L3TAG;
break;
}
for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
cmd_Lxcache != NULL;
cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
if ((cmd_Lxcache->Lxcache_index == index) &&
((cmd_Lxcache->Lxcache_type == type) ||
(cmd_Lxcache->Lxcache_type == match_type)) &&
(cmd_Lxcache->Lxcache_flags &
(CMD_LxCACHE_F_RETIRED | CMD_LxCACHE_F_RERETIRED))) {
bit_array_of_retired_ways |=
(1 << cmd_Lxcache->Lxcache_way);
/*
* If we are calling this while handling TAG errors
* we can reretire the cachelines retired due to DATA
* errors. We will ignore the cachelnes that are
* retired due to DATA faults.
*/
if ((type == CMD_PTR_CPU_L2TAG) &&
(cmd_Lxcache->Lxcache_type == CMD_PTR_CPU_L2DATA))
continue;
if ((type == CMD_PTR_CPU_L3TAG) &&
(cmd_Lxcache->Lxcache_type == CMD_PTR_CPU_L3DATA))
continue;
bit_array_of_unavailable_ways |=
(1 << cmd_Lxcache->Lxcache_way);
}
}
if (cmd_num_of_bits[bit_array_of_retired_ways & 0xf] == 3) {
/*
* special case: 3 ways are already retired.
* The Lone unretired way is set as 1, rest are set as 0.
* We now OR this with bit_array_of_unavailable_ways
* so that this unretired way will not be allocated.
*/
bit_array_of_retired_ways ^= 0xf;
bit_array_of_retired_ways &= 0xf;
bit_array_of_unavailable_ways |= bit_array_of_retired_ways;
}
bit_array_of_available_ways =
((bit_array_of_unavailable_ways ^ 0xf) & 0xf);
return (bit_array_of_available_ways);
}
/*
* Look for a way next to the specified way that is
* not in a retired state.
* We stop when way 3 is reached.
*/
int8_t
cmd_Lxcache_get_next_retirable_way(cmd_cpu_t *cpu,
int32_t index, cmd_ptrsubtype_t pstype, int8_t specified_way)
{
uint8_t bit_array_of_ways;
int8_t mask;
if (specified_way == 3)
return (-1);
bit_array_of_ways = cmd_Lxcache_get_bit_array_of_available_ways(
cpu,
pstype, index);
if (specified_way == 2)
mask = 0x8;
else if (specified_way == 1)
mask = 0xc;
else
mask = 0xe;
return (cmd_lowest_way[bit_array_of_ways & mask]);
}
int8_t
cmd_Lxcache_get_lowest_retirable_way(cmd_cpu_t *cpu,
int32_t index, cmd_ptrsubtype_t pstype)
{
uint8_t bit_array_of_ways;
bit_array_of_ways = cmd_Lxcache_get_bit_array_of_available_ways(
cpu,
pstype, index);
return (cmd_lowest_way[bit_array_of_ways]);
}
cmd_Lxcache_t *
cmd_Lxcache_lookup_by_type_index_way_reason(cmd_cpu_t *cpu,
cmd_ptrsubtype_t pstype, int32_t index, int8_t way, int32_t reason)
{
cmd_Lxcache_t *cmd_Lxcache;
for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
cmd_Lxcache != NULL;
cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
if ((cmd_Lxcache->Lxcache_index == (uint32_t)index) &&
(cmd_Lxcache->Lxcache_way == (uint32_t)way) &&
(cmd_Lxcache->Lxcache_reason & reason) &&
(cmd_Lxcache->Lxcache_type == pstype)) {
return (cmd_Lxcache);
}
}
return (NULL);
}
cmd_Lxcache_t *
cmd_Lxcache_lookup_by_type_index_bit_reason(cmd_cpu_t *cpu,
cmd_ptrsubtype_t pstype, int32_t index, int16_t bit, int32_t reason)
{
cmd_Lxcache_t *cmd_Lxcache;
for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
cmd_Lxcache != NULL;
cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
if ((cmd_Lxcache->Lxcache_index == (uint32_t)index) &&
(cmd_Lxcache->Lxcache_bit == (uint16_t)bit) &&
(cmd_Lxcache->Lxcache_reason & reason) &&
(cmd_Lxcache->Lxcache_type == pstype)) {
return (cmd_Lxcache);
}
}
return (NULL);
}
void
cmd_Lxcache_destroy_anonymous_serd_engines(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
cmd_ptrsubtype_t type, int32_t index, int16_t bit)
{
cmd_Lxcache_t *cmd_Lxcache;
cmd_case_t *cc;
for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
cmd_Lxcache != NULL;
cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
if ((cmd_Lxcache->Lxcache_type == type) &&
(cmd_Lxcache->Lxcache_index == (uint32_t)index) &&
(cmd_Lxcache->Lxcache_bit == (uint16_t)bit) &&
(cmd_Lxcache->Lxcache_way == (uint32_t)CMD_ANON_WAY)) {
cc = &cmd_Lxcache->Lxcache_case;
if (cc == NULL)
continue;
if (cc->cc_serdnm != NULL) {
if (fmd_serd_exists(hdl, cc->cc_serdnm)) {
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d destroying SERD"
" engine %s\n",
cmd_type_to_str(type),
cpu->cpu_cpuid, cc->cc_serdnm);
fmd_serd_destroy(hdl, cc->cc_serdnm);
}
fmd_hdl_strfree(hdl, cc->cc_serdnm);
cc->cc_serdnm = NULL;
}
}
}
}
ssize_t
cmd_fmri_nvl2str(fmd_hdl_t *hdl, nvlist_t *nvl, char *buf, size_t buflen)
{
uint8_t type;
uint32_t cpuid, way;
uint32_t index;
uint16_t bit;
char *serstr = NULL;
char missing_list[128];
missing_list[0] = 0;
if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, &cpuid) != 0)
(void) strcat(missing_list, FM_FMRI_CPU_ID);
if (nvlist_lookup_string(nvl, FM_FMRI_CPU_SERIAL_ID, &serstr) != 0)
(void) strcat(missing_list, FM_FMRI_CPU_SERIAL_ID);
if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_CACHE_INDEX, &index) != 0)
(void) strcat(missing_list, FM_FMRI_CPU_CACHE_INDEX);
if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_CACHE_WAY, &way) != 0)
(void) strcat(missing_list, FM_FMRI_CPU_CACHE_WAY);
if (nvlist_lookup_uint16(nvl, FM_FMRI_CPU_CACHE_BIT, &bit) != 0)
(void) strcat(missing_list, FM_FMRI_CPU_CACHE_BIT);
if (nvlist_lookup_uint8(nvl, FM_FMRI_CPU_CACHE_TYPE, &type) != 0)
(void) strcat(missing_list, FM_FMRI_CPU_CACHE_TYPE);
if (strlen(missing_list) != 0) {
fmd_hdl_debug(hdl,
"\ncmd_fmri_nvl2str: missing %s in fmri\n",
missing_list);
return (-1);
}
return (snprintf(buf, buflen,
"cpu:///%s=%u/%s=%s/%s=%u/%s=%u/%s=%d/%s=%d",
FM_FMRI_CPU_ID, cpuid,
FM_FMRI_CPU_SERIAL_ID, serstr,
FM_FMRI_CPU_CACHE_INDEX, index,
FM_FMRI_CPU_CACHE_WAY, way,
FM_FMRI_CPU_CACHE_BIT, bit,
FM_FMRI_CPU_CACHE_TYPE, type));
}
boolean_t
cmd_create_case_for_Lxcache(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
cmd_Lxcache_t *cmd_Lxcache)
{
const char *fltnm;
const char *uuid;
if (cmd_Lxcache->Lxcache_case.cc_cp != NULL)
return (B_TRUE);
cmd_Lxcache->Lxcache_case.cc_cp = cmd_case_create(hdl,
&cmd_Lxcache->Lxcache_header, CMD_PTR_LxCACHE_CASE,
&uuid);
fltnm = cmd_type_to_str(cmd_Lxcache->Lxcache_type);
if (cmd_Lxcache->Lxcache_case.cc_cp == NULL) {
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d:Failed to create a case for"
" index %d way %d bit %d\n",
fltnm, cpu->cpu_cpuid,
cmd_Lxcache->Lxcache_index,
cmd_Lxcache->Lxcache_way, cmd_Lxcache->Lxcache_bit);
return (B_FALSE);
}
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d: New case %s created.\n",
fltnm, cpu->cpu_cpuid, uuid);
if (cmd_Lxcache->Lxcache_ep)
fmd_case_add_ereport(hdl, cmd_Lxcache->Lxcache_case.cc_cp,
cmd_Lxcache->Lxcache_ep);
return (B_TRUE);
}
static int
cmd_repair_fmri(fmd_hdl_t *hdl, char *buf)
{
int err;
err = fmd_repair_asru(hdl, buf);
if (err) {
fmd_hdl_debug(hdl,
"Failed to repair %s err = %d\n", buf, err);
}
return (err);
}
boolean_t
cmd_Lxcache_unretire(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
cmd_Lxcache_t *unretire_this_Lxcache, const char *fltnm)
{
cmd_ptrsubtype_t data_type;
cmd_Lxcache_t *previously_retired_Lxcache;
int found_reretired_cacheline = 0;
int certainty;
/*
* If we are unretiring a cacheline retired due to suspected TAG
* fault, then we must first check if we are using a cacheline
* that was retired earlier for DATA fault.
* If so we will not unretire the cacheline.
* We will change the flags to reflect the current condition.
* We will return success, though.
*/
if (IS_TAG(unretire_this_Lxcache->Lxcache_type)) {
if (unretire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L2TAG)
data_type = CMD_PTR_CPU_L2DATA;
if (unretire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L3TAG)
data_type = CMD_PTR_CPU_L3DATA;
fmd_hdl_debug(hdl,
"\n%s:cpuid %d checking if there is a %s"
" cacheline re-retired at this index %d and way %d\n",
fltnm, cpu->cpu_cpuid, cmd_type_to_str(data_type),
unretire_this_Lxcache->Lxcache_index,
unretire_this_Lxcache->Lxcache_way);
previously_retired_Lxcache =
cmd_Lxcache_lookup_by_type_index_way_flags(
cpu, data_type, unretire_this_Lxcache->Lxcache_index,
unretire_this_Lxcache->Lxcache_way,
CMD_LxCACHE_F_RERETIRED);
if (previously_retired_Lxcache) {
fmd_hdl_debug(hdl,
"\n%s:cpuid %d Found a %s cacheline re-retired at"
" this index %d and way %d. Will mark this"
" RETIRED\n",
fltnm, cpu->cpu_cpuid, cmd_type_to_str(data_type),
unretire_this_Lxcache->Lxcache_index,
unretire_this_Lxcache->Lxcache_way);
/*
* We call the cmd_Lxcache_fault to inform fmd
* about the suspect fmri. The cacheline is already
* retired but the existing suspect fmri is for TAG
* fault which will be removed in this routine.
*/
if (previously_retired_Lxcache->Lxcache_reason
== CMD_LXCONVICTED)
certainty = HUNDRED_PERCENT;
else
certainty = SUSPECT_PERCENT;
cmd_Lxcache_fault(hdl, cpu, previously_retired_Lxcache,
fltnm, cpu->cpu_fru_nvl, certainty);
previously_retired_Lxcache->Lxcache_flags =
CMD_LxCACHE_F_RETIRED;
/*
* Update persistent storage
*/
cmd_Lxcache_write(hdl, previously_retired_Lxcache);
found_reretired_cacheline = 1;
}
} else {
/*
* We have been called to unretire a cacheline retired
* earlier due to DATA errors.
* If this cacheline is marked RERETIRED then it means that
* the cacheline has been retired due to TAG errors and
* we should not be unretiring the cacheline.
*/
if (unretire_this_Lxcache->Lxcache_flags &
CMD_LxCACHE_F_RERETIRED) {
fmd_hdl_debug(hdl,
"\n%s:cpuid %d The cacheline at index %d and"
" way %d which we are attempting to unretire"
" is in RERETIRED state. Therefore we will not"
" unretire it but will mark it as RETIRED.\n",
fltnm, cpu->cpu_cpuid,
unretire_this_Lxcache->Lxcache_index,
unretire_this_Lxcache->Lxcache_way);
found_reretired_cacheline = 1;
}
}
/*
* if we did not find a RERETIRED cacheline above
* unretire the cacheline.
*/
if (!found_reretired_cacheline) {
if (cmd_cache_way_unretire(hdl, cpu, unretire_this_Lxcache)
== B_FALSE)
return (B_FALSE);
}
unretire_this_Lxcache->Lxcache_flags = CMD_LxCACHE_F_UNRETIRED;
/*
* We have exonerated the cacheline. We need to inform the fmd
* that we have repaired the suspect fmri that we retired earlier.
* The cpumem agent will not unretire cacheline in response to
* the list.repair events it receives.
*/
if (unretire_this_Lxcache->Lxcache_retired_fmri[0] != 0) {
fmd_hdl_debug(hdl,
"\n%s:cpuid %d Repairing the retired fmri %s",
fltnm, cpu->cpu_cpuid,
unretire_this_Lxcache->Lxcache_retired_fmri);
if (cmd_repair_fmri(hdl,
unretire_this_Lxcache->Lxcache_retired_fmri) != 0) {
fmd_hdl_debug(hdl,
"\n%s:cpuid %d Failed to repair retired fmri.",
fltnm, cpu->cpu_cpuid);
/*
* We need to retire the cacheline that we just
* unretired.
*/
if (cmd_cache_way_retire(hdl, cpu,
unretire_this_Lxcache) == B_FALSE) {
/*
* A hopeless situation.
* cannot maintain consistency of cacheline
* sate between fmd and DE.
* Aborting the DE.
*/
fmd_hdl_abort(hdl,
"\n%s:cpuid %d We are unable to repair"
" the fmri we just unretired and are"
" unable to restore the DE and fmd to"
" a sane state.\n",
fltnm, cpu->cpu_cpuid);
}
return (B_FALSE);
} else {
unretire_this_Lxcache->Lxcache_retired_fmri[0] = 0;
}
}
return (B_TRUE);
}
boolean_t
cmd_Lxcache_retire(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
cmd_Lxcache_t *retire_this_Lxcache, const char *fltnm, uint_t cert)
{
cmd_Lxcache_t *previously_retired_Lxcache;
cmd_ptrsubtype_t data_type;
const char *uuid;
char suspect_list[128];
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d: cmd_Lxcache_retire called for index %d"
" way %d bit %d\n",
fltnm, cpu->cpu_cpuid, retire_this_Lxcache->Lxcache_index,
retire_this_Lxcache->Lxcache_way, retire_this_Lxcache->Lxcache_bit);
if (fmd_case_solved(hdl, retire_this_Lxcache->Lxcache_case.cc_cp)) {
/*
* Case solved implies that the cache line is already
* retired as SUSPECT_0_TAG and we are here to retire this
* as SUSPECT_1_TAG.
* We will first repair the retired cacheline
* so that it does not get retired during replay for
* wrong reason.
* If we are able to repair the retired cacheline we close the
* case and open a new case for it.
*/
if (retire_this_Lxcache->Lxcache_reason !=
CMD_LXSUSPECT_0_TAG) {
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d: Unexpected condition encountered."
" Expected the reason for retirement as"
" SUSPECT_0_TAG however found the reason"
" to be %s\n",
fltnm, cpu->cpu_cpuid,
cmd_reason_to_str(
retire_this_Lxcache->Lxcache_reason));
return (B_FALSE);
}
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d: We are re-retiring SUSPECT_0_TAG as"
" SUSPECT_1_TAG index %d way %d bit %d\n",
fltnm, cpu->cpu_cpuid,
retire_this_Lxcache->Lxcache_index,
retire_this_Lxcache->Lxcache_way,
retire_this_Lxcache->Lxcache_bit);
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d: The existing case for this Lxcache has"
" has been already solved. We will first repair the suspect"
" cacheline and if we are successful then close this case,"
" and open a new case.\n",
fltnm, cpu->cpu_cpuid);
/*
* repair the retired cacheline.
*/
if (retire_this_Lxcache->Lxcache_retired_fmri[0] != 0) {
fmd_hdl_debug(hdl,
"\n%s:cpuid %d Repairing the retired suspect"
" cacheline %s\n",
fltnm, cpu->cpu_cpuid,
retire_this_Lxcache->Lxcache_retired_fmri);
if (cmd_repair_fmri(hdl,
retire_this_Lxcache->Lxcache_retired_fmri) != 0) {
fmd_hdl_debug(hdl,
"\n%s:cpuid %d Failed to repair the"
" retired fmri.",
fltnm, cpu->cpu_cpuid);
return (B_FALSE);
} else {
retire_this_Lxcache->Lxcache_retired_fmri[0] =
0;
}
}
uuid = fmd_case_uuid(hdl,
retire_this_Lxcache->Lxcache_case.cc_cp);
fmd_hdl_debug(hdl,
"\n%s:cpuid %d: Closing the case %s\n",
fltnm, cpu->cpu_cpuid, uuid);
cmd_case_fini(hdl, retire_this_Lxcache->Lxcache_case.cc_cp,
FMD_B_TRUE);
retire_this_Lxcache->Lxcache_case.cc_cp = NULL;
if (cmd_create_case_for_Lxcache(hdl, cpu, retire_this_Lxcache)
== B_FALSE)
return (B_FALSE);
} else {
/*
* Not a SUSPECT_0_TAG.
* We should be entering this path if the cacheline is
* transitioning from ACTIVE/UNRETIRED to RETIRED state.
* If the cacheline state is not as expected we print debug
* message and return failure.
*/
if ((retire_this_Lxcache->Lxcache_flags !=
CMD_LxCACHE_F_ACTIVE) &&
(retire_this_Lxcache->Lxcache_flags
!= CMD_LxCACHE_F_UNRETIRED)) {
/*
* Unexpected condition.
*/
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d:Unexpected state %s for the"
" cacheline at index %d way %d encountered.\n",
fltnm, cpu->cpu_cpuid,
cmd_flags_to_str(
retire_this_Lxcache->Lxcache_flags),
retire_this_Lxcache->Lxcache_index,
retire_this_Lxcache->Lxcache_way);
return (B_FALSE);
}
}
suspect_list[0] = 0;
(void) cmd_fmri_nvl2str(hdl, retire_this_Lxcache->Lxcache_asru.fmri_nvl,
suspect_list, sizeof (suspect_list));
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d:current suspect list is %s\n",
fltnm, cpu->cpu_cpuid, suspect_list);
cmd_Lxcache_fault(hdl, cpu, retire_this_Lxcache, fltnm,
cpu->cpu_fru_nvl,
cert);
retire_this_Lxcache->Lxcache_flags = CMD_LxCACHE_F_RETIRED;
if (IS_TAG(retire_this_Lxcache->Lxcache_type)) {
/*
* If the cacheline we just retired was retired earlier
* due to DATA faults we mark the Lxcache
* corresponding to DATA as RERETIRED.
*/
if (retire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L2TAG)
data_type = CMD_PTR_CPU_L2DATA;
if (retire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L3TAG)
data_type = CMD_PTR_CPU_L3DATA;
fmd_hdl_debug(hdl,
"\n%s:cpuid %d checking if there is a %s"
" cacheline retired at this index %d way %d\n",
fltnm, cpu->cpu_cpuid,
cmd_type_to_str(data_type),
retire_this_Lxcache->Lxcache_index,
retire_this_Lxcache->Lxcache_way);
previously_retired_Lxcache =
cmd_Lxcache_lookup_by_type_index_way_flags(cpu,
data_type, retire_this_Lxcache->Lxcache_index,
retire_this_Lxcache->Lxcache_way, CMD_LxCACHE_F_RETIRED);
if (previously_retired_Lxcache) {
fmd_hdl_debug(hdl,
"\n%s:cpu_id %d: Found index %d way %d"
" retired earlier. Will mark this Lxcache"
" as RERETIRED.\n",
fltnm, cpu->cpu_cpuid,
retire_this_Lxcache->Lxcache_index,
retire_this_Lxcache->Lxcache_way);
/*
* First repair the retired cacheline and if successful
* close the existing case and create a new case.
*/
/*
* This cacheline has already been retired for
* TAG fault.
* Repair the previously retired DATA fault cacheline so
* that it does not get retired by fmd during replay.
*/
if (previously_retired_Lxcache->Lxcache_retired_fmri[0]
!= 0) {
fmd_hdl_debug(hdl,
"\n%s:cpuid %d Repairing the cacheline"
" retired due to data errors. %s\n",
fltnm, cpu->cpu_cpuid,
previously_retired_Lxcache->
Lxcache_retired_fmri);
if (cmd_repair_fmri(hdl,
previously_retired_Lxcache->
Lxcache_retired_fmri)
!= 0) {
fmd_hdl_debug(hdl,
"\n%s:cpuid %d Failed to repair the"
" retired fmri.",
fltnm, cpu->cpu_cpuid);
return (B_FALSE);
} else {
previously_retired_Lxcache->
Lxcache_retired_fmri[0] = 0;
}
}
cmd_case_fini(hdl,
previously_retired_Lxcache->Lxcache_case.cc_cp,
FMD_B_TRUE);
previously_retired_Lxcache->Lxcache_case.cc_cp = NULL;
previously_retired_Lxcache->Lxcache_flags =
CMD_LxCACHE_F_RERETIRED;
/*
* Update persistent storage
*/
cmd_Lxcache_write(hdl, previously_retired_Lxcache);
/*
* Create a new case so that this Lxcache structure
* gets restored on replay.
*/
if (cmd_create_case_for_Lxcache(hdl, cpu,
previously_retired_Lxcache) == B_FALSE)
return (B_FALSE);
}
}
cmd_retire_cpu_if_limits_exceeded(hdl, cpu,
retire_this_Lxcache->Lxcache_type,
fltnm);
return (B_TRUE);
}