/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Support routines for managing per-Lxcache state.
*/
#include <cmd_Lxcache.h>
#include <cmd_mem.h>
#include <cmd_cpu.h>
#include <cmd.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <strings.h>
#include <sys/cheetahregs.h>
#include <sys/mem_cache.h>
/*
* These values are our threshold values for SERDing CPUs based on the
* number of times we have retired a cache line for each category.
*/
/*
* Indexed by a 4-bit mask: position of the lowest set bit in the mask,
* or -1 when the mask is 0 (no bit set).
*/
/* 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf */
-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
/*
* Indexed by a 4-bit mask: number of bits set in the mask.
*/
/* 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf */
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
void
{
sizeof (cmd_Lxcache_pers_t));
}
const char *
{
	/*
	 * Translate the cache pointer subtype into its short printable
	 * name.  Any subtype not listed below yields "unknown".
	 */
	const char *name;

	switch (pstype) {
	case CMD_PTR_CPU_L2DATA:
		name = "l2data";
		break;
	case CMD_PTR_CPU_L3DATA:
		name = "l3data";
		break;
	case CMD_PTR_CPU_L2TAG:
		name = "l2tag";
		break;
	case CMD_PTR_CPU_L3TAG:
		name = "l3tag";
		break;
	default:
		name = "unknown";
		break;
	}
	return (name);
}
/*
* Return a printable name for an Lxcache state flag value.
* The switch compares the whole value, so anything other than exactly
* one of the CMD_LxCACHE_F_* cases below maps to "Unknown_flags".
* The returned pointer refers to a string literal.
*/
const char *
{
switch (flags) {
case CMD_LxCACHE_F_ACTIVE:
return ("ACTIVE");
case CMD_LxCACHE_F_FAULTING:
return ("FAULTING");
case CMD_LxCACHE_F_RETIRED:
return ("RETIRED");
case CMD_LxCACHE_F_UNRETIRED:
return ("UNRETIRED");
case CMD_LxCACHE_F_RERETIRED:
return ("RERETIRED");
default:
return ("Unknown_flags");
}
}
/*
* Return a printable name for an Lxcache retirement reason.
* Values other than the cases listed below map to "Unknown_reason".
* The returned pointer refers to a string literal.
*/
const char *
{
switch (reason) {
case CMD_LXSUSPECT_DATA:
return ("SUSPECT_DATA");
case CMD_LXSUSPECT_0_TAG:
return ("SUSPECT_0_TAG");
case CMD_LXSUSPECT_1_TAG:
return ("SUSPECT_1_TAG");
case CMD_LXCONVICTED:
return ("CONVICTED");
case CMD_LXFUNCTIONING:
return ("FUNCTIONING");
default:
return ("Unknown_reason");
}
}
static void
{
"\n"
" cpu = %s\n"
" type = %s\n"
" index = %d\n"
" way = %d\n"
" bit = %d\n"
" reason = %s\n"
" flags = %s\n",
}
void
int destroy)
{
}
}
if (Lxcache->Lxcache_nvl) {
}
/*
* Clean up the SERD engine created to handle recheck of TAGS.
* This SERD engine was created to save the event pointer.
*/
}
}
Lxcache->Lxcache_retry_count = 0;
if (destroy)
}
void
{
}
{
return (Lxcache);
}
return (NULL);
}
{
const char *pstype_name;
"\n%s:cpu_id %d:Creating new Lxcache for index=%d way=%d bit=%d\n",
Lxcache->Lxcache_retry_count = 0;
Lxcache->Lxcache_clcode = 0;
switch (pstype) {
case CMD_PTR_CPU_L2DATA:
break;
case CMD_PTR_CPU_L3DATA:
break;
case CMD_PTR_CPU_L2TAG:
break;
case CMD_PTR_CPU_L3TAG:
break;
default:
break;
}
"\n%s:cpu_id %d: new Lxcache name is %s\n",
index)) != 0 ||
bit)) != 0 ||
fmri_Lxcache_type)) != 0 ||
return (Lxcache);
}
{
return (cache);
}
}
return (NULL);
}
static cmd_Lxcache_t *
{
if (psz != sizeof (cmd_Lxcache_pers_t)) {
"version 1 state (%u bytes).\n",
sizeof (cmd_Lxcache_pers_t));
}
return (Lxcache);
}
void *
{
char *serdnm;
/*
* We need to first extract the cpu name by reading directly
* from fmd buffers in order to begin our search for Lxcache in
* the appropriate cpu list.
* After we identify the cpu list using buf name we look
* in cpu list for our Lxcache states.
*/
"not exist in saved state\n",
} else if (Lxcachesz != sizeof (cmd_Lxcache_pers_t)) {
"is %d bytes. Expected size is %d bytes\n",
sizeof (cmd_Lxcache_pers_t));
}
}
"\nCould not restore cpu %s\n",
return (NULL);
}
break;
}
switch (recovered_Lxcache->Lxcache_version) {
case CMD_LxCACHE_VERSION_1:
break;
default:
"for Lxcache state referenced by case %s.\n",
break;
}
/*
* We need to clean up the information associated with
* the timeout routine because these are not checkpointed
* and cannot be restored.
*/
Lxcache->Lxcache_retry_count = 0;
}
"cpu_id %d: serdname for the case is %s\n",
"cpu_id %d: restoring the case for index %d way %d bit %d\n",
return (Lxcache);
}
/*ARGSUSED*/
void
{
}
}
}
void
{
sizeof (cmd_Lxcache_pers_t))
/* No need to rewrite the FMRIs in the Lxcache - they don't change */
sizeof (cmd_Lxcache_pers_t));
}
void
{
}
char *
{
const char *serdbase;
char *nm;
return (nm);
}
char *
{
const char *serdbase;
char *nm;
return (nm);
}
/*
* Count the number of SERD type 2 ways retired for a given cpu
* These are defined to be L3 Cache data retirements
*/
{
ret_count++;
}
}
return (ret_count);
}
/*
* Count the number of SERD type 1 ways retired for a given cpu
* These are defined to be L2 Data, tag and L3 Tag retirements
*/
{
ret_count++;
}
}
return (ret_count);
}
void
const char *fltnm)
{
const char *uuid;
&uuid);
"\n%s:cpu_id %d Created case %s to retire CPU\n",
}
void
{
/* Retrieve the number of retired ways for each category */
"\n%s:CPU %d retired Type 1 way count is: %d\n",
if (((cpu_retired_1 > CMD_CPU_SERD_AGG_1) ||
(cpu_retired_2 > CMD_CPU_SERD_AGG_2)) &&
}
}
void
{
"\n%s:cpu_id %d: fltmsg = %s\n",
return;
}
"\n%s:cpu_id %d: adding suspect list to case %s\n",
if (Lxcache->Lxcache_retired_fmri[0] == 0) {
"\n%s:cpu_id %d: Failed to save the"
" retired fmri string\n",
else
"\n%s:cpu_id %d:Saved the retired fmri string %s\n",
}
}
void
{
Lxcache->Lxcache_cpu_bufname) == 0)
break;
}
/*
* We will destroy the case and serd engine.
* The rest will be destroyed when we retire the CPU;
* until then we keep the Lxcache structures alive.
*/
}
}
}
{
cmd_Lxcache != NULL;
return (cmd_Lxcache);
}
}
return (NULL);
}
void
{
}
{
int fd;
if (fd == -1) {
(void) printf(
"cpu_id = %d could not open %s to read tag info.\n",
return (CMD_EVD_BAD);
}
switch (pstype) {
case CMD_PTR_CPU_L2TAG:
case CMD_PTR_CPU_L2DATA:
break;
case CMD_PTR_CPU_L3TAG:
case CMD_PTR_CPU_L3DATA:
break;
}
cache_info.way = 0;
if (test_mode) {
== -1) {
(void) printf("cpu_id = %d ioctl"
" MEM_CACHE_READ_ERROR_INJECTED_TAGS failed"
" errno = %d\n",
return (CMD_EVD_BAD);
}
} else {
== -1) {
(void) printf("cpu_id = %d ioctl"
" MEM_CACHE_READ_TAGS failed"
" errno = %d\n",
return (CMD_EVD_BAD);
}
}
return (CMD_EVD_OK);
}
/*
* Count ways by tag state: walks the PN_CACHE_NWAYS entries of tag_data
* and returns how many have state bits (under CH_ECSTATE_MASK) equal to
* the compared state, or -1 on the early-exit path.
* NOTE(review): the function name, its parameters, the condition that
* leads to the -1 return and the state constant being compared are not
* visible in this excerpt -- confirm against the full file.
*/
int
{
int i, retired_ways;
return (-1);
}
retired_ways = 0;
for (i = 0; i < PN_CACHE_NWAYS; i++) {
/* Only the state bits of each tag take part in the comparison. */
if ((tag_data[i] & CH_ECSTATE_MASK) ==
retired_ways++;
}
return (retired_ways);
}
{
const char *fltnm;
if (fd == -1) {
"fltnm:cpu_id %d open of %s failed\n",
return (B_FALSE);
}
switch (Lxcache->Lxcache_type) {
case CMD_PTR_CPU_L2TAG:
break;
case CMD_PTR_CPU_L2DATA:
break;
case CMD_PTR_CPU_L3TAG:
break;
case CMD_PTR_CPU_L3DATA:
break;
}
"\n%s:cpu %d: Retiring index %d, way %d bit %d\n",
if (ret == -1) {
"fltnm:cpu_id %d MEM_CACHE_RETIRE ioctl failed\n",
return (B_FALSE);
}
return (B_TRUE);
}
{
const char *fltnm;
if (fd == -1) {
"fltnm:cpu_id %d open of %s failed\n",
return (B_FALSE);
}
switch (Lxcache->Lxcache_type) {
case CMD_PTR_CPU_L2TAG:
break;
case CMD_PTR_CPU_L2DATA:
break;
case CMD_PTR_CPU_L3TAG:
break;
case CMD_PTR_CPU_L3DATA:
break;
}
"\n%s:cpu %d: Unretiring index %d, way %d bit %d\n",
if (ret == -1) {
"fltnm:cpu_id %d MEM_CACHE_UNRETIRE ioctl failed\n",
return (B_FALSE);
}
return (B_TRUE);
}
static cmd_Lxcache_t *
{
cmd_Lxcache != NULL;
return (cmd_Lxcache);
}
return (NULL);
}
static int8_t
{
/*
* We scan the Lxcache structures for this CPU and collect
* the following two pieces of information:
* - bit_array_of_retired_ways
* - bit_array_of_unavailable_ways
* If type is Lx_TAG then unavailable_ways will not include ways that
* were retired due to DATA faults, because these ways can still be
* re-retired for TAG faults.
* If 3 ways have been retired then we protect the only remaining
* unretired way by marking it as unavailable.
*/
switch (type) {
case CMD_PTR_CPU_L2TAG:
break;
case CMD_PTR_CPU_L2DATA:
break;
case CMD_PTR_CPU_L3TAG:
break;
case CMD_PTR_CPU_L3DATA:
break;
}
cmd_Lxcache != NULL;
/*
* If we are calling this while handling TAG errors
* we can reretire the cachelines retired due to DATA
* errors. We will ignore the cachelines that are
* retired due to DATA faults.
*/
if ((type == CMD_PTR_CPU_L2TAG) &&
continue;
if ((type == CMD_PTR_CPU_L3TAG) &&
continue;
}
}
/*
* special case: 3 ways are already retired.
* The Lone unretired way is set as 1, rest are set as 0.
* We now OR this with bit_array_of_unavailable_ways
* so that this unretired way will not be allocated.
*/
bit_array_of_retired_ways ^= 0xf;
bit_array_of_retired_ways &= 0xf;
}
return (bit_array_of_available_ways);
}
/*
* Look for a way next to the specified way that is
* not in a retired state.
* We stop when way 3 is reached: a specified_way of 3 has no higher
* way left to try, so -1 is returned immediately.
* NOTE(review): the signature and the statements that apply the mask
* are not visible in this excerpt -- confirm against the full file.
*/
{
if (specified_way == 3)
return (-1);
cpu,
/*
* Select the bits above specified_way.  Assuming bit N stands for
* way N (TODO confirm): 0x8 = way 3, 0xc = ways 2-3, 0xe = ways 1-3.
*/
if (specified_way == 2)
mask = 0x8;
else if (specified_way == 1)
mask = 0xc;
else
mask = 0xe;
}
{
cpu,
return (cmd_lowest_way[bit_array_of_ways]);
}
{
cmd_Lxcache != NULL;
return (cmd_Lxcache);
}
}
return (NULL);
}
{
cmd_Lxcache != NULL;
return (cmd_Lxcache);
}
}
return (NULL);
}
void
{
cmd_Lxcache != NULL;
continue;
"\n%s:cpu_id %d destroying SERD"
" engine %s\n",
}
}
}
}
}
{
missing_list[0] = 0;
if (strlen(missing_list) != 0) {
"\ncmd_fmri_nvl2str: missing %s in fmri\n",
return (-1);
}
"cpu:///%s=%u/%s=%s/%s=%u/%s=%u/%s=%d/%s=%d",
}
{
const char *fltnm;
const char *uuid;
return (B_TRUE);
&uuid);
"\n%s:cpu_id %d:Failed to create a case for"
" index %d way %d bit %d\n",
return (B_FALSE);
}
"\n%s:cpu_id %d: New case %s created.\n",
if (cmd_Lxcache->Lxcache_ep)
return (B_TRUE);
}
static int
{
int err;
if (err) {
}
return (err);
}
{
int found_reretired_cacheline = 0;
int certainty;
/*
* If we are unretiring a cacheline retired due to suspected TAG
* fault, then we must first check if we are using a cacheline
* that was retired earlier for DATA fault.
* If so we will not unretire the cacheline.
* We will change the flags to reflect the current condition.
* We will return success, though.
*/
"\n%s:cpuid %d checking if there is a %s"
" cacheline re-retired at this index %d and way %d\n",
if (previously_retired_Lxcache) {
"\n%s:cpuid %d Found a %s cacheline re-retired at"
" this index %d and way %d. Will mark this"
" RETIRED\n",
/*
* We call the cmd_Lxcache_fault to inform fmd
* about the suspect fmri. The cacheline is already
* retired but the existing suspect fmri is for TAG
* fault which will be removed in this routine.
*/
== CMD_LXCONVICTED)
else
/*
* Update persistent storage
*/
}
} else {
/*
* We have been called to unretire a cacheline retired
* earlier due to DATA errors.
* If this cacheline is marked RERETIRED then it means that
* the cacheline has been retired due to TAG errors and
* we should not be unretiring the cacheline.
*/
"\n%s:cpuid %d The cacheline at index %d and"
" way %d which we are attempting to unretire"
" is in RERETIRED state. Therefore we will not"
" unretire it but will mark it as RETIRED.\n",
}
}
/*
* if we did not find a RERETIRED cacheline above
* unretire the cacheline.
*/
if (!found_reretired_cacheline) {
== B_FALSE)
return (B_FALSE);
}
/*
* We have exonerated the cacheline. We need to inform the fmd
* that we have repaired the suspect fmri that we retired earlier.
* The cpumem agent will not unretire cacheline in response to
* the list.repair events it receives.
*/
if (unretire_this_Lxcache->Lxcache_retired_fmri[0] != 0) {
"\n%s:cpuid %d Repairing the retired fmri %s",
if (cmd_repair_fmri(hdl,
unretire_this_Lxcache->Lxcache_retired_fmri) != 0) {
"\n%s:cpuid %d Failed to repair retired fmri.",
/*
* We need to retire the cacheline that we just
* unretired.
*/
unretire_this_Lxcache) == B_FALSE) {
/*
* A hopeless situation.
* Cannot maintain consistency of cacheline
* state between fmd and DE.
* Aborting the DE.
*/
"\n%s:cpuid %d We are unable to repair"
" the fmri we just unretired and are"
" unable to restore the DE and fmd to"
" a sane state.\n",
}
return (B_FALSE);
} else {
}
}
return (B_TRUE);
}
{
const char *uuid;
"\n%s:cpu_id %d: cmd_Lxcache_retire called for index %d"
" way %d bit %d\n",
/*
* Case solved implies that the cache line is already
* retired as SUSPECT_0_TAG and we are here to retire this
* as SUSPECT_1_TAG.
* We will first repair the retired cacheline
* so that it does not get retired during replay for
* wrong reason.
* If we are able to repair the retired cacheline we close the
* case and open a new case for it.
*/
if (retire_this_Lxcache->Lxcache_reason !=
"\n%s:cpu_id %d: Unexpected condition encountered."
" Expected the reason for retirement as"
" SUSPECT_0_TAG however found the reason"
" to be %s\n",
return (B_FALSE);
}
"\n%s:cpu_id %d: We are re-retiring SUSPECT_0_TAG as"
" SUSPECT_1_TAG index %d way %d bit %d\n",
"\n%s:cpu_id %d: The existing case for this Lxcache has"
" has been already solved. We will first repair the suspect"
" cacheline and if we are successful then close this case,"
" and open a new case.\n",
/*
* repair the retired cacheline.
*/
if (retire_this_Lxcache->Lxcache_retired_fmri[0] != 0) {
"\n%s:cpuid %d Repairing the retired suspect"
" cacheline %s\n",
if (cmd_repair_fmri(hdl,
retire_this_Lxcache->Lxcache_retired_fmri) != 0) {
"\n%s:cpuid %d Failed to repair the"
" retired fmri.",
return (B_FALSE);
} else {
0;
}
}
"\n%s:cpuid %d: Closing the case %s\n",
== B_FALSE)
return (B_FALSE);
} else {
/*
* Not a SUSPECT_0_TAG.
* We should be entering this path only if the cacheline is
* in one of the states checked below.
* If the cacheline state is not as expected we print debug
* message and return failure.
*/
if ((retire_this_Lxcache->Lxcache_flags !=
!= CMD_LxCACHE_F_UNRETIRED)) {
/*
* Unexpected condition.
*/
"\n%s:cpu_id %d:Unexpected state %s for the"
" cacheline at index %d way %d encountered.\n",
return (B_FALSE);
}
}
suspect_list[0] = 0;
suspect_list, sizeof (suspect_list));
"\n%s:cpu_id %d:current suspect list is %s\n",
cert);
/*
* If the cacheline we just retired was retired earlier
* due to DATA faults we mark the Lxcache
* corresponding to DATA as RERETIRED.
*/
"\n%s:cpuid %d checking if there is a %s"
" cacheline retired at this index %d way %d\n",
if (previously_retired_Lxcache) {
"\n%s:cpu_id %d: Found index %d way %d"
" retired earlier. Will mark this Lxcache"
" as RERETIRED.\n",
/*
* First repair the retired cacheline and if successful
* close the existing case and create a new case.
*/
/*
* This cacheline has already been retired for
* TAG fault.
* Repair the previously retired DATA fault cacheline so
* that it does not get retired by fmd during replay.
*/
!= 0) {
"\n%s:cpuid %d Repairing the cacheline"
" retired due to data errors. %s\n",
if (cmd_repair_fmri(hdl,
!= 0) {
"\n%s:cpuid %d Failed to repair the"
" retired fmri.",
return (B_FALSE);
} else {
Lxcache_retired_fmri[0] = 0;
}
}
/*
* Update persistent storage
*/
/*
* Create a new case so that this Lxcache structure
* gets restored on replay.
*/
return (B_FALSE);
}
}
fltnm);
return (B_TRUE);
}