cma_main.c revision 491f61a1e1c1fc54a47bbcf53dbbbe1293b93b27
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <cma.h>
#include <unistd.h>
#include <fcntl.h>
#include <strings.h>
#include <errno.h>
#include <time.h>
#include <sys/systeminfo.h>
#ifdef sun4v
#endif
#ifdef i386
#endif
/*
* Defined outside this file. Declared with an explicit (void) parameter
* list so that the compiler rejects calls that pass arguments; before C23
* an empty () declares a function with unspecified parameters and turns
* argument checking off.
*/
extern const char *fmd_fmri_get_platform(void);
/*
* File-scope cma_stats_t object for this module. It is not static, so it
* has external linkage and can be referenced from other translation units.
*/
cma_stats_t cma_stats = {
};
/*
* One subscription entry. In the cma_subrs table the first initializer of
* each entry is a fault class string (which may contain '*' wildcards) and
* the second is an FM_FMRI_SCHEME_* constant.
*/
typedef struct cma_subscriber {
/* Fault class string this entry subscribes to. */
const char *subr_class;
/* FMRI scheme name paired with the fault class. */
const char *subr_sname;
/*
* Subscription table. Each entry begins with a fault class (which may
* contain '*' wildcards) followed by the FMRI scheme it applies to. The
* contents are selected at build time by the preprocessor conditionals
* below. As the comments inside the table note, entries that must not
* reach a default "fault.cpu.*" dispatch are listed ahead of it, so the
* order of the entries is significant.
*/
static const cma_subscriber_t cma_subrs[] = {
#if defined(i386)
/*
* On x86, the ASRUs are expected to be in hc scheme. When
* cpumem-retire wants to retire a cpu or mem page, it calls the
* methods registered in the topo node to do that. The topo
* enumerator necessarily knows all the config info, so this moves
* much of that complexity away from the agent and into the entity
* that has that information.
*/
{ "fault.memory.generic-x86.page_ce", FM_FMRI_SCHEME_HC,
{ "fault.memory.generic-x86.page_ue", FM_FMRI_SCHEME_HC,
{ "fault.memory.intel.page_ce", FM_FMRI_SCHEME_HC,
{ "fault.memory.intel.page_ue", FM_FMRI_SCHEME_HC,
NULL },
NULL },
NULL },
NULL },
{ "fault.memory.generic-x86.dimm_ce", FM_FMRI_SCHEME_HC,
{ "fault.memory.generic-x86.dimm_ue", FM_FMRI_SCHEME_HC,
{ "fault.memory.intel.dimm_ce", FM_FMRI_SCHEME_HC,
{ "fault.memory.intel.dimm_ue", FM_FMRI_SCHEME_HC,
{ "fault.memory.intel.fbd.*", FM_FMRI_SCHEME_HC,
{ "fault.memory.dimm_testfail", FM_FMRI_SCHEME_HC,
NULL },
NULL },
{ "fault.cpu.intel.quickpath.mem_scrubbing", FM_FMRI_SCHEME_HC,
{ "fault.cpu.intel.quickpath.*", FM_FMRI_SCHEME_HC,
{ "fault.cpu.generic-x86.mc", FM_FMRI_SCHEME_HC,
{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_HC,
{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_CPU,
/*
* The ASRUs for cpu faults are in cpu scheme on native and in hc
* scheme on xpv, so each cpu fault class needs to be listed twice.
*/
/*
* The following faults do NOT retire a cpu thread,
* and therefore must be intercepted before
* the default "fault.cpu.*" dispatch to cma_cpu_hc_retire.
*/
NULL },
{ "fault.cpu.amd.dramchannel", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.generic-x86.bus_interconnect_memory", FM_FMRI_SCHEME_HC,
{ "fault.cpu.generic-x86.bus_interconnect_memory", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.generic-x86.bus_interconnect_io", FM_FMRI_SCHEME_HC,
{ "fault.cpu.generic-x86.bus_interconnect_io", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.generic-x86.bus_interconnect", FM_FMRI_SCHEME_HC,
{ "fault.cpu.generic-x86.bus_interconnect", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.intel.bus_interconnect_memory", FM_FMRI_SCHEME_HC,
{ "fault.cpu.intel.bus_interconnect_memory", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.intel.bus_interconnect_io", FM_FMRI_SCHEME_HC,
{ "fault.cpu.intel.bus_interconnect_io", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.intel.bus_interconnect", FM_FMRI_SCHEME_HC,
{ "fault.cpu.intel.bus_interconnect", FM_FMRI_SCHEME_CPU,
NULL },
NULL },
NULL },
NULL },
/*
* The following are PI sun4v faults
*/
{ "fault.memory.memlink", FM_FMRI_SCHEME_HC,
{ "fault.memory.memlink-uc", FM_FMRI_SCHEME_HC,
{ "fault.memory.memlink-failover", FM_FMRI_SCHEME_HC,
{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_HC,
{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_HC,
{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_HC,
NULL },
NULL },
NULL },
NULL },
{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_MEM,
{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_MEM,
{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_MEM,
NULL },
NULL },
NULL },
NULL },
NULL },
NULL },
NULL },
NULL },
/*
* The following ultraSPARC-T1/T2 faults do NOT retire a cpu thread,
* and therefore must be intercepted before
* the default "fault.cpu.*" dispatch to cma_cpu_hc_retire.
*/
{ "fault.cpu.*.l2cachedata", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.*.l2cachetag", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.*.l2cachectl", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.*.l2data-c", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.*.l2data-u", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.*.mau", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.*.lfu-u", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.*.lfu-f", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.*.lfu-p", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.ultraSPARC-T1.freg", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.ultraSPARC-T1.l2cachedata", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.ultraSPARC-T1.l2cachetag", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.ultraSPARC-T1.l2cachectl", FM_FMRI_SCHEME_CPU,
{ "fault.cpu.ultraSPARC-T1.mau", FM_FMRI_SCHEME_CPU,
NULL },
{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_MEM,
{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_MEM,
{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_MEM,
NULL },
{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.se-offlinereq",
{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.ce-offlinereq",
{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.se-offlinereq",
{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.ce-offlinereq",
#else
/*
* For platforms excluding i386, sun4v and opl.
*/
NULL },
NULL },
NULL },
NULL },
{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_MEM,
{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_MEM,
{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_MEM,
{ "fault.memory.dimm_testfail", FM_FMRI_SCHEME_MEM,
NULL },
NULL },
/*
* The following faults do NOT retire a cpu thread,
* and therefore must be intercepted before
* the default "fault.cpu.*" dispatch to cma_cpu_cpu_retire.
*/
{ "fault.cpu.ultraSPARC-IVplus.l2cachedata-line",
{ "fault.cpu.ultraSPARC-IVplus.l3cachedata-line",
{ "fault.cpu.ultraSPARC-IVplus.l2cachetag-line",
{ "fault.cpu.ultraSPARC-IVplus.l3cachetag-line",
/*
* Default "fault.cpu.*" for "cpu" scheme ASRU dispatch.
*/
#endif
};
/*
* Select a subscriber entry. The result is a pointer to a const
* cma_subscriber_t (sp) on the one successful path; every bail-out path,
* including the final fall-through, returns NULL, so callers must check
* the result before using it.
*/
static const cma_subscriber_t *
{
const cma_subscriber_t *sp;
char *scheme;
retire == 0) {
return (NULL);
}
return (NULL);
}
return (sp);
}
}
return (NULL);
}
/*
* Process a set of faults on behalf of the case identified by uuid.
* keepopen is decremented once for every handler result equal to
* CMA_RA_SUCCESS. A second pass then picks up new faults in
* list.updated, and the case is not closed while cache faults are being
* handled. The function itself returns nothing.
*/
static void
{
int err = 0;
if (err != 0) {
return;
}
const cma_subscriber_t *subr;
int has_fault;
continue;
/*
* A handler returns CMA_RA_SUCCESS to indicate that,
* from this suspect's point of view, the case may be
* closed, and CMA_RA_FAILURE otherwise.
* A handler must not close the case itself.
*/
if (has_fault == 1)
uuid, 0);
} else {
if (has_fault == 0)
uuid, 1);
}
/* Each successful handler is one fewer reason to keep the case open. */
if (err == CMA_RA_SUCCESS)
keepopen--;
}
}
/*
* Run through again to catch any new faults in list.updated.
*/
const cma_subscriber_t *subr;
int has_fault;
continue;
if (has_fault == 1)
}
}
/*
* Do not close the case if we are handling cache faults.
*/
&index) != 0) {
FM_LIST_SUSPECT_CLASS) == 0) {
}
}
}
}
/*
* Works with a single cma_subscriber_t entry (subr) and returns early on
* its bail-out path. Returns nothing.
*/
static void
{
const cma_subscriber_t *subr;
return;
}
}
/*
* Handler with an early-return path followed by an else branch. The
* ARGSUSED lint directive below tells lint not to warn about parameters
* that are not referenced.
*/
/*ARGSUSED*/
static void
{
return;
else
}
/*
* Handler whose body has a sun4v-specific section. The ARGSUSED lint
* directive below tells lint not to warn about parameters that are not
* referenced.
*/
/*ARGSUSED*/
static void
{
#ifdef sun4v
/*
* ldom request can be asynchronous.
*/
#endif
}
#ifdef sun4v
/*
* File-local helper, built only when sun4v is defined, that returns an
* untyped pointer.
*/
static void *
{
}
/*
* File-local helper, built only when sun4v is defined, that returns
* nothing.
*/
static void
{
}
#endif
/*
* Entry-point table of type fmd_hdl_ops_t. cma_recv fills the fmdo_recv
* slot and cma_timeout fills the fmdo_timeout slot; the fmdo_close,
* fmdo_stats and fmdo_gc slots are NULL, i.e. this module supplies no
* callback for them.
*/
static const fmd_hdl_ops_t fmd_ops = {
cma_recv, /* fmdo_recv */
cma_timeout, /* fmdo_timeout */
NULL, /* fmdo_close */
NULL, /* fmdo_stats */
NULL, /* fmdo_gc */
};
/*
* Table of fmd_prop_t entries. It has a section that is built only for
* sun4v and a section that differs between opl and non-opl builds.
*/
static const fmd_prop_t fmd_props[] = {
#ifdef sun4v
#endif /* sun4v */
#ifdef opl
#else
#endif /* opl */
};
/*
* File-local, read-only fmd_hdl_info_t object for this module.
*/
static const fmd_hdl_info_t fmd_info = {
};
/*
* Externally visible setup routine (NOTE(review): presumably the module's
* init entry point -- confirm against the full file). On i386 it returns
* early when Solaris is running under DomU, and on any platform it
* returns early when the configuration file holds invalid data. After
* those checks it refers to the names "cpu_forced_offline",
* "cpu_blacklist_enable", "cpu_unblacklist_enable" and
* "page_unretire_enable". There are additional opl- and sun4v-specific
* sections.
*/
void
{
#ifdef i386
const char *dom0 = "control_d";
/*
* Abort the cpumem-retire module if Solaris is running under DomU.
*/
return;
return;
} else {
if (fd != -1) {
return;
}
}
}
#endif /* i386 */
return; /* invalid data in configuration file */
#ifdef opl
#endif
#ifdef sun4v
#endif
"cpu_forced_offline");
"cpu_blacklist_enable");
"cpu_unblacklist_enable");
"page_unretire_enable");
#ifdef sun4v
#endif
}
/*
* Externally visible routine whose body has a sun4v-specific section
* (NOTE(review): presumably the module's teardown entry point -- confirm
* against the full file).
*/
void
{
#ifdef sun4v
#endif
}