cmd_mem.h revision d00f0155af9a9a671eb08a0dc30f5ea0a379c36c
1N/A/*
1N/A * CDDL HEADER START
1N/A *
1N/A * The contents of this file are subject to the terms of the
1N/A * Common Development and Distribution License, Version 1.0 only
1N/A * (the "License"). You may not use this file except in compliance
1N/A * with the License.
1N/A *
1N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
1N/A * or http://www.opensolaris.org/os/licensing.
1N/A * See the License for the specific language governing permissions
1N/A * and limitations under the License.
1N/A *
1N/A * When distributing Covered Code, include this CDDL HEADER in each
1N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1N/A * If applicable, add the following below this CDDL HEADER, with the
1N/A * fields enclosed by brackets "[]" replaced with your own identifying
1N/A * information: Portions Copyright [yyyy] [name of copyright owner]
1N/A *
1N/A * CDDL HEADER END
1N/A */
1N/A/*
1N/A * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
1N/A * Use is subject to license terms.
1N/A */
1N/A
1N/A#ifndef _CMD_MEM_H
1N/A#define _CMD_MEM_H
1N/A
1N/A#pragma ident "%Z%%M% %I% %E% SMI"
1N/A
1N/A/*
1N/A * Support routines for managing state related to memory modules.
1N/A *
1N/A * Correctable errors generally cause changes to the DIMM-related state (see
1N/A * cmd_dimm.c), whereas uncorrectable errors tend to use the bank-related
1N/A * routines (see cmd_bank.c). The primary exception to this division (though
1N/A * it eventually devolves to one of the two) is the RxE/FRx pair emitted by
1N/A * UltraSPARC-IIIi processors. With these errors, a complete pair must be
1N/A * received and matched before we know whether we're dealing with a CE or a UE.
1N/A */
1N/A
1N/A#include <cmd.h>
1N/A#include <cmd_state.h>
1N/A#include <cmd_fmri.h>
1N/A#include <sys/errclassify.h>
1N/A#include <cmd_cpu.h>
1N/A
1N/A#ifdef __cplusplus
1N/Aextern "C" {
1N/A#endif
1N/A
1N/A#define CMD_MEM_F_FAULTING 0x1 /* flag bit; NOTE(review): users are outside this header */
1N/A
1N/A/*
1N/A * Used to store as-yet unmatched IOxEs, RxEs, and FRxs. When a new IOxE,
1N/A * RxE or FRx arrives, we traverse the cmd.cmd_iorxefrx list, looking for
1N/A * matching entries. Matching has a cpuid-based component, as well as a
1N/A * temporal one. We can compare the cpuids directly, using the cmd_iorxefrx_t
1N/A * and the newly-received event. Temporal comparison isn't performed directly.
1N/A * Instead, we ensure that entries in the iorxefrx list are removed when they
1N/A * expire by means of timers. This frees the matching code from the need to
1N/A * worry about time.
1N/A */
1N/Atypedef struct cmd_iorxefrx {
1N/A cmd_list_t rf_list; /* List of cmd_iorxefrx_t's */
1N/A cmd_errcl_t rf_errcl; /* Error type (CMD_ERRCL_*) */
1N/A uint_t rf_afsr_agentid; /* Remote Agent ID (from AFSR) */
1N/A uint_t rf_det_agentid; /* Local Agent ID (from detector) */
1N/A id_t rf_expid; /* Timer ID for entry expiration */
1N/A uint64_t rf_afar; /* Valid for RxE only */
1N/A uint8_t rf_afar_status; /* Valid for RxE only */
1N/A ce_dispact_t rf_type; /* Valid for RxE only */
1N/A uint16_t rf_synd; /* Valid for FRx only */
1N/A uint8_t rf_synd_status; /* Valid for FRx only */
1N/A uint64_t rf_afsr; /* Valid for FRx only */
1N/A uint64_t rf_disp; /* Valid for RCE only */
1N/A} cmd_iorxefrx_t;
1N/A
1N/Atypedef struct cmd_dimm cmd_dimm_t; /* forward decl; struct body not in this header */
1N/Atypedef struct cmd_bank cmd_bank_t; /* forward decl; struct body not in this header */
1N/A
1N/A/*
1N/A * Correctable and Uncorrectable memory errors
1N/A *
1N/A * CEs of "Unknown" or "Intermittent" classification are not used in diagnosis.
1N/A *
1N/A * "Persistent" CEs are added to per-DIMM SERD engines. When the
1N/A * engine for a given DIMM fires, the page corresponding to the CE that
1N/A * caused the engine to fire is retired, and the SERD engine for that
1N/A * DIMM is reset.
1N/A *
1N/A * "Possibly Persistent" CEs are at least Persistent and so are treated
1N/A * as "Persistent" errors above, being added to the same SERD engines.
1N/A *
1N/A * "Leaky" CEs and "Sticky" CEs trigger immediate page retirement.
1N/A *
1N/A * "Possibly Sticky" CEs to which no valid partner test has been applied
1N/A * are not used in diagnosis. Where a valid partner test has been applied
1N/A * but did not confirm "Sticky" status there is a _suggestion_ that the
1N/A * original cpu may be a bad reader or writer or suffering from other
1N/A * datapath issues. To avoid retiring pages for such non-DIMM problems
1N/A * these classifications are also not used in diagnosis.
1N/A *
1N/A * UEs immediately trigger page retirements, but do not affect the CE SERD
1N/A * engines. In addition, UEs are recorded in the UE caches of the detecting
1N/A * CPUs. When a page is to be retired, a fault.memory.page fault is
1N/A * generated.
1N/A *
1N/A * cmd_xe_handler_f, declared below, is a function type with the same
1N/A * return type and argument list as the cmd_ce_common() and cmd_ue_common()
1N/A * prototypes in this header.
1N/A */
1N/A
1N/Atypedef cmd_evdisp_t cmd_xe_handler_f(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
1N/A const char *, uint64_t, uint8_t, uint16_t, uint8_t, ce_dispact_t, uint64_t,
1N/A nvlist_t *);
1N/A
1N/Aextern ce_dispact_t cmd_mem_name2type(const char *, int);
1N/Aextern cmd_evdisp_t cmd_ce(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
1N/A const char *, cmd_errcl_t);
1N/Aextern cmd_evdisp_t cmd_ue(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
1N/A const char *, cmd_errcl_t);
1N/Aextern cmd_evdisp_t cmd_ce_common(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
1N/A const char *, uint64_t, uint8_t, uint16_t, uint8_t,
1N/A ce_dispact_t, uint64_t, nvlist_t *); /* same signature as cmd_xe_handler_f */
1N/Aextern cmd_evdisp_t cmd_ue_common(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
1N/A const char *, uint64_t, uint8_t, uint16_t, uint8_t,
1N/A ce_dispact_t, uint64_t, nvlist_t *); /* same signature as cmd_xe_handler_f */
1N/Aextern cmd_evdisp_t cmd_mem_synd_check(fmd_hdl_t *, uint64_t, uint8_t,
1N/A uint16_t, uint8_t, cmd_cpu_t *);
1N/Aextern void cmd_dimm_close(fmd_hdl_t *, void *);
1N/Aextern void cmd_bank_close(fmd_hdl_t *, void *);
1N/A
1N/A/*
1N/A * US-IIIi I/O, Remote and Foreign Read memory errors
1N/A *
1N/A * When one processor or I/O bridge attempts to read memory local to
1N/A * another processor, one each of IOCE/IOUE/RCE/RUE and FRC/FRU will be
1N/A * generated, depending on the type of error. Both the IOxE/RxE and the FRx
1N/A * are needed, as each contains data necessary to the diagnosis of the error.
1N/A * Upon receipt of one of the errors, we wait until we receive the other.
1N/A * When the pair has been successfully received and matched, a CE or UE,
1N/A * as appropriate, is synthesized from the data in the matched ereports.
1N/A * The synthesized ereports are handled by the normal CE and UE mechanisms.
1N/A *
1N/A * cmd_frx(), cmd_rxe(), cmd_ioxe() and cmd_ioxe_sec() share one signature.
1N/A * cmd_rxefrx_common() takes the same five arguments (named hdl, ep, nvl,
1N/A * class and clcode in its prototype) plus a matchmask.
1N/A * NOTE(review): matchmask is presumably the set of error classes that can
1N/A * complete a pair -- confirm against the definition.
1N/A */
1N/Aextern cmd_evdisp_t cmd_frx(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
1N/A const char *, cmd_errcl_t);
1N/Aextern cmd_evdisp_t cmd_rxe(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
1N/A const char *, cmd_errcl_t);
1N/Aextern cmd_evdisp_t cmd_ioxe(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
1N/A const char *, cmd_errcl_t);
1N/Aextern cmd_evdisp_t cmd_ioxe_sec(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
1N/A const char *, cmd_errcl_t);
1N/Aextern cmd_evdisp_t cmd_rxefrx_common(fmd_hdl_t *hdl, fmd_event_t *ep,
1N/A nvlist_t *nvl, const char *class, cmd_errcl_t clcode,
1N/A cmd_errcl_t matchmask);
1N/A
1N/A/*
1N/A * A list of received IOxE/RxE/FRx ereports is maintained for correlation
1N/A * purposes (see above). These two routines manage the addition of new
1N/A * ereports, and the retrieval of existing ones. Pruning of the list is
1N/A * handled automatically.
1N/A *
1N/A * NOTE(review): the routines declared here are cmd_iorxefrx_queue() and
1N/A * cmd_iorxefrx_free(); no retrieval routine is declared in this header, so
1N/A * the wording above may be stale -- confirm against the implementation.
1N/A */
1N/Aextern void cmd_iorxefrx_queue(fmd_hdl_t *, cmd_iorxefrx_t *);
1N/Aextern void cmd_iorxefrx_free(fmd_hdl_t *, cmd_iorxefrx_t *);
1N/A
1N/Aextern const char *cmd_fmri_get_unum(nvlist_t *); /* NOTE(review): by name, unum of an FMRI */
1N/Aextern nvlist_t *cmd_mem_fmri_create(const char *); /* NOTE(review): by name, builds a mem FMRI */
1N/Aextern nvlist_t *cmd_mem_fmri_derive(fmd_hdl_t *, uint64_t, uint64_t, uint16_t);
1N/A
1N/Aextern void cmd_mem_case_restore(fmd_hdl_t *, cmd_case_t *, fmd_case_t *,
1N/A const char *, const char *);
1N/Aextern char *cmd_mem_serdnm_create(fmd_hdl_t *, const char *, const char *); /* NOTE(review): who frees the result? */
1N/Aextern void cmd_mem_retirestat_create(fmd_hdl_t *, fmd_stat_t *, const char *,
1N/A uint64_t);
1N/Aextern int cmd_mem_thresh_check(fmd_hdl_t *, uint_t);
1N/A
1N/Aextern void cmd_mem_timeout(fmd_hdl_t *, id_t); /* id_t: same type as rf_expid */
1N/Aextern void cmd_mem_gc(fmd_hdl_t *); /* NOTE(review): by name, garbage collection */
1N/Aextern void cmd_mem_fini(fmd_hdl_t *); /* NOTE(review): by name, module teardown */
1N/A
1N/A#ifdef __cplusplus
1N/A}
1N/A#endif
1N/A
1N/A#endif /* _CMD_MEM_H */
1N/A