faulty.c revision 25c6ff4b77fcddf4097ce78a8277275ca603b46c
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <fmadm.h>
#include <errno.h>
#include <limits.h>
#include <strings.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <dlfcn.h>
#include <sys/systeminfo.h>
#include <libintl.h>
#include <locale.h>
#include <libdevinfo.h>
#include <stdlib.h>
/*
* catalog_setup() must be called to set up support functions.
* Fault records are added to catalog by calling add_fault_record_to_catalog()
* records are stored in order of importance to the system.
* If -g flag is set or not_suppressed is not set and the class fru, fault,
* type are the same then details are merged into an existing record, with uuid
* records stored in time order.
* For each record information is extracted from nvlist and merged into linked
* list each is checked for identical records for which percentage certainty are
* added together.
* print_catalog() is called to print out catalog and release external resources
*
* /---------------\
* status_rec_list -> | | -|
* \---------------/
* \/
* /---------------\ /-------\ /-------\
* status_fru_list | status_record | -> | uurec | -> | uurec | -|
* \/ | | |- | | <- | |
* /-------------\ | | \-------/ \-------/
* | | -> | | \/ \/
* \-------------/ | | /-------\ /-------\
* \/ | | -> | asru | -> | asru |
* --- | | | | <- | |
* | | \-------/ \-------/
* status_asru_list | class |
* \/ | resource | /-------\ /-------\
* /-------------\ | fru | -> | list | -> | list |
* | | -> | serial | | | <- | |
* \-------------/ | | \-------/ \-------/
* \/ \---------------/
* --- \/ /\
* /---------------\
* | status_record |
* \---------------/
*
* Fmadm faulty takes a number of options which affect the format of the
* output displayed. By default, the display reports the FRU and ASRU along
* with other information on per-case basis as in the example below.
*
* --------------- ------------------------------------ -------------- -------
* TIME EVENT-ID MSG-ID SEVERITY
* --------------- ------------------------------------ -------------- -------
* Sep 21 10:01:36 d482f935-5c8f-e9ab-9f25-d0aaafec1e6c AMD-8000-2F Major
*
* Fault class : fault.memory.dimm_sb
* Affects : mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0
* faulted but still in service
* FRU : "CPU 0 DIMM 0" (hc://.../memory-controller=0/dimm=0)
* faulty
*
* Description : The number of errors associated with this memory module has
* exceeded acceptable levels. Refer to
* http://sun.com/msg/AMD-8000-2F for more information.
*
* Response : Pages of memory associated with this memory module are being
* removed from service as errors are reported.
*
* Impact : Total system memory capacity will be reduced as pages are
* retired.
*
* Action : Schedule a repair procedure to replace the affected memory
* module. Use fmdump -v -u <EVENT_ID> to identify the module.
*
* The -v flag is similar, but adds some additional information such as the
* resource. The -s flag is also similar but just gives the top line summary.
* All these options (ie without the -f or -r flags) use the print_catalog()
* function to do the display.
*
* The -f flag changes the output so that it appears sorted on a per-fru basis.
* The output is somewhat cut down compared to the default output. If -f is
* used, then print_fru() is used to print the output.
*
* -----------------------------------------------------------------------------
* "SLOT 2" (hc://.../hostbridge=3/pciexrc=3/pciexbus=4/pciexdev=0) faulty
* 5ca4aeb3-36...f6be-c2e8166dc484 2 suspects in this FRU total certainty 100%
*
* Description : A problem was detected for a PCI device.
* Refer to http://sun.com/msg/PCI-8000-7J for more information.
*
* Response : One or more device instances may be disabled
*
* Impact : Possible loss of services provided by the device instances
* associated with this fault
*
* Action : Schedule a repair procedure to replace the affected device.
* Use fmdump -v -u <EVENT_ID> to identify the device or contact
* Sun for support.
*
* The -r flag changes the output so that it appears sorted on a per-asru basis.
* The output is very much cut down compared to the default output, just giving
* the asru fmri and state. Here print_asru() is used to print the output.
*
* mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0 degraded
*
* For all fmadm faulty options, the sequence of events is
*
* 1) Walk through all the cases in the system using fmd_adm_case_iter() and
* for each case call dfault_rec(). This will call add_fault_record_to_catalog()
* This will extract the data from the nvlist and call catalog_new_record() to
* save the data away in various linked lists in the catalogue.
*
* 2) Once this is done, the data can be supplemented by using
* fmd_adm_rsrc_iter(). However this is now only necessary for the -i option.
*
* 3) Finally print_catalog(), print_fru() or print_asru() are called as
* appropriate to display the information from the catalogue sorted in the
* requested way.
*
*/
/*
 * name_list_t: one named entry together with a status word and a label.
 * NOTE(review): only the fields visible in this view are described; any
 * list-link or percentage fields are not shown here.
 */
typedef struct name_list {
/* the entry's name string */
char *name;
/* status word; presumably FM_SUSPECT_* flag bits - confirm against users */
int status;
/* label string; presumably the human-readable label for name - confirm */
char *label;
} name_list_t;
/*
 * ari_list_t: holds one uuid string. Objects of this type are allocated
 * with malloc() and walked in the opt_i print paths elsewhere in this file.
 */
typedef struct ari_list {
/* uuid string for this entry */
char *ari_uuid;
} ari_list_t;
/*
 * uurec_t: a uuid record. The diagram in the file header shows uurec
 * entries chained off a status_record.
 */
typedef struct uurec {
/* uuid string for this record */
char *uuid;
} uurec_t;
typedef struct uurec_select {
struct uurec_select *next;
char *uuid;
/*
 * hostid_t: identifies a host by three strings - chassis, server and
 * platform.
 */
typedef struct host_id {
/* chassis identifier string */
char *chassis;
/* server name string */
char *server;
/* platform name string */
char *platform;
} hostid_t;
typedef struct host_id_list {
struct host_id_list *next;
typedef struct status_record {
int nrecs;
char *severity; /* in C locale */
char *msgid;
char *url;
/*
 * sr_list_t: list element that refers to a status_record.
 */
typedef struct sr_list {
/* the status record this element refers to */
struct status_record *status_record;
} sr_list_t;
typedef struct resource_list {
struct resource_list *next;
struct resource_list *prev;
char *resource;
typedef struct tgetlabel_data {
char *label;
char *fru;
/*
 * File-scope state shared by the catalogue-building and print routines.
 */
/* NOTE(review): presumably the saved locale name - confirm where it is set */
static char *locale;
/* tested for non-NULL before use; presumably the saved NLSPATH value */
static char *nlspath;
static int max_display;
static int max_fault = 0;
/* topology handle; the label helpers test it against NULL before use */
static topo_hdl_t *topo_handle;
/* companion to topo_handle; tested for non-NULL in the label helpers */
static char *topo_handle_uuid;
static host_id_list_t *host_list;
/* incremented elsewhere in this file; print code tests n_server > 1 */
static int n_server;
/* count of -g options seen (force grouping of similar faults on a fru) */
static int opt_g;
static char *
{
"record time is too large for 32-bit utility\n");
} else {
}
return (buf);
}
static hostid_t *
{
platform = "-";
server = "-";
while (hostp) {
break;
}
}
n_server++;
}
return (rt);
}
static hostid_t *
{
&platform);
&chassis);
}
return (rt);
}
static void
catalog_setup(void)
{
char *tp;
int pl;
/*
* All FMA event dictionaries use msgfmt(1) message objects to produce
* messages, even for the C locale. We therefore want to use dgettext
* for all message lookups, but its defined behavior in the C locale is
* to return the input string. Since our input strings are event codes
* and not format strings, this doesn't help us. We resolve this nit
* by setting NLSPATH to a non-existent file: the presence of NLSPATH
* is defined to force dgettext(3C) to do a full lookup even for C.
*/
else {
}
}
/*
 * Build the knowledge-article URL for a message id by appending the id to
 * the fixed "http://sun.com/msg/" prefix.
 *
 * id	message id string, e.g. "AMD-8000-2F"
 *
 * Returns a malloc()ed, NUL-terminated string owned by the caller, or NULL
 * if id is NULL or the allocation fails.
 */
static char *
get_dict_url(char *id)
{
	char *url = "http://sun.com/msg/";
	size_t len;
	char *cp;

	if (id == NULL)
		return (NULL);

	/* room for prefix, id and the terminating NUL */
	len = strlen(url) + strlen(id) + 1;

	/* cp must refer to real storage before it is handed back */
	cp = malloc(len);
	if (cp != NULL)
		(void) snprintf(cp, len, "%s%s", url, id);

	return (cp);
}
static char *
{
char mbuf[128];
char *msg;
char dbuf[32];
char *p;
int restore_env = 0;
int restore_locale = 0;
} else {
restore_locale = 1;
}
restore_locale = 1;
}
restore_env = 1;
}
if (restore_locale)
if (restore_env && nlspath)
}
if (unknown)
msg = "unknown";
else
}
return (msg);
}
/*
* compare two fru strings which are made up of substrings separated by '/'
* return true if every substring is the same in the two strings, or if a
* substring is null in one.
*/
static int
{
int i = 0;
for (;;) {
} else if (i == 0) {
if (c1 == '/') {
do {
f2++;
break;
} else if (c2 == '/') {
do {
f1++;
break;
} else
break;
} else
break;
return (0);
f1++;
f2++;
}
return (1);
}
static int
{
int err;
int rt = TOPO_WALK_NEXT;
}
}
}
return (rt);
}
static void
label_get_topo(void)
{
int err;
if (topo_handle) {
}
}
static void
label_release_topo(void)
{
if (topo_handle_uuid)
if (topo_handle) {
}
}
static char *
get_fmri_label(char *fru)
{
int err;
if (topo_handle == NULL)
if (topo_handle_uuid) {
if (twp) {
}
}
}
static char *
{
char *tname;
int err;
char buf[128];
if (topo_handle == NULL)
} else {
mod_name) {
}
}
return (name);
}
/*
 * Translate a severity name into a numeric priority.
 *
 * s	severity string, or NULL
 *
 * Returns 1 for "Minor", 10 for "Major", 100 for "Critical", and 0 for
 * anything else, including NULL. Matching is exact and case-sensitive.
 */
static int
set_priority(char *s)
{
	static const struct {
		const char *label;
		int weight;
	} ranks[] = {
		{ "Minor", 1 },
		{ "Major", 10 },
		{ "Critical", 100 }
	};
	size_t idx;

	if (s == NULL)
		return (0);

	for (idx = 0; idx < sizeof (ranks) / sizeof (ranks[0]); idx++) {
		if (strcmp(s, ranks[idx].label) == 0)
			return (ranks[idx].weight);
	}

	/* unrecognised severity */
	return (0);
}
static int
{
int rt;
if (rt == 0) {
rt = 1;
rt = -1;
else
}
return (rt);
}
/*
* merge two lists into one, by comparing entries in new and moving into list if
* name is not there or free off memory for names which are already there
* add_pct indicates if pct is the sum or highest pct
*/
static name_list_t *
{
int max_pct;
while (np) {
while (lp) {
break;
}
else
if (lp) {
if (add_pct) {
}
}
}
}
if (np) {
if (lp) {
} else {
}
}
} else {
}
}
}
return (rt);
}
/*
* compare entries in two lists return true if the two lists have identical
* content. The two lists may not have entries in the same order, so we compare
* the size of the list as well as trying to find every entry from one list in
* the other.
*/
static int
{
while (lp2) {
l2++;
break;
}
while (lp1) {
l1++;
while (lp2) {
common++;
break;
}
break;
}
break;
}
return (0);
else
return (1);
}
static name_list_t *
{
return (nlp);
}
static void
{
if (list) {
do {
}
}
static status_record_t *
{
return (status_rec_p);
}
/*
* add record to given list maintaining order higher priority first.
*/
static void
{
int order;
} else {
/* insert new record in front of lower priority */
if (order > 0) {
} else {
}
}
}
}
static void
{
int order;
} else {
/*
* insert new record in front of lower priority
*/
if (order > 0) {
} else {
}
}
}
}
static void
{
int order;
while (np) {
/*
* remove from list and add again using
* new priority
*/
} else {
&np->status_rec_list);
}
break;
}
break;
}
}
}
}
static void
{
while (fp) {
break;
}
}
/*
* add record to rec, fru and asru lists.
*/
static void
{
if (status_rec_p->fru)
if (status_rec_p->asru)
}
/*
* add uuid and diagnoses time to an existing record for similar fault on the
* same fru
*/
static void
{
status_rec_p->nrecs++;
/* add uurec in time order */
} else {
}
}
static status_record_t *
{
while (status_rec_p) {
break;
break;
} else {
}
}
return (srp);
}
static void
{
char *name;
int j;
char buf[64];
&serint) == 0) {
serint);
}
for (j = 1; j < nelem; j++) {
}
}
&serial) == 0) {
}
}
}
}
static void
{
char *name;
char *label;
}
&label) == 0)
}
}
}
}
}
}
}
static void
const char *url)
{
char *msgid = "-";
boolean_t any_present = 0;
if (size != 0) {
for (i = 0; i < size; i++) {
if (!(ba[i] & FM_SUSPECT_NOT_PRESENT) &&
(ba[i] & FM_SUSPECT_FAULTY))
any_present = 1;
}
/*
* also suppress if no resources present
*/
if (any_present == 0)
not_suppressed = 0;
}
if (not_suppressed && !opt_g)
status_rec_p = NULL;
else
if (status_rec_p) {
} else {
}
}
static void
{
if (srp) {
for (;;) {
while (uurp) {
ari_list = (ari_list_t *)
malloc(sizeof (ari_list_t));
return;
}
}
break;
}
}
}
static void
{
char c;
int i;
int lsz;
char *padding;
for (i = 0; i < lsz; i++)
padding[i] = ' ';
padding[i] = 0;
c = *ep;
(void) printf("\n");
while (c) {
i = lsz;
if (c == ' ')
else if (c == '\n') {
i = 0;
*ep = 0;
do {
ep++;
break;
}
ep++;
i++;
}
if (i >= 80 && wp) {
*wp = 0;
c = *ep;
}
}
}
static void
{
const char *cp;
char *l_url;
char *buf;
int bufsz;
if (cp) {
if (url)
else
if (!url)
}
if (cp) {
}
if (cp) {
}
if (cp) {
}
}
static void
{
if (func)
if (fru) {
*np += 1;
} else {
*np += 1;
}
if (full) {
} else {
(void) printf(" %s %d%%\n",
}
} else {
}
} else {
(void) printf("\n");
}
}
static void
{
switch (status) {
case 0:
break;
case FM_SUSPECT_DEGRADED:
"but associated components no longer faulty");
break;
case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED:
"providing degraded service");
break;
case FM_SUSPECT_FAULTY:
break;
case FM_SUSPECT_UNUSABLE:
"but associated components no longer faulty");
break;
case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE:
break;
default:
break;
}
if (msg) {
}
}
static void
{
if (status & FM_SUSPECT_NOT_PRESENT)
else if (status & FM_SUSPECT_FAULTY)
else if (status & FM_SUSPECT_REPLACED)
else if (status & FM_SUSPECT_REPAIRED)
else if (status & FM_SUSPECT_ACQUITTED)
else
}
static void
{
char *padding;
int i, j, l, n;
for (i = 0; i < l; i++)
padding[i] = ' ';
padding[l] = 0;
else {
if (fru) {
} else
}
if (full) {
} else {
(void) printf(" %s %d%%\n",
}
} else {
}
} else {
(void) printf("\n");
}
if (func1)
n = 1;
j = 0;
if (func1)
} else
j++;
}
if (j == 1) {
} else if (j > 1) {
" use -v option for full list"));
}
}
static int
{
if (status == -1) {
continue;
}
status = -1;
break;
}
}
return (status);
}
static int
{
int nserial = 0;
int found = 0;
char buf[128];
while (sp) {
nserial++;
while (fp) {
found++;
break;
}
break;
}
break;
}
}
static void
{
}
static void
{
char buf[32];
int n, j, k, max;
int status;
n = 0;
if (max < 0) {
max = 0;
}
j = max / 2;
max -= j;
if (opt_i) {
while (ari_list) {
(void) printf("%-15s %s\n",
}
} else {
(void) printf("%-15s %s\n",
}
} else if (n == j)
n++;
}
(void) printf("\n");
if (n_server > 1)
if (status != -1) {
} else
}
full);
}
}
if (status != -1) {
"FRU :"), get_fmri_label, 0,
} else
"FRU :"), get_fmri_label, 0,
}
}
(void) printf("\n");
}
static void
{
char buf[32];
char *severity;
static int header = 0;
char *head;
if (nlspath)
else
if (opt_i) {
head = "--------------- "
"------------------------------------ "
"-------------- ---------\n"
"TIME CACHE-ID"
" MSG-ID"
" SEVERITY\n--------------- "
"------------------------------------ "
" -------------- ---------";
} else {
head = "--------------- "
"------------------------------------ "
"-------------- ---------\n"
"TIME EVENT-ID"
" MSG-ID"
" SEVERITY\n--------------- "
"------------------------------------ "
" -------------- ---------";
}
header = 1;
}
if (opt_i) {
while (ari_list) {
(void) printf("%-15s %-37s %-14s %-9s\n",
}
} else {
(void) printf("%-15s %-37s %-14s %-9s\n",
}
if (!summary)
}
static void
{
if (slp) {
for (;;) {
if (page_feed)
(void) printf("\f\n");
}
break;
}
}
}
static name_list_t *
{
while (fru) {
break;
}
break;
}
return (rt);
}
static void
{
100);
} else {
}
}
static void
{
int status;
while (tp) {
if (page_feed)
(void) printf("\f\n");
if (!summary)
(void) printf("-----------------------------"
"---------------------------------------"
"----------\n");
do {
if (fru) {
(void) printf("\"%s\" (%s) ",
else if ((fru_label = get_fmri_label(
(void) printf("\"%s\" (%s) ",
} else
(void) printf("%s ",
break;
}
status = 0;
do {
while (fru) {
break;
}
if (status & FM_SUSPECT_NOT_PRESENT)
else if (status & FM_SUSPECT_FAULTY)
else if (status & FM_SUSPECT_REPLACED)
else if (status & FM_SUSPECT_REPAIRED)
"repair attempted\n"));
else if (status & FM_SUSPECT_ACQUITTED)
else
do {
if (fru) {
if (opt_i) {
while (ari_list) {
ari_list =
}
} else {
}
}
if (!summary) {
}
do {
}
}
}
if (tp == status_fru_list)
break;
}
}
static void
print_asru(int opt_a)
{
char *msg;
int status;
while (tp) {
status = 0;
do {
while (asru) {
break;
}
switch (status) {
case 0:
break;
case FM_SUSPECT_DEGRADED:
break;
case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED:
break;
case FM_SUSPECT_FAULTY:
break;
case FM_SUSPECT_UNUSABLE:
break;
case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE:
break;
default:
msg = "";
break;
}
}
if (tp == status_asru_list)
break;
}
}
static int
{
while (uurecp) {
return (1);
}
return (0);
}
static int
{
int rt = 0;
char *uuid = "-";
&uuid);
} else {
rt = -1;
}
return (rt);
}
/*ARGSUSED*/
static int
{
return (0);
}
static int
{
int rt = FMADM_EXIT_SUCCESS;
/*
* These calls may fail with Protocol error if message payload is too big
*/
die("failed to get case list from fmd");
die("failed to get case status from fmd");
return (rt);
}
/*
* fmadm faulty command
*
* -a show hidden fault records
* -f show faulty fru's
* -g force grouping of similar faults on the same fru
* -n number of fault records to display
* -p pipe output through pager
* -r show faulty asru's
* -s print summary of first fault
* -u print listed uuid's only
* -v full output
*/
int
{
int opt_i = 0;
char *pager;
switch (c) {
case 'a':
opt_a++;
break;
case 'f':
opt_f++;
break;
case 'g':
opt_g++;
break;
case 'i':
opt_i++;
break;
case 'n':
break;
case 'p':
opt_p++;
break;
case 'r':
opt_r++;
break;
case 's':
opt_s++;
break;
case 'u':
opt_a = 1;
break;
case 'v':
opt_v++;
break;
default:
return (FMADM_EXIT_USAGE);
}
}
return (FMADM_EXIT_USAGE);
if (opt_p) {
opt_p = 0;
} else {
}
}
if (opt_f)
if (opt_r)
if (opt_p) {
}
return (rt);
}
int
{
int i, status = FMADM_EXIT_SUCCESS;
return (FMADM_EXIT_USAGE);
for (i = 1; i < argc; i++) {
} else
}
return (status);
}
int
{
int err;
return (FMADM_EXIT_USAGE);
return (FMADM_EXIT_USAGE);
/*
* argument could be a uuid, an fmri (asru, fru or resource)
* or a label. Try uuid first, If that fails try the others.
*/
if (err != 0)
if (err != 0)
return (FMADM_EXIT_SUCCESS);
}
int
{
int err;
return (FMADM_EXIT_USAGE);
return (FMADM_EXIT_USAGE);
/*
* argument could be an fmri (asru, fru or resource) or a label.
*/
if (err != 0)
return (FMADM_EXIT_SUCCESS);
}
int
{
int err;
return (FMADM_EXIT_USAGE);
return (FMADM_EXIT_USAGE);
/*
* argument could be an fmri (asru, fru or resource) or a label.
*/
if (err != 0)
return (FMADM_EXIT_SUCCESS);
}
int
{
int err;
return (FMADM_EXIT_USAGE);
return (FMADM_EXIT_USAGE);
/*
* argument could be a uuid, an fmri (asru, fru or resource)
* or a label. Or it could be a uuid and an fmri or label.
*/
if (err != 0)
} else {
if (err != 0)
}
if (err != 0)
return (FMADM_EXIT_SUCCESS);
}