fault_analyze.c revision 24db46411fd54f70c35b94bb952eb7ba040e43b4
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/byteorder.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <errno.h>
#include <utility.h>
#include "util.h"
#include "sfx4500-disk.h"
/*
* The functions defined below are used to query SCSI (or SCSI-like)
* disk devices for their Information Exceptions (IE) page via LOG SENSE.
* SATA disks in Solaris implement command translation that transforms
* the SATA SMART information into the appropriate IE page data.
*
* The general algorithm for determining if a disk has detected an imminent
* failure via the IE mechanism is as follows:
*
* STEP 1 - INITIALIZATION
* 1) Check to see if the IE mechanism is enabled via MODE SENSE for the
* IE Control page (page 0x1C), checking the DEXCPT field (1 = IE is
* disabled). If it is enabled, go to step 3; if there was an error
* getting the mode page, abort IE processing; otherwise, continue to
* step 2.
* 2) Enable the IE mechanism by sending a MODE SELECT for page 0x1C
* with the DEXCPT = 0, PERF = 1, MRIE = 6, EWASC = 1, TEST = 0,
* REPORT COUNT = 0001h, LOGERR = 1 (enable IE, minimize delay associated
* with SMART processing, only report IE condition on request,
* enable warnings, testing disabled, limit to 1 the number
* of times to report each IE, and enable logging of errors).
* 3) Check to see if the IE log page is supported by issuing a LOG
* SENSE with page == 0x2F. If the page list returned includes the
* IE page, examine the log page and ensure that the parameter 0 length
* is at least 4 (some drives that are pre-SCSI3 return smaller lengths
* with nonsensical values for parameter 0).
* Check for the IBM extensions to the IE log page (the first byte of the
* vendor-specific area is non-zero if the temperature is present)
* and make a note of it.
* If there is no support for the IE Log page, we can still check SMART
* status by issuing a REQUEST SENSE by itself (since that's how we
* configured the MRIE field in the IE Control mode page). The presence
* of the IE log page makes life easier by aggregating almost all of the
* needed information (including the temperature information).
* 4) Check for self-test logs by issuing a LOG SENSE for page 0x10 and
* examining the returned page. If the page makes sense, make a note
* of it.
* 5) Check for a temperature log page. If it exists, make a note of it.
* (Prefer the temperature log page for monitoring because the SCSI-3 spec
* specifies an optional threshold temperature parameter (and most
* drives that support the temperature log page include the threshold).
* [Relying on the IE Log page for temperature constraint information
* is not reliable because the threshold information in the IE log
* page is an IBM extension and is not present on all drives. Plus,
* not many drives actually implement the IE log page.])
* 6) Clear the GLTSD bit in Control mode page 0xA. This will allow the
* drive to save each of the log pages described above to nonvolatile
* storage. This is essential if the drive is to remember its failures
* across power-offs (it would be very bad for a previously-bad drive to
* go through another set of failures, just to recognize its badness after
* a power cycle). If the MODE SELECT for this operation fails, issue a
* warning, but continue anyway.
*
* STEP 2 - MONITORING
* 1) To determine if a predictable failure is imminent, either send the
* device an unsolicited REQUEST SENSE command or a LOG SENSE for the
* Informational Exceptions page, and use the sense information from
* either of the sources to determine if a failure is imminent.
* 2) If self-test logs are present, check them. If a self-test occurred
* since the last time the monitoring function was called, check its
* results. If there was a self-test failure, a self-test failure is
* returned.
* 3) Based on the available temperature information from the drive (either
* from the temperature log page or from the temperature information
* available on the IE page), determine if the drive has exceeded its
* maximum operating temperature. If so, a drive over-temp failure is
* returned. (If the drive is within 5% of its maximum operating
* temperature, return a warning). If there is no threshold, use the
* threshold value passed in.
*
*/
struct log_parameter_header *lphp);
struct log_parameter_header *lphp);
struct log_parameter_header *lphp);
struct log_parameter_header *lphp);
struct log_parameter_header *lphp);
struct log_parameter_header *lphp);
static struct logpage_validation_entry logpage_validation_list[] = {
"Informational Exceptions", B_TRUE,
"Temperature", B_TRUE,
"Self-test", B_TRUE,
{ 0xFF, 0, 0,
};
static char *
{
}
*sep = 0;
return (path);
}
static void
{
char *path;
int pathlen;
path,
}
static void
{
char *path;
int pathlen;
path,
}
static void
{
char *path;
int pathlen;
path,
}
static int
{
int result;
struct scsi_extended_sense sense;
int senselen = sizeof (struct scsi_extended_sense);
} else
options &= ~MODE_SELECT_SP;
if (cdb_len == MODE_CMD_LEN_6) {
/* The following fields are reserved during mode select: */
} else if (cdb_len == MODE_CMD_LEN_10) {
/* The following fields are reserved during mode select: */
}
if (result != 0)
return (result);
}
static int
{
int result;
struct scsi_extended_sense sense;
int senselen = sizeof (struct scsi_extended_sense);
if (cdb_len == MODE_CMD_LEN_6) {
} else if (cdb_len == MODE_CMD_LEN_10) {
}
if (result != 0)
return (result);
}
static int
{
int senselen = sizeof (struct scsi_extended_sense);
int sensebuflen = sizeof (struct scsi_extended_sense);
int result;
if (result == 0)
else
return (result);
}
static int
{
struct info_except_page new_iec_page;
struct scsi_ms_hdrs hdrs;
int result;
sizeof (struct info_except_page));
/*
* Enable IE reporting:
*
* (1) DEXCPT = 0
* (2) PERF = <as passed in> (minimize delay due to IE processing)
* (3) MRIE = 6 (IE_REPORT_ON_REQUEST)
* (4) EWASC = 1
* (5) TEST = <as passed in>
* (6) REPORT COUNT = 0x0001
* (7) LOGERR = 1
*
*/
new_iec_page.dexcpt = 0;
/*
* Now compare the new mode page with the existing one.
* if there's no difference, there's no need for a mode select
*/
MODEPAGE_INFO_EXCPT_LEN) == 0) {
result = 0;
} else {
}
}
return (result);
}
static boolean_t
{
uint_t i = 0;
/*
* The mode page list contains all mode pages supported by
* the device, one after the other. Since the pages have headers
* that describe the page code and their length, we can use pointer
* arithmetic to hop to the next page.
*/
while (i < pgdatalen) {
break;
}
i += MODESENSE_PAGE_LEN(pg);
}
return (found);
}
/*
* Determine which MODE SENSE/MODE SELECT variant to use (the 6- or 10-byte
* version) by executing a MODE SENSE command for a page that should be
* implemented by the lun. If the lun doesn't support the Return All Pages
* mode page (0x3F), then that information is returned as an invalid field in
* cdb error. This function updates the diskinfo structure with the command
* length that's supported.
*/
static int
{
/*
* allpages_buflen is USHRT_MAX - size of the header because some luns
* return nothing if the buffer length is too big -- it must be sized
* properly (the maximum buffer size is therefore the maximum that
* will fit in a 16-bit integer minus the size of the header.)
*/
struct scsi_ms_header smh;
struct scsi_ms_header_g1 smh_g1;
struct scsi_extended_sense sense;
int resid;
int result;
uint_t datalength = 0;
/*
* Attempt a mode sense(6). If that fails, try a mode sense(10)
*
* allpages is allocated to be of the maximum size for either a
* mode sense(6) or mode sense(10) MODEPAGE_ALLPAGES response.
*
* Note that the length passed into uscsi_mode_sense should be
* set to the maximum size of the parameter response.
*
*/
resid = sizeof (struct scsi_extended_sense);
/*
* Compute the data length of the page that contains all
* mode sense pages. This is a bit tricky because the
* format of the response from the lun is:
*
* header: <length> <medium type byte> <dev specific byte>
* <block descriptor length>
* [<optional block descriptor>]
* data: [<mode page data> <mode page data> ...]
*
* The length field in the header describes the length of
* the entire response, including the header but NOT
* including itself (1 or 2 bytes, depending on whether a
* 6- or 10-byte mode sense is being executed).
*
* So, the data length equals the length value in the header
* plus 1 (because the length byte was not included in the
* length count), minus [[the sum of the length of the
* header and the length of the block descriptor]].
*/
(sizeof (struct mode_header) +
} else {
resid = sizeof (struct scsi_extended_sense);
if (result == 0) {
(sizeof (struct mode_header_g1) +
} else
}
}
if (result == 0) {
/*
* One of the sets of the commands (above) succeeded, so now
* look for the mode pages we need and record them appropriately
*/
} else /* result != 0 */
return (result);
}
static int
{
struct scsi_ms_hdrs junk_hdrs;
int result;
sizeof (struct info_except_page));
sizeof (struct info_except_page));
== 0) {
}
return (result);
}
static int
{
int result;
if (result != 0) {
"current Control mode page -- skipping GLTSD "
"initialization.\n");
"Control mode page -- skipping GLTSD "
"initialization.\n");
!= 0) {
"changeable Control mode page -- skipping GLTSD "
"initialization.\n");
"changeable. This disk will not save log "
"parameters implicitly.\n");
} else if (control_pg_cur.gltsd) {
control_pg_cur.gltsd = 0;
}
return (result);
}
static int
{
int return_code = IE_SUCCESS;
/*
* Now that we know we can execute a valid mode sense command
* (and that the IE control mode page is supported), load the IEC page
* so we can check if IE is disabled. If it is disabled and it's
* NOT changeable, then we can't do anything else here.
*/
/*
* Something went wrong when grabbing the IEC mode page,
* so bail out.
*/
/*
* We need to be able to change the IE disable bit if
* IEs are currently disabled. We also need to be able to
* change the MRIE bits if they're not set to the right values,
* so if we can't enable IEs properly, we're done here.
*/
ascqp) != 0) {
/*
* Something went wrong when grabbing the IEC mode page (again),
* so bail out.
*/
/*
* NOTE: Failed to clear the GLTSD bit in the control page;
* meaning this disk doesn't support the GLTSD flag.
*/
if (*ascp != ASC_INVALID_CDB_FIELD)
"[KEY=0x%x ASC=0x%x ASCQ=0x%x]. Disk "
"failures may not be recognized after a power "
}
if (return_code == IE_SUCCESS) {
/* Save the update interval */
}
return (return_code);
}
static int
{
int i;
return (logpage_validation_list[i].supp_bit);
}
return (0);
}
static logpage_validation_fn_t
{
int i;
return (logpage_validation_list[i].validate_fn);
}
return (NULL);
}
static logpage_analyze_fn_t
{
int i;
return (logpage_validation_list[i].analyze_fn);
}
return (NULL);
}
static uchar_t
{
int i;
return (logpage_validation_list[i].pc);
}
/* If no PC is specifically defined for this page code, use current */
return (PC_CURRENT);
}
static int
{
/*
* buflen is USHRT_MAX - size of the header because some luns
* return nothing if the buffer length is too big -- it must be sized
* properly (the maximum buffer size is therefore the maximum that
* will fit in a 16-bit integer minus the size of the header.)
*/
struct scsi_extended_sense sense;
int resid = sizeof (struct scsi_extended_sense);
int result;
int bitset;
int i = 0;
while (i < pagecount) {
i++;
}
}
if (result != 0)
return (result);
}
static int
{
struct info_excpt_log_param *iep;
int result = 0;
/*
* Ensure that parameter code 0 has a length of
* at LEAST 4 as per the SCSI SPC3 spec. If it
* does not, don't use this log page (its format
* is unknown).
*/
"not using it.\n");
result = -1;
/*
* Determine if the vendor-specific area lists a
* temperature threshold
*/
if (iep->ex_temp_threshold != 0)
}
}
return (result);
}
static int
struct log_parameter_header *lphp)
{
int result = 0;
struct temperature_log_param_reftemp *rtp;
/* The temperature log page has two parameter codes defined: 0 & 1 */
/* 0 is current temperature, and 1 is the threshold (but is optional) */
/*
* Don't compare the current temperature to 0xff; we don't flag that
* as an error now because the condition that caused the drive not to
* be able to report a temperature reading could be transitory.
*/
switch (param_code) {
case LOGPARAM_TEMP_CURTEMP:
result = -1;
}
break;
case LOGPARAM_TEMP_REFTEMP:
result = -1;
}
break;
}
if (result < 0)
"-- not using it.\n");
return (result);
}
static int
struct log_parameter_header *lphp)
{
int result = 0;
/* Parameter codes range from 0x01-0x14 */
result = -1;
result = -1;
}
return (result);
}
static fault_monitor_info_t *
new_disk_fault_info(void)
{
int opts;
/*
* This will always succeed. See sfx4500-disk.c for the default values.
*/
(void) dm_prop_lookup_int(dm_global_proplist(),
return (fmi);
}
void
{
}
}
static void
{
}
static void
{
struct disk_fault *flt;
struct disk_fault *newflt;
/* Do not add duplicate faults */
return;
newflt = (struct disk_fault *)
dzmalloc(sizeof (struct disk_fault));
} else
} else {
}
else {
}
}
void
{
"Fake SMART impending failure fault", 0,
0x5D /* IE Failure threshold exceeded */,
0xFF /* false positive */, 0, 0, 0);
"Fake self-test failure fault",
0, 0, 0, SELFTEST_FAILURE_SEG_FIRST, 0, 0);
"Fake disk overtemp fault",
0,
0xb /* Warning */,
1 /* specified temperature exceeded */, 0,
0xff /* curtemp */, 0xfe /* threshold */);
}
static int
struct log_parameter_header *lphp)
{
struct info_excpt_log_param *iep;
int result = 0;
char buf[MSG_BUFLEN];
/*
* There are two faults that the IE parameter helps
* detect -- the general IE predictive failure, and
* an overtemp failure (but only if the temperature
* threshold information is included).
*/
0, 0, 0);
result = -1;
}
/*
* If the length of this parameter includes the temperature
* threshold, use it to compare the temperature, but only if
* there is no temperature log page supported (or, if there
* is a temperature log page but no reference temperature in
* the temperature log page).
*/
(iep->ex_temp_threshold != 0) &&
"celsius) is above the threshold (%d celsius)",
result = -1;
}
}
return (result);
}
static int
struct log_parameter_header *lphp)
{
char buf[MSG_BUFLEN];
int result = 0;
struct temperature_log_param_curtemp *ctp =
(struct temperature_log_param_curtemp *)lphp;
/*
* If this log page has a reference temperature, it must have
* been recorded in the diskinfo structure, so use it
* to compare the current temperature reading (if the
* reading is valid).
*/
/* The temperature log page has two parameter codes defined: 0 & 1 */
/* 0 is current temperature, and 1 is the threshold (but is optional) */
/*
* Don't compare the current temperature to 0xff; we don't flag that
* as an error now because the condition that caused the drive not to
* be able to report a temperature reading could be transitory.
*/
if (param_code == LOGPARAM_TEMP_CURTEMP &&
"celsius) is above the threshold (%d celsius)",
result = -1;
}
return (result);
}
static char *
int buflen)
{
const char *s;
s = "An unknown error occurred while the "
"device server was processing the self-test "
"and the device server was unable to complete "
"the self-test.";
break;
s = "The self-test completed with a failure in a test "
"segment, and the test segment that failed is not known.";
break;
s = "The first segment of the self-test failed.";
break;
s = "The second segment of the self-test failed.";
break;
/* If the test number was 0, the failure segment is unknown */
if (stlp->test_number == 0)
s = "The self-test failed in an unknown test segment.";
else
s = "The self-test failed in test segment %d.";
break;
default:
s = "Unknown self-test result code (0x%x (%d))";
break;
}
return (buf);
}
static int
struct log_parameter_header *lphp)
{
struct selftest_log_parameter *stlp =
(struct selftest_log_parameter *)lphp;
int result = 0;
const char *fmt;
char buf[MSG_BUFLEN];
char tsstring[MSG_BUFLEN];
char lbastring[MSG_BUFLEN];
char stcause[MSG_BUFLEN];
/*
* If the self-test failed, log a fault.
*/
if (param_code >= LOGPAGE_SELFTEST_MIN_PARAM_CODE &&
"%u disk-hours";
/* The lba failure field is only valid if it's not all 1's */
"failure: 0x%llx (%llu)" : "";
"[self-test parameter #%d, time of failure: %s%s]: %s",
result = -1;
}
return (result);
}
static int
{
struct log_header *lhp;
struct log_parameter_header *lphp;
struct scsi_extended_sense sense;
int buflen;
int resid;
int log_length;
int result = 0;
int i = 0;
int this_param_len = 0;
/*
* buflen is USHRT_MAX - size of the header because some luns
* return nothing if the buffer length is too big -- it must be sized
* properly (the maximum buffer size is therefore the maximum that
* will fit in a 16-bit integer minus the size of the header.)
*/
resid = sizeof (struct scsi_extended_sense);
if ((validate_fn != NULL) &&
sizeof (struct log_header));
while (i < log_length) {
lphp = (struct log_parameter_header *)
/*
* If the validation fn returns a negative value,
* that's the signal to clear the supported bit
* for this log page and break out of the loop.
*/
break;
}
sizeof (struct log_parameter_header);
i += this_param_len;
}
}
if (result != 0)
return (result);
}
static int
{
int result = 0;
int i;
if ((fip->log_pages_supported &
logpage_validation_list[i].supp_bit) == 0) {
continue;
}
/*
* verify_logpage will clear the bit from
* log_pages_supported if verification fails
* (which means that the page is not usable)
*/
/*
* If something goes wrong here, this is not a fatal
* error -- just log the error and continue.
*/
log_warn("Error during %s log page verification: "
"KEY=0x%x ASC=0x%x ASCQ=0x%x",
*ascqp);
result -= 1;
}
}
return (result);
}
/*
* This function calls the analysis function that corresponds to the log page
* passed-in. If the analysis function detects a fault in the log page
* parameter it was called with, it fills-in the disk_fault structure passed-in
* with the fault specifics, and log parameter processing stops.
*/
static int
{
struct log_header *lhp;
struct log_parameter_header *lphp;
struct scsi_extended_sense sense;
int buflen;
int resid;
int log_length;
int result = 0;
int i = 0;
int this_param_len = 0;
/*
* buflen is USHRT_MAX - size of the header because some luns
* return nothing if the buffer length is too big -- it must be sized
* properly (the maximum buffer size is therefore the maximum that
* will fit in a 16-bit integer minus the size of the header.)
*/
resid = sizeof (struct scsi_extended_sense);
if ((analyze_fn != NULL) &&
sizeof (struct log_header));
while (i < log_length) {
lphp = (struct log_parameter_header *)
/*
* If the analysis fn returns a negative value,
* then a disk fault associated with this page
* has been identified.
*/
"0x%x, parameter 0x%x.\n", logpage_code,
sizeof (struct log_parameter_header);
i += this_param_len;
}
}
if (result != 0)
return (result);
}
static int
{
int result = 0;
int i;
if ((fip->log_pages_supported &
logpage_validation_list[i].supp_bit) == 0) {
continue;
}
/*
* analyze_logpage will return a negative value if something
* went wrong during a LOG SENSE of the current log page.
*/
/*
* If something goes wrong here, this is not a fatal
* error -- just remember it and continue.
*/
*failidx = i;
result -= 1;
}
}
return (result);
}
static int
{
int result;
/*
* Save the result of a successful REQUEST SENSE
* because error information is cleared after it's
* sent to the host.
*/
if (result == 0) {
}
return (result);
}
void
{
/*
* The only thing that consumes memory is the fault list, so free
* that now:
*/
}
}
int
{
int fd;
char path[MAXPATHLEN];
int return_code = IE_SUCCESS;
"node");
return (-1);
}
/* Reset fault-tracking statistics */
diskinfop->analysis_generation = 0;
/* Initialize key fields: */
/* Assume we support no extensions */
fip->extensions = 0;
/* Assume we support no log pages */
fip->log_pages_supported = 0;
/* Assume we support no mode pages */
fip->mode_pages_supported = 0;
!= 0) {
/*
* If the error was an invalid opcode, then mode sense
* isn't supported, and, by extension, IE isn't supported.
* If the error is "mode page unsupported", then this lun
* is equally useless.
*/
else {
log_err("modepages_init failed: "
ascq);
}
"No IEC mode page present -- IE (SMART) not supported.\n");
!= 0) {
/*
* If there's an error retrieving the list of supported log
* pages, then continue with a warning.
*/
"Error during LOG SENSE of supported pages: "
"KEY=0x%x ASC=0x%x ASCQ=0x%x -- not using any "
} else {
}
return (return_code);
}
static int
{
int result;
result = -1;
} else {
/*
* If the sense key is NO SENSE, and the ASC is
* any nonzero value, then we have an impending failure
*/
}
result = 0;
}
return (result);
}
/*
* Returns n>0 if there are disk faults (n faults)
* 0 if there are no disk faults
* <0 if there was a problem accessing the disk
*/
int
{
int i, fd;
int faults = 0;
struct disk_fault *flt;
char path[MAXPATHLEN];
"node");
return (-1);
}
/*
* Grab the fault list mutex here because any of the functions below
* can add to it.
*/
"KEY=0x%x ASC=0x%x ASCQ=0x%x\n",
}
/*
* We only need the unsolicited request-sense if we don't have the
* IE log page.
*/
}
/*
* If any disk faults were added to the diskinfo structure, then
* we may have a disk fault condition.
*/
return (0);
}
faults++;
} else if (OPT_ENABLED(fip,
faults++;
faults++;
} else
if (print_msg)
}
return (faults);
}