/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file contains routines to analyze the surface of a disk.
*/
#include "global.h"
#include "analyze.h"
#include <stdlib.h>
#include <errno.h>
#include "misc.h"
#include "defect.h"
#include "label.h"
#include "param.h"
#include "checkdev.h"
/*
* These global variables control the surface analysis process. They
* are set from a command in the defect menu.
*/
/*
* These are summary variables to print out info after analysis.
* Values less than 0 imply they are invalid.
*/
/*
* This variable is used to tell whether the most recent surface
* analysis error was caused by a media defect or some other problem.
*/
/*
 * Fixed data patterns for surface analysis, used whenever random
 * patterns have not been selected.  The values are picked to provoke
 * pattern dependent errors.
 */
static unsigned int scan_patterns[] = {
	0xc6dec6deU,	/* 1100 0110 1101 1110 ... */
	0x6db6db6dU,	/* 011 repeated */
	0x00000000U,	/* all zeros */
	0xffffffffU,	/* all ones */
	0xaaaaaaaaU	/* 1010 repeated */
};
/*
* These are the data patterns from the SunFed requirements document.
*
* NOTE(review): the declaration line that should open this initializer
* list (type, array name and opening brace) is not present in this
* file. As written, the four entries and the closing brace below do
* not parse as C, and the name of the table cannot be determined from
* here. TODO: restore the missing declaration.
*/
0xaaaaaaaa, /* 10101010... */
0x55555555, /* 01010101... == UUUU... */
0xaaaaaaaa, /* 10101010... */
0xaaaaaaaa, /* 10101010... */
};
/*
* Function prototypes for the file-local helper routines.
*
* NOTE(review): the two branches do not declare the same set of
* functions. The __STDC__ branch declares only
* handle_error_conditions(), while the non-__STDC__ branch also
* declares scan_repair(), analyze_blocks() and verify_blocks(). The
* ANSI prototypes for those three appear to be missing -- TODO confirm
* and restore them.
*/
#ifdef __STDC__
static int handle_error_conditions(void);
#else /* __STDC__ */
static int scan_repair();
static int analyze_blocks();
static int handle_error_conditions();
static int verify_blocks();
#endif /* __STDC__ */
/*
* This routine performs a surface analysis based upon the global
* parameters. It is called from several commands in the defect menu,
* and from the format command in the command menu (if post-format
* analysis is enabled).
*
* Returns -1 as soon as one of the pre-scan checks fails or
* handle_error_conditions() reports that the scan cannot continue.
* Otherwise it returns the local `error`, which starts at 0 and is
* set to -1 once a bad sector has been found.
*
* NOTE(review): this definition is incomplete in this file. The line
* carrying the function name and parameter list is missing (a bare
* `int` is followed directly by the opening brace). Several names used
* below (flags, start, end, status, size, i, j, needinit, founderr)
* are not declared in the visible body, and a number of `if`, `for`
* and `switch` headers are absent, so the braces do not balance.
* TODO: restore the missing lines before relying on the notes below.
*/
int
{
int error = 0;
int pattern = 0;
int xfercnt;
/*
* Check to be sure we aren't correcting without a defect list
* if the controller can correct the defect.
*
* NOTE(review): the `if` that guards this early return is missing;
* only its body and closing brace remain.
*/
err_print("Current Defect List must be initialized ");
err_print("to do automatic repair.\n");
return (-1);
}
/*
* Define the bounds of the scan. When scan_entire is not set the
* bounds come straight from scan_lower and scan_upper.
*
* NOTE(review): in the scan_entire case only `start = 0` survives;
* the statements that computed `end` for the L_TYPE_SOLARIS and
* L_TYPE_EFI label types are missing, which leaves a stray `else`.
*/
if (scan_entire) {
start = 0;
if (cur_label == L_TYPE_SOLARIS) {
else
} else if (cur_label == L_TYPE_EFI) {
}
} else {
start = scan_lower;
end = scan_upper;
}
/*
* Make sure the user knows if we are scanning over a mounted
* partition.
*
* NOTE(review): the condition for this early return is missing.
*/
err_print("Cannot do analysis on a mounted partition.\n");
return (-1);
}
/*
* Make sure the user knows if we are scanning over a
* partition being used for swapping.
*
* NOTE(review): the condition for this early return is missing.
*/
err_print("Cannot do analysis on a partition \
which is currently being used for swapping.\n");
return (-1);
}
/*
* Check to see if any partitions used for svm, vxvm, ZFS zpool
* or live upgrade are on the disk.
*
* NOTE(review): only the trailing arguments of the in-use check
* remain; the `if (` and the name of the called function are missing.
*/
(diskaddr_t)-1, 0, 0))) {
err_print("Cannot do analysis on a partition "
"while it in use as described above.\n");
return (-1);
}
/*
* If we are scanning destructively over certain sectors, mark the
* label dirty by setting LABEL_DIRTY in cur_flags. The flag is cleared
* again next to the write_label() call after the `out:` label below.
*
* NOTE(review): the conditions that decide which sectors count (and
* the outer test that makes this apply only to destructive scans) are
* missing, so the exact rules cannot be read from here.
*/
if (cur_label == L_TYPE_SOLARIS) {
if (!EMBEDDED_SCSI) {
}
cur_flags |= LABEL_DIRTY;
}
}
if (start == 0) {
cur_flags |= LABEL_DIRTY;
}
}
/*
* Initialize the summary info on sectors repaired.
*/
scan_blocks_fixed = 0;
/*
* Loop through the passes of the scan. If required, loop forever.
*
* NOTE(review): the loop header for the passes is missing.
*/
/*
* Determine the data pattern to use if pattern testing
* is to be done.
*
* NOTE(review): the assignments that pick the pattern under
* `if (scan_random)` / `else`, the `switch` header for the SCAN_PURGE
* cases and the head of the print call that owns the "The last %d
* passes ..." format string are all missing. What remains shows that
* `pattern` is reset to 0 in the NPPATTERNS case and incremented in
* the default case.
*/
if (flags & SCAN_PATTERN) {
if (scan_random)
else
if (flags & SCAN_PURGE) {
case NPPATTERNS:
pattern = 0;
if (!error) {
"\nThe last %d passes were successful, running alpha pattern pass", NPPATTERNS);
} else {
pattern++;
};
break;
case READPATTERN:
default:
pattern++;
break;
}
}
} else
fmt_print("\n");
/*
* Mark the pattern buffer as corrupt, since it
* hasn't been initialized.
*/
needinit = 1;
/*
* Print the first block number to the log file if
* logging is on so there is some record of what
* analysis was performed.
*
* NOTE(review): the statement that prints the block number is
* missing; only the trailing newline is logged here.
*/
if (log_file) {
log_print("\n");
}
/*
* Loop through this pass, each time analyzing an amount
* specified by the global parameters.
*
* NOTE(review): the loop header is missing; only the `xfercnt = 0`
* reset and a dangling `else` remain.
*/
xfercnt = 0;
else
/*
* Print out where we are, so we don't look dead.
* Also store it in summary info for logging.
*
* The \015 (carriage return) sends the cursor back to the start of
* the line, presumably so the next progress report overwrites this
* one.
*/
nolog_print(" ");
nolog_print(" \015");
/* Clear the disk status before the I/O so a stale value is not seen. */
disk_error = 0;
/*
* Do the actual analysis.
*
* NOTE(review): only the final `&xfercnt` argument of the call
* survives; the call head and the assignment to `status` are missing.
*/
&xfercnt);
/*
* If there were no errors, the pattern buffer is
* still initialized, and we just loop to next chunk.
*/
needinit = 0;
if (!status)
continue;
/*
* There was an error. Check if surface analysis
* can be continued.
*/
if (handle_error_conditions()) {
return (-1);
}
/*
* There was an error. Mark the pattern buffer
* corrupt so it will get reinitialized.
*/
needinit = 1;
/*
* If it was not a media error, ignore it.
*/
if (!media_error)
continue;
/*
* Loop 5 times through each sector of the chunk,
* analyzing them individually. j counts size * 5 iterations and
* i = j % size selects the sector within the chunk, so every
* sector is visited five times.
*
* NOTE(review): the per-sector analysis call that sets `status`
* inside the loop is missing.
*/
nolog_print(" ");
nolog_print(" \015");
founderr = 0;
for (j = 0; j < size * 5; j++) {
i = j % size;
disk_error = 0;
needinit = 0;
if (!status)
continue;
/*
* There was an error. Check if surface analysis
* can be continued.
*/
if (handle_error_conditions()) {
return (-1);
}
/*
* An error occurred. Mark the buffer
* corrupt and see if it was media
* related.
*/
needinit = 1;
if (!media_error)
continue;
/*
* We found a bad sector. Print out a message
* and fix it if required.
*
* NOTE(review): the message and the repair call are missing; only
* the bookkeeping (founderr, error) and part of the brace structure
* remain.
*/
founderr = 1;
error = -1;
}
} else
err_print("\n");
/*
* Stop after the error if required.
*/
if (scan_stop)
goto out;
}
/*
* Mark the pattern buffer corrupt to be safe.
*/
needinit = 1;
/*
* We didn't find an individual sector that was bad.
* Print out a warning.
*/
if (!founderr) {
err_print("Warning: unable to pinpoint ");
err_print("defective block.\n");
}
}
/*
* Print the end of each pass to the log file.
*
* NOTE(review): as above, only the trailing newline is logged here.
*/
if (log_file) {
log_print("\n");
}
/* A negative value marks the current-block summary as not valid. */
scan_cur_block = -1;
fmt_print("\n");
/*
* alternate the read and write for SCAN_VERIFY test
*
* NOTE(review): the body that flips the pass is missing.
*/
if (flags & SCAN_VERIFY) {
}
}
out:
/*
* We got here either by giving up after an error or falling
* through after all passes were completed.
*/
fmt_print("\n");
/*
* If the defect list is dirty, write it to disk,
* if scan_restore_defects (the default) is true.
*
* NOTE(review): the first half of the condition and the write itself
* are missing.
*/
(scan_restore_defects)) {
}
/*
* If the label is dirty, write it to disk.
* if scan_restore_label (the default) is true.
*
* NOTE(review): the guarding `if` is missing; the flag is cleared
* before write_label() is called and its return value is discarded.
*/
cur_flags &= ~LABEL_DIRTY;
(void) write_label();
}
/*
* If we dropped down to here after an error, we need to write
* the final block number to the log file for record keeping.
* scan_cur_block is still >= 0 only if the pass did not run to its
* end, because it is set to -1 at the end of each pass.
*/
if (log_file && scan_cur_block >= 0) {
log_print("\n");
}
/*
* NOTE(review): the argument list of this fmt_print() call is cut
* off; the value for %lld and the closing parenthesis are missing.
*/
fmt_print("Total of %lld defective blocks repaired.\n",
/*
* Reinitialize the logging variables so they don't get used
* when they are not really valid.
*
* NOTE(review): the reinitialization statements are missing.
*/
return (error);
}
/*
* This routine is called to repair a bad block discovered
* during a scan operation. Return 0 for success, 1 for failure.
* (This has been extracted out of do_scan(), to simplify it.)
*
* NOTE(review): this definition is incomplete in this file. The line
* with the function name and parameter list is missing (only the
* old-style parameter declaration `int mode;` remains), `result` is
* used but never declared or initialized in the visible body, and
* several conditions and calls are missing, so the braces do not
* balance. In particular, the initial value of `result` that would
* give the documented failure return cannot be confirmed from here.
*/
static int
int mode;
{
int status;
char *buf;
int buf_is_good;
int i;
/*
* NOTE(review): the check for controller repair support that guards
* this early return is missing.
*/
err_print("Warning: Controller does ");
err_print("not support repairing.\n\n");
return (result);
}
/*
* NOTE(review): the buffer allocation and its failure test are
* missing; `buf` is never assigned in the visible code.
*/
err_print("Warning: no memory.\n\n");
return (result);
}
/*
* Determine if the error appears to be hard or soft. We
* already assume there's an error. If we can get any
* good data out of the sector, write that data back
* after the repair.
*
* Up to five attempts are made; the first one that leaves status
* at 0 marks the buffer as good and ends the loop.
*
* NOTE(review): the read that sets `status` inside the loop is
* missing.
*/
buf_is_good = 0;
for (i = 0; i < 5; i++) {
if (status == 0) {
buf_is_good = 1;
break;
}
}
/*
* NOTE(review): the arguments of the first fmt_print() are cut off,
* and the repair call that sets `status` is missing.
*/
fmt_print("Repairing %s error on %llu (",
fmt_print(")...");
if (status) {
/*
* If the repair failed, we note it and will return the
* failure. However, the analysis goes on.
*/
fmt_print("failed.\n\n");
} else {
/*
* The repair worked. Write the good data we could
* recover from the failed block, if possible.
* If not, zero the block. In doing so, try to
* determine if the new block appears ok.
*
* NOTE(review): the zeroing, the write-back and the re-read that
* update `status` are missing from the branches below.
*/
if (!buf_is_good) {
} else {
fmt_print("ok.\n");
}
if (status == 0) {
}
if (status) {
fmt_print("The new block also appears defective.\n");
}
fmt_print("\n");
/*
* add the defect to the list and write the list out.
* Also, kill the working list so it will get resynced
* with the current list.
*
* For embedded scsi, we don't require a defect list.
* However, if we have one, add the defect if the
* list includes the grown list. If not, kill it
* to force a resync if we need the list later.
*
* NOTE(review): both branches below are empty here; the list
* handling itself is missing.
*/
if (EMBEDDED_SCSI) {
} else {
}
}
/*
* The next "if" statement reflects the fix for
* bug id 1026096 where format keeps adding the
* same defect to the defect list.
*
* NOTE(review): the "if" statement the comment refers to is missing.
*/
fmt_print("Current list updated\n");
} else {
}
/* Log the repair. */
/* return ok */
result = 0;
}
return (result);
}
/*
* This routine analyzes a set of sectors on the disk. It simply returns
* an error if a defect is found. It is called by do_scan().
*
* Returns 0 when no error occurred and -1 otherwise. On entry the
* global media_error is cleared. On the error path, if the on-disk
* data had been overwritten (`corrupt` is nonzero), the code reports
* when the original data could not be restored.
*
* NOTE(review): this definition is incomplete in this file. The line
* with the function name and most of the parameter list is missing
* (only the old-style declarations of `data` and `xfercntp` remain),
* `flags`, `i`, `nints` and `driver_flags` are not declared in the
* visible body, and most of the I/O calls that set `status` are
* missing, so the braces do not balance.
*/
static int
register unsigned data;
int *xfercntp;
{
int corrupt = 0;
int status;
media_error = 0;
/*
* NOTE(review): only the tail of a `return (...)` statement remains
* here. Presumably SCAN_VERIFY requests are handed off to
* verify_blocks() (its header comment says it is called from this
* routine) -- TODO confirm.
*/
if (flags & SCAN_VERIFY) {
driver_flags, xfercntp));
}
/*
* Initialize the pattern buffer if necessary. Every int-sized slot
* of pattern_buf is filled with `data`.
*
* NOTE(review): the condition that decides whether initialization is
* needed is missing.
*/
for (i = 0; i < nints; i++)
*((int *)((int *)pattern_buf + i)) = data;
}
/*
* Lock out interrupts so we can ensure valid data will get
* restored. This is necessary because there are modes
* of scanning that corrupt the disk data then restore it at
* the end of the analysis.
*
* NOTE(review): no locking call is present below this comment.
*/
/*
* If the disk data is valid, read it into the data buffer.
*
* NOTE(review): the read that sets `status` is missing.
*/
if (flags & SCAN_VALID) {
if (status)
goto bad;
}
/*
* If we are doing pattern testing, write and read the pattern
* from the pattern buffer.
*/
if (flags & SCAN_PATTERN) {
/*
* If the disk data was valid, mark it corrupt so we know
* to restore it later.
*/
if (flags & SCAN_VALID)
corrupt++;
/*
* Only write if we're not on the read pass of SCAN_PURGE.
*
* NOTE(review): only the final `xfercntp` argument of the write
* call remains.
*/
if (!(flags & SCAN_PURGE_READ_PASS)) {
xfercntp);
if (status)
goto bad;
}
/*
* Only read if we are on the read pass of SCAN_PURGE, if we
* are purging.
*
* NOTE(review): the condition and the head of the read call are
* missing.
*/
xfercntp);
if (status)
goto bad;
}
}
/*
* If we are doing a data compare, make sure the pattern
* came back intact.
* Only compare if we are on the read pass of SCAN_PURGE, or
* we wrote random data instead of the expected data pattern.
*
* NOTE(review): the compare condition and loop are missing; what
* remains reports the expected word and the byte offset
* (nints - i) * sizeof (int) of the mismatch before taking the error
* path.
*/
err_print("Data miscompare error (expecting ");
*((int *)((int *)pattern_buf +
(nints - i))));
err_print(", offset = 0x%llx.\n",
(nints - i) * sizeof (int));
goto bad;
}
}
/*
* If we are supposed to write data out, do so.
*
* NOTE(review): the write that sets `status` is missing.
*/
if (flags & SCAN_WRITE) {
if (status)
goto bad;
}
/*
* No errors occurred, return ok.
*/
return (0);
bad:
/*
* There was an error. If the data was corrupted, we write it
* out from the data buffer to restore it.
*
* NOTE(review): the restoring write and its failure test are
* missing, so the warning below is unconditional inside this branch
* as the code stands.
*/
if (corrupt) {
err_print("Warning: unable to restore original data.\n");
}
/*
* Return the error.
*/
return (-1);
}
/*
* This routine analyzes a set of sectors on the disk. It simply returns
* an error if a defect is found. It is called by analyze_blocks().
* For simplicity, this is done as a separate function instead of
* making the analyze_block routine complex.
*
* This routine implements the 'verify' command. It writes the disk
* by writing unique data for each block; after the write pass, it
* reads the data and verifies for correctness. Note that the entire
* disk (or the range of disk) is fully written first and then read.
* This should eliminate any caching effect on the drives.
*
* Returns 0 when no error occurred and -1 otherwise.
*
* NOTE(review): this definition is incomplete in this file. The line
* with the function name and the leading parameters is missing (the
* parameter list starts at `unsigned data`), `flags`, `i`, `nints`,
* `status` and `ptr` are not declared in the visible body, and the
* I/O calls and the compare condition are missing, so the braces do
* not balance.
*/
static int
unsigned data,
int driver_flags,
int *xfercntp)
{
/*
* Initialize the pattern buffer if we are in write pass.
* Use the block number itself as data, each block has unique
* buffer data that way.
*
* NOTE(review): the statements that fill the buffer are missing; the
* inner loop body is empty here.
*/
if (!(flags & SCAN_VERIFY_READ_PASS)) {
for (i = 0; i < nints; i++) {
}
}
}
/*
* Only write if we're not on the read pass of SCAN_VERIFY.
*
* NOTE(review): the write that sets `status` is missing.
*/
if (!(flags & SCAN_VERIFY_READ_PASS)) {
if (status)
goto bad;
} else {
/*
* Only read if we are on the read pass of SCAN_VERIFY
*
* NOTE(review): the read that sets `status` is missing.
*/
if (status)
goto bad;
/*
* compare and make sure the pattern came back intact.
*
* NOTE(review): the comparison itself and most arguments of the
* two err_print() calls are missing; only `ptr--`, the format
* strings and a trailing `sizeof (int)` remain.
*/
for (i = 0; i < nints; i++) {
ptr--;
err_print("Data miscompare error "
"(expecting 0x%x, got 0x%x) at ",
err_print(", offset = 0x%x.\n",
sizeof (int));
goto bad;
}
}
}
}
/*
* No errors occurred, return ok.
*/
return (0);
bad:
return (-1);
}
static int
{
/*
* Check if the errno is ENXIO.
*/
fmt_print("\n\nWarning:Cannot access drive, ");
fmt_print("aborting surface analysis.\n");
return (-1);
}
/*
* check for disk errors
*/
switch (disk_error) {
case DISK_STAT_RESERVED:
case DISK_STAT_UNAVAILABLE:
fmt_print("\n\nWarning:Drive may be reserved ");
fmt_print("or has been removed, ");
fmt_print("aborting surface analysis.\n");
return (-1);
case DISK_STAT_NOTREADY:
fmt_print("\n\nWarning: Drive not ready, ");
fmt_print("aborting surface analysis.\n");
return (-1);
case DISK_STAT_DATA_PROTECT:
fmt_print("\n\nWarning: Drive is write protected, ");
fmt_print("aborting surface analysis.\n");
return (-1);
default:
break;
}
return (0);
}