ufs_panic.c revision 66c9f83d84f7bf332be455a4dfb4b1346faf2823
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <sys/pathname.h>
/* handy */
/*
 * Absolute value of x.  The argument is expanded (and therefore evaluated)
 * more than once, so it must not have side effects.
 */
#define abs(x) (((x) < 0) ? -(x) : (x))
#if defined(DEBUG)
/*
 * Debug verbosity levels.  Each level is a single bit above the low
 * nibble, ordered from least (MAJOR) to most (HIDEOUS) verbose, so a
 * level can be tested either as a bit or by magnitude.
 */
#define DBGLVL_NONE 0x00000000
#define DBGLVL_MAJOR 0x00000100
#define DBGLVL_MINOR 0x00000200
#define DBGLVL_MINUTE 0x00000400
#define DBGLVL_TRIVIA 0x00000800
#define DBGLVL_HIDEOUS 0x00001000
/*
 * Debug behaviour flags; these occupy the low four bits (DBGFLG_FLAGMASK).
 *	NOPANIC		tested by the panic path in DEBUG builds
 *	LVLONLY		print only messages whose level bit is set, rather
 *			than every message at or below the current level
 *	FIXWOULDPANIC	let a failure that would otherwise panic fall
 *			through to the fix path
 */
#define DBGFLG_NONE 0x00000000
#define DBGFLG_NOPANIC 0x00000001
#define DBGFLG_LVLONLY 0x00000002
#define DBGFLG_FIXWOULDPANIC 0x00000004
#define DBGFLG_FLAGMASK 0x0000000F
/* parenthesized so the expansion is a single operand in any expression */
#define DBGFLG_LEVELMASK (~DBGFLG_FLAGMASK)
{ \
if (DEBUG_LEVEL != DBGLVL_NONE) { \
if (DEBUG_FLAGS & DBGFLG_LVLONLY) { \
if (DEBUG_LEVEL & dbg_level) { \
call; \
} \
} else { \
if (dbg_level <= DEBUG_LEVEL) { \
call; \
} \
} \
} \
}
#else /* !DEBUG */
#endif /* DEBUG */
#define NULSTRING ""
/* somewhat arbitrary limits, in seconds */
/* all probably ought to be different, but these are convenient for debugging */
/* all of these are in units of seconds used for retry period while ... */
#define NO_ERROR 0
/*
* per filesystem flags
*/
/*
 * Bit kept in a failure record's uf_flags: set when the record enters
 * UF_FIXING, cleared again when it (re)enters the locked state, and tested
 * when deciding which "repair is late" warning applies.
 */
const int UFSFX_REPAIR_START = (1 << 28);
/* return protocols */
/*
 * Verdict returned by triage() and switched on by the fault entry point.
 *
 *	TRIAGE_DEAD		no fix attempt: the system is already
 *				panicking, the file system cannot be
 *				identified, or locking it would deadlock
 *				(accounting or swapping on this fs)
 *	TRIAGE_NO_SPIRIT	the file system is marked "panic"
 *	TRIAGE_ATTEND_TO	a fix attempt should be made
 *
 * TRIAGE_NO_SPIRIT and TRIAGE_ATTEND_TO are returned and switched on in
 * this file, so they must be declared here alongside TRIAGE_DEAD.
 */
typedef enum triage_return_code {
	TRIAGE_DEAD = -1,
	TRIAGE_NO_SPIRIT,
	TRIAGE_ATTEND_TO
} triage_t;
/*
 * Result of a state function.  SFRC_FAIL is zero, so a result can be
 * tested directly as a boolean.
 */
typedef enum statefunc_return_code {
	SFRC_FAIL = 0,
	SFRC_SUCCESS = 1
} sfrc_t;
/* external references */
/* in ufs_thread.c */
/* globals */
/*
* patchable constants:
* These are set in ufsfx_init() [called at modload]
*/
/*
 * Timing limits used by the failure-handling code, gathered in one
 * structure so they can be patched as a unit.  Presumably all values are
 * in seconds, per the limits commentary earlier in this file - confirm.
 */
struct ufs_failure_tunable {
	long uft_too_long;		/* limit repair startup time */
	long uft_fixstart_period;	/* pre-repair start period */
	long uft_fixpoll_period;	/* post-fsck start period */
	long uft_short_err_period;	/* post-error short period */
	long uft_long_err_period;	/* post-error long period */
};

/* the single instance of the tunables */
struct ufs_failure_tunable ufsfx_tune;
/* internal statistics of events */
/*
 * Event counters for the failure-handling code; uf_stats is the single
 * instance.
 * NOTE(review): no members are declared here, yet the debug dump output
 * labels "uf_stats.lock_violations" and "unmount_failures" - confirm the
 * counter fields that belong in this structure.
 */
struct uf_statistics {
} uf_stats;
/*
 * Action passed to a state function.
 *
 *	UFA_ERROR	placeholder for an unexpected action; it labels the
 *			"<unexpected action?>" row of the debug action-name
 *			table, so it must be declared here.  Given the value
 *			-1, like the other "unexpected" placeholders in this
 *			file, so that UFA_FOUND and UFA_SET keep the values
 *			0 and 1.
 *	UFA_FOUND	the failure was found on the queue in this state
 *	UFA_SET		the failure is being moved into this state
 */
typedef enum state_action {
	UFA_ERROR = -1,
	UFA_FOUND, /* found uf in state */
	UFA_SET /* change uf to state */
} ufsa_t;
/* state definition */
/*
 * Descriptor for one failure state: one element of the state_desc[]
 * table.  get_state_desc() hands out pointers to these, and state_name()
 * reports ud_name.
 * NOTE(review): the queue walker tests s->ud_sfp, which is not declared
 * in this structure as it stands - confirm the full member list.
 */
typedef struct uf_state_desc {
int ud_v; /* value */
char *ud_name; /* name */
/* per-state actions */
struct uf_state_desc_attr {
/* encountering the error */
unsigned unused;
} ud_attr; /* attributes attached to this state */
} ufsd_t;
/*
* forward references
*/
/* thread to watch for failures */
static void ufsfx_thread_fix_failures(void *);
static int ufsfx_do_failure_q(void);
static void ufsfx_kill_fix_failure_thread(void *);
/* routines called when failure occurs */
__KVPRINTFLIKE(2);
__KVPRINTFLIKE(2);
static void queue_failure(ufs_failure_t *);
/*PRINTFLIKE2*/
static void real_panic(ufs_failure_t *, const char *, ...)
__KPRINTFLIKE(2);
__KVPRINTFLIKE(2);
/* routines called when failure record is acted upon */
static int terminal_state(ufs_failure_states_t);
/* support routines, called by sf_nonterm_cmn and sf_term_cmn */
static void pester_msg(ufs_failure_t *, int);
static int lockfs_failure(ufs_failure_t *);
static int lockfs_success(ufs_failure_t *);
static int fsck_active(ufs_failure_t *);
/* low-level support routines */
static char *fs_name(ufs_failure_t *);
#if defined(DEBUG)
static char *state_name(ufs_failure_states_t);
static char *err_name(int);
static void dump_uf_list(char *msg);
static void dump_uf(ufs_failure_t *, int i);
#endif /* DEBUG */
/*
*
* State Transitions:
*
* normally:
* if flagged to be locked but not unmounted: (UFSMNT_ONERROR_LOCK)
* UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED
*
* The only difference between these two is that the fsck must be started
* manually.
*
* if flagged to be unmounted: (UFSMNT_ONERROR_UMOUNT)
* UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX
*
* if flagged to panic: (UFSMNT_ONERROR_PANIC)
* UNDEF -> INIT -> PANIC
*
* if a secondary panic on a file system which has an active failure
* record:
* UNDEF -> INIT -> QUEUE -> REPLICA
*
* UNDEF, INIT, QUEUE all are set in the context of the failing thread.
* All other states (except possibly PANIC) are set by the monitor
* (lock) thread.
*
*/
ufsd_t state_desc[] =
{
{ 0, 1, 0 } },
{ 0, 1, 0 } },
{ 0, 1, 0 } },
{ 0, 1, 0 } },
UF_QUEUE, { 0, 0, 0 } },
#if defined(DEBUG)
UF_PANIC |
#endif /* DEBUG */
UF_LOCKED, { 0, 0, 0 } },
UF_FIXING, { 1, 0, 0 } },
#if defined(DEBUG)
UF_PANIC |
#endif /* DEBUG */
{ 1, 0, 0 } },
UF_QUEUE, { 1, 0, 0 } },
/* XXX make this narrower */ UF_ALLSTATES, { 0, 0, 0 } },
UF_UNDEF, { 0, 0, 0 } }
};
/* unified collection */
/*
 * Single handle that bundles pointers to the statistics and the tunables
 * for the failure-handling code.
 */
struct ufsfx_info {
	struct uf_statistics *ufi_statp;	/* event statistics */
	struct ufs_failure_tunable *ufi_tunep;	/* patchable limits */
};

/* the one instance of the collection */
struct ufsfx_info uffsinfo;
#if defined(DEBUG)
/*
 * One row of the debug action-name table: an action code and its
 * printable name.  Every row of act_desc[] supplies two initializers
 * (code, name), so the structure needs a leading code member; without it
 * the action code would be used to initialize ad_name.
 */
struct action_description {
	int ad_v;	/* action code (a ufsa_t value) */
	char *ad_name;	/* printable name */
};
/* pseudo-errno that labels the "unexpected errno" row */
#define EUNK (-1)

/* One row of the debug errno-name table. */
struct error_description {
	int ed_errno;	/* errno value, EUNK, or NO_ERROR */
	char *ed_name;	/* printable name */
};

/*
 * Printable names for the errno values this file expects to see
 * (DEBUG builds only).
 */
struct error_description err_desc[] = {
	{ EUNK, "<unexpected errno?>" },
	{ EINVAL, "EINVAL" },
	{ EACCES, "EACCES" },
	{ EPERM, "EPERM" },
	{ EIO, "EIO" },
	{ EDEADLK, "EDEADLK" },
	{ EBUSY, "EBUSY" },
	{ EAGAIN, "EAGAIN" },
	{ ERESTART, "ERESTART" },
	{ ETIMEDOUT, "ETIMEDOUT" },
	{ NO_ERROR, "Ok" },
};
/*
 * Printable names for state-function actions, used in trace output
 * (DEBUG builds only).  The names for UFA_FOUND and UFA_SET carry their
 * own embedded double quotes.
 */
struct action_description act_desc[] = {
	{ UFA_ERROR, "<unexpected action?>" },
	{ UFA_FOUND, "\"found\"" },
	{ UFA_SET, "\"set\"" },
};
/* pseudo lock type that labels the "unexpected lock" and terminator rows */
#define LOCKFS_BADLOCK (-1)

/* One row of the debug lockfs-type name table. */
struct lock_description {
	int ld_type;	/* LOCKFS_* lock type, or LOCKFS_BADLOCK */
	char *ld_name;	/* printable name; NULL in the final row */
};

/*
 * Printable names for lockfs lock types (DEBUG builds only).  The table
 * ends with a row whose name is NULL.
 */
struct lock_description lock_desc[] = {
	{ LOCKFS_BADLOCK, "<unexpected lock?>" },
	{ LOCKFS_ULOCK, "Unlock" },
	{ LOCKFS_ELOCK, "Error Lock" },
	{ LOCKFS_HLOCK, "Hard Lock" },
	{ LOCKFS_OLOCK, "Old Lock" },
	{ LOCKFS_BADLOCK, NULL }
};
#endif /* DEBUG */
/*
* ufs_fault, ufs_fault_v
*
* called instead of cmn_err(CE_PANIC, ...) by ufs routines
* when a failure is detected to put the file system into an
* error state (if possible) or to devolve to a panic otherwise
*
* vnode is some vnode in this file system, used to find the way
* to ufsvfs, vfsp etc. Since a panic can be called from many
* levels, the vnode is the most convenient hook to pass through.
*
*/
/*PRINTFLIKE2*/
int
{
int error;
MINOR(("[ufs_fault"));
return (error);
}
const char *nullfmt = "<null format?>";
static int
{
int need_vfslock;
MINOR(("[ufs_fault_v"));
if (vp) {
/*
* Something bad has happened. That is why we are here.
*
* In order for the bad thing to be recorded in the superblock
* we need to write to the superblock directly.
* In the case that logging is enabled the logging code
* would normally intercept our write as a delta to the log,
* thus we mark the filesystem FSBAD in any case.
*/
if (need_vfslock) {
}
if (need_vfslock) {
}
}
switch (fix) {
default:
case TRIAGE_DEAD:
case TRIAGE_NO_SPIRIT:
/* LINTED: warning: logical expression always true: op "||" */
#if defined(DEBUG)
if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) {
break;
}
/* FALLTHROUGH */
#else
break;
#endif /* DEBUG */
case TRIAGE_ATTEND_TO:
/* q thread not running yet? */
if (!ufs_fix.uq_threadp) {
} else {
/*
* We got the lock but we are not the current
* threadp so we have to release the lock.
*/
}
} else {
MINOR((": fix failure thread already running "));
/*
* No need to log another failure as one is already
* being logged.
*/
break;
}
} else {
/*
* Must check if we actually still own the lock and
* if so then release the lock and move on with life.
*/
}
break;
}
break;
}
MINOR(("] "));
return (err);
}
/*
* triage()
*
* Attempt to fix iff:
* - the system is not already panicking
* - this file system isn't explicitly marked not to be fixed
* - we can connect to the user-level daemon
* These conditions are detectable later, but if we can determine
* them in the failing threads context the core dump may be more
* useful.
*
*/
static triage_t
{
int need_unlock_vfs;
int fs_flags;
MINUTE(("[triage"));
if (panicstr) {
MINUTE((
": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr));
return (TRIAGE_DEAD);
}
MINUTE((
": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n"));
return (TRIAGE_DEAD);
}
/* use tryenter and continue no matter what since we're panicky */
if (need_unlock_vfs)
if (need_unlock_vfs)
if (fs_flags & UFSFX_PANIC) {
MINUTE((
": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n"));
return (TRIAGE_NO_SPIRIT);
}
if (ufs_checkaccton(vp) != 0) {
MINUTE((
": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n"));
return (TRIAGE_DEAD);
}
if (ufs_checkswapon(vp) != 0) {
MINUTE((
": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n"));
return (TRIAGE_DEAD);
}
MINUTE((": return TRIAGE_ATTEND_TO] "));
return (TRIAGE_ATTEND_TO);
}
/*
* init failure
*
* This routine allocates a failure struct and initializes
* its member elements.
* Space is allocated for copies of dynamic identifying fs structures
* passed in. Without a much more segmented kernel architecture
* this is as protected as we can make it (for now.)
*/
static ufs_failure_t *
{
int initialization_worked = 0;
int need_vfs_unlock;
MINOR(("[init_failure"));
if (!new) {
MINOR((": kmem_zalloc failed]\n"));
return (NULL);
}
/*
* enough information to make a fix attempt possible?
*/
goto errout;
goto errout;
goto errout;
goto errout;
/* intialize values */
if (need_vfs_unlock) {
/*
* not much alternative here, but we're panicking
* already, it couldn't be worse - so just
* proceed optimistically and take note.
*/
MINOR((": couldn't get vfs lock"))
need_vfs_unlock = 0;
}
}
}
if (need_vfs_unlock)
if (initialization_worked) {
MINOR(("] "));
return (new);
}
/* FALLTHROUGH */
if (new)
MINOR((": failed]\n"));
return (NULL);
}
static void
{
MINOR(("[queue_failure"));
else
}
"queue_failure: NULL panic str?"));
MINOR(("] "));
}
/*PRINTFLIKE2*/
static void
{
MINUTE(("[real_panic "));
MINUTE((": return?!]\n"));
}
static void
{
int seriousness = CE_PANIC;
int need_unlock;
MINUTE(("[real_panic_v "));
if (f && f->uf_ufsvfsp)
TRANS_SETERROR(f->uf_ufsvfsp);
#if defined(DEBUG)
if (DEBUG_FLAGS & DBGFLG_NOPANIC) {
}
#endif /* DEBUG */
if (!f && fmt)
else
"real_panic: <unknown panic?>");
if (f) {
if (need_unlock) {
mutex_enter(&f->uf_mutex);
}
f->uf_retry = -1;
if (need_unlock) {
mutex_exit(&f->uf_mutex);
}
}
MINUTE((": return?!]\n"));
}
/*
* initializes ufs panic structs, locks, etc
*/
void
ufsfx_init(void)
{
/* trace entry; MINUTE is presumably a DEBUG-only trace macro - confirm */
MINUTE(("[ufsfx_init"));
/* patchable; unchanged while running, so no lock is needed */
/*
 * NOTE(review): the tunables commentary says the patchable constants
 * are set in this routine, but no assignments are present between the
 * two trace calls - confirm.
 */
MINUTE(("] "));
}
/*
* initializes per-ufs values
* returns 0 (ok) or errno
*/
int
{
MINUTE((": %s: fx_flags:%ld,",
/*
* onerror={panic ^ lock only ^ unmount}
*/
MINUTE((" PANIC"));
MINUTE((" LCKONLY"));
MINUTE((" LCKUMOUNT"));
} else {
(UFSMNT_ONERROR_FLGMASK >> 4));
MINUTE((" DEFAULT"));
}
MINUTE(("]\n"));
return (0);
}
/*
* ufsfx_unmount
*
* called during unmount
*/
void
{
ufs_failure_t *f;
int must_unlock_list;
MINUTE(("[ufsfx_unmount"));
if (!ufsvfsp) {
MINUTE((": no ufsvfsp]"));
return;
}
int must_unlock_failure;
if (must_unlock_failure) {
mutex_enter(&f->uf_mutex);
}
if (f->uf_ufsvfsp == ufsvfsp) {
/*
* if we owned the failure record lock, then this
* is probably a fix failure-triggered unmount, so
* the warning is not appropriate or needed
*/
/* XXX if rebooting don't print this? */
"Unmounting %s while error-locked",
fs_name(f));
}
f->uf_ufsvfsp = NULL;
f->uf_vfs_ufsfxp = NULL;
f->uf_vfs_lockp = NULL;
f->uf_retry = -1;
}
if (must_unlock_failure)
mutex_exit(&f->uf_mutex);
}
if (must_unlock_list)
MINUTE(("] "));
}
/*
* ufsfx_(un)lockfs
*
* This is called after it is certain that the (un)lock will succeed.
*/
void
{
ufs_failure_t *f;
int need_unlock;
int need_unlock_list;
int informed = 0;
MINUTE(("[ufsfx_unlockfs"));
if (!ufsvfsp)
return;
if (need_unlock_list)
if (need_unlock)
mutex_enter(&f->uf_mutex);
/*
* This might happen if we don't notice that
* the fs gets marked FSFIX before it is
* marked FSCLEAN, as might occur if the
* the superblock was hammered directly.
*/
if (!informed) {
informed = 1;
"Unlock of %s succeeded before fs_clean marked FSFIX?",
fs_name(f));
}
/*
* pass through fixing state so
* transition protocol is satisfied
*/
MINUTE((": failed] "));
}
}
/* it's already fixed, so don't panic now */
MINUTE((": failed] "));
}
}
if (need_unlock)
mutex_exit(&f->uf_mutex);
}
if (need_unlock_list)
MINUTE(("] "));
}
void
{
ufs_failure_t *f;
int need_unlock;
int need_unlock_list;
MINUTE(("[ufsfx_lockfs"));
if (!ufsvfsp)
return;
if (need_unlock_list)
if (need_unlock)
mutex_enter(&f->uf_mutex);
switch (f->uf_s) {
default:
"fs %s not in state UF_TRYLCK, UF_LOCKED or UF_FIXING",
fs_name(f));
break;
case UF_TRYLCK:
MINUTE((": failed] "));
}
break;
case UF_LOCKED:
MINUTE((": failed] "));
}
break;
case UF_FIXING:
break;
}
}
if (need_unlock)
mutex_exit(&f->uf_mutex);
}
if (need_unlock_list)
MINUTE(("] "));
}
/*
* error lock, trigger fsck and unlock those fs with failures
* blatantly copied from the hlock routine, although this routine
* triggers differently in order to use uq_ne as meaningful data.
*/
/* ARGSUSED */
void
ufsfx_thread_fix_failures(void *ignored)
{
/* result of the last queue pass; non-zero means the queue is run again */
int retry;
/*
 * NOTE(review): the next line is only the tail of a call whose start
 * is not present here - confirm the complete statement.
 */
"ufsfixfail");
MINUTE(("[ufsfx_thread_fix_failures] "));
/* service loop: never exits by falling out of the function */
for (;;) {
/* sleep until there is work to do */
/* process failures on our q */
do {
retry = ufsfx_do_failure_q();
if (retry) {
}
} while (retry);
}
/* NOTREACHED */
}
/*
* watch for fix-on-panic work
*
* returns # of seconds to sleep before trying again
* and zero if no retry is needed
*/
int
ufsfx_do_failure_q(void)
{
ufs_failure_t *f;
long retry = 1;
ufsd_t *s;
MAJOR(("[ufsfx_do_failure_q"));
return (retry);
retry = 0;
/*
* walk down failure list
* depending on state of each failure, do whatever
* is appropriate to move it to the next state
* taking note of whether retry gets set
*
* retry protocol:
* wakeup in shortest required time for any failure
* retry == 0; nothing more to do (terminal state)
* retry < 0; reprocess queue immediately, retry will
* be abs(retry) for the next cycle
* retry > 0; schedule wakeup for retry seconds
*/
if (!mutex_tryenter(&f->uf_mutex)) {
retry = 1;
continue;
}
s = get_state_desc(f->uf_s);
MINOR((": found%s: %s, \"%s: %s\"\n",
mutex_exit(&f->uf_mutex);
continue;
}
if (s->ud_sfp)
if (f->uf_retry != 0) {
if (f->uf_retry < 0)
}
mutex_exit(&f->uf_mutex);
}
if (retry < 0) {
goto rescan_q;
}
return (retry);
}
static void
{
MINUTE(("[pester_msg"));
/*
* XXX if seems too long for this fs, poke administrator
* XXX to run fsck manually (and change retry time?)
*/
"Waiting for repair of %s to %s",
fs_name(f),
MINUTE(("]"));
}
static time_t
{
MINUTE(("[trylock_time_exceeded"));
MINUTE(("] "));
}
static int
{
MINUTE(("[get_lockfs_status"));
if (!f->uf_ufsvfsp) {
MINUTE((": ufsvfsp is NULL]\n"));
return (0);
}
if (f->uf_lf_err) {
}
MINUTE(("] "));
return (1);
}
static sfrc_t
{
ufsd_t *s;
int need_unlock;
ASSERT(f);
/*
* if someone else is panicking, just let panic sync proceed
*/
if (panicstr) {
HIDEOUS((": state reset: not fixed] "));
return (sfrc);
}
/*
* bad state transition, an internal error
*/
/* recursion */
MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ",
return (sfrc);
}
s = get_state_desc(new_state);
if (need_unlock)
fs_name(f));
}
if (need_unlock)
/* NULL state functions always succeed */
f->uf_entered_tm = time;
f->uf_counter = 0;
}
HIDEOUS(("]\n"));
return (sfrc);
}
static ufsd_t *
{
ufsd_t *s;
HIDEOUS(("[get_state_desc"));
HIDEOUS(("] "));
return (s);
}
}
HIDEOUS(("] "));
return (&state_desc[0]); /* default */
}
static sfrc_t
{
TRIVIA(("[sf_undef, action is %s, state is %s\n",
act_name(a), state_name(s)));
/* shouldn't find null failure records or ever set one */
TRIVIA(("] "));
return (rc);
}
static sfrc_t
ufs_failure_t *f,
ufsa_t a,
{
switch (a) {
case UFA_SET:
f->uf_begin_tm = time;
f->uf_retry = 1;
if (!f->uf_ufsvfsp) {
TRIVIA((": NULL ufsvfsp]\n"));
return (rc);
}
/*
* because we can call panic from many different levels,
* we can't be sure that we've got the vfs_lock at this
* point. However, there's not much alternative and if
* we don't (have the lock) the worst case is we'll just
* panic again
*/
if (!f->uf_ufsvfsp->vfs_bufp) {
TRIVIA((": NULL vfs_bufp]\n"));
return (rc);
}
TRIVIA((": NULL vfs_fs]\n"));
return (rc);
}
/* vfs_fs = vfs_bufp->b_un.b_fs */
TRIVIA((": NULL vfsp or vfs_dev == NODEV"));
return (rc);
}
rc = SFRC_SUCCESS;
break;
case UFA_FOUND:
default:
/* failures marked init shouldn't even be on the queue yet */
TRIVIA((": found failure with state init]\n"));
}
TRIVIA(("] "));
return (rc);
}
static sfrc_t
ufs_failure_t *f,
ufsa_t a,
{
if (!f->uf_ufsvfsp) {
TRIVIA((": NULL ufsvfsp]\n"));
return (rc);
}
switch (a) {
case UFA_FOUND:
rc = sf_found_queue(f);
break;
case UFA_SET:
/*
* if can't get the vfs lock, just wait until
* UF_TRYLCK to set fx_current
*/
if (mutex_tryenter(f->uf_vfs_lockp)) {
f->uf_vfs_ufsfxp->fx_current = f;
mutex_exit(f->uf_vfs_lockp);
} else {
}
f->uf_retry = 1;
rc = SFRC_SUCCESS;
TRIVIA(("] "));
break;
default:
TRIVIA((": failed] "));
}
return (rc);
}
static sfrc_t
{
int replica;
TRIVIA(("[sf_found_queue"));
/*
* don't need to check for null ufsvfsp because
* unmount must own list's ufs_fix.uq_mutex
* to mark it null and we own that lock since
* we got here.
*/
if (!mutex_tryenter(f->uf_vfs_lockp)) {
TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
f->uf_retry = 1;
return (rc);
}
f->uf_vfs_ufsfxp->fx_current != f &&
/*
* copy general flags to this ufs_failure so we don't
* need to refer back to the ufsvfs, or, more importantly,
* don't need to keep acquiring (trying to acquire) vfs_lockp
*
* The most restrictive option wins:
* panic > errlock only > errlock+unmount > repair
* XXX panic > elock > elock > elock+umount
*/
TRIVIA((": marked panic but was queued?"));
real_panic(f, " ");
/*NOTREACHED*/
}
mutex_exit(f->uf_vfs_lockp);
return (rc);
}
if (replica) {
if (!set_state(f, UF_REPLICA)) {
f->uf_retry = 1;
TRIVIA((": set to replica failed] "));
} else {
TRIVIA(("] "));
}
mutex_exit(f->uf_vfs_lockp);
return (rc);
}
mutex_exit(f->uf_vfs_lockp);
TRIVIA((": failed] "));
} else {
rc = SFRC_SUCCESS;
}
return (rc);
}
/*
 * Common state function for the non-terminal states; the ASSERT below
 * requires that s is not a terminal state.
 * NOTE(review): the declarator naming this function and the start of its
 * NULL-ufsvfsp guard are not present here; presumably this is
 * sf_nonterm_cmn - confirm.
 *
 * Dispatches first on the action a, then on the state s:
 *	UFA_SET:	UF_TRYLCK -> sf_set_trylck()
 *			UF_LOCKED -> sf_set_locked()
 *			UF_FIXING sets UFSFX_REPAIR_START in uf_flags
 *			UF_UMOUNT succeeds with no further work
 *	UFA_FOUND:	UF_TRYLCK -> sf_found_trylck()
 *			UF_LOCKED, UF_FIXING -> sf_found_lock_fix_cmn()
 *			UF_UMOUNT -> sf_found_umount()
 * Any other action or state only emits a ": failed] " trace and leaves
 * rc as it was.
 */
static sfrc_t
{
ASSERT(!terminal_state(s));
TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n"));
return (rc);
}
switch (a) {
case UFA_SET:
/* entering state s */
switch (s) {
case UF_TRYLCK:
rc = sf_set_trylck(f);
break;
case UF_LOCKED:
rc = sf_set_locked(f);
break;
case UF_FIXING:
/* note in the record that the repair has started */
f->uf_flags |= UFSFX_REPAIR_START;
rc = SFRC_SUCCESS;
break;
case UF_UMOUNT:
rc = SFRC_SUCCESS;
break;
default:
TRIVIA((": failed] "));
}
break;
case UFA_FOUND:
/* record was found on the queue while in state s */
switch (s) {
case UF_TRYLCK:
rc = sf_found_trylck(f);
break;
case UF_LOCKED:
case UF_FIXING:
/* both states share one handler, which is told which state it is */
rc = sf_found_lock_fix_cmn(f, s);
break;
case UF_UMOUNT:
rc = sf_found_umount(f);
break;
default:
TRIVIA((": failed] "));
break;
}
break;
default:
TRIVIA((": failed] "));
break;
}
TRIVIA(("] "));
return (rc);
}
static sfrc_t
{
TRIVIA(("[sf_set_trylck"));
if (!mutex_tryenter(f->uf_vfs_lockp)) {
TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
f->uf_retry = 1;
return (SFRC_FAIL);
}
if (!f->uf_vfs_ufsfxp->fx_current)
f->uf_vfs_ufsfxp->fx_current = f;
mutex_exit(f->uf_vfs_lockp);
TRIVIA(("] "));
return (SFRC_SUCCESS);
}
static sfrc_t
{
struct lockfs lockfs_status;
TRIVIA(("[sf_found_trylck"));
if (trylock_time_exceeded(f) > 0) {
TRIVIA((": failed] "));
return (SFRC_FAIL);
}
if (!get_lockfs_status(f, &lockfs_status)) {
TRIVIA((": failed] "));
return (SFRC_FAIL);
}
if (!set_lockfs(f, &lockfs_status)) {
TRIVIA((": failed] "));
return (SFRC_FAIL);
}
TRIVIA(("] "));
return (SFRC_SUCCESS);
}
static sfrc_t
{
TRIVIA(("[sf_set_locked"));
#if defined(DEBUG)
if (f->uf_flags & UFSFX_REPAIR_START)
TRIVIA(("clearing UFSFX_REPAIR_START "));
#endif /* DEBUG */
f->uf_flags &= ~UFSFX_REPAIR_START;
fs_name(f), f->uf_panic_str);
if (f->uf_flags & UFSFX_LCKONLY)
fs_name(f));
}
/*
* just reset to current state
*/
#if defined(DEBUG)
TRIVIA(("locked->locked "));
#endif /* DEBUG */
TRIVIA(("] "));
return (SFRC_SUCCESS);
}
static sfrc_t
{
if (s & UF_LOCKED) {
f->uf_entered_tm);
HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ",
if (f->uf_flags & UFSFX_LCKUMOUNT) {
TRIVIA(("] "));
rc = SFRC_SUCCESS;
} else {
TRIVIA((": failed] "));
f->uf_retry = 1;
}
return (rc);
}
if (!toolong) {
rc = SFRC_SUCCESS;
} else {
if (!(f->uf_flags & UFSFX_REPAIR_START)) {
(f->uf_flags & UFSFX_LCKONLY)?
"Manual": "Automatic",
fs_name(f));
} else {
"Repair of %s is not timely; operator attention is required.",
fs_name(f));
}
TRIVIA(("] "));
return (rc);
}
}
#if defined(DEBUG)
else {
}
#endif /* DEBUG */
/*
* get on disk superblock; force it to really
* come from the disk
*/
if (bp) {
}
TRIVIA((": UFS_BREAD(SBLOCK) failed]\n"));
f->uf_retry = 1;
goto out;
}
/* fsck started but we haven't noticed yet? */
TRIVIA((": failed]\n"));
f->uf_retry = 1;
goto out;
}
}
/* fsck started but didn't succeed? */
goto out;
}
/* fsck started but doesn't seem to be proceeding? */
"Repair completion timeout exceeded on %s; manual fsck may be required",
fs_name(f));
}
}
TRIVIA(("] "));
out:
if (bp)
return (rc);
}
static sfrc_t
{
int toolong = 0;
int err = 0;
TRIVIA(("[sf_found_umount"));
if (toolong) {
TRIVIA((": unmount time limit exceeded] "));
goto out;
}
goto out;
}
TRIVIA((": !not error locked?"));
goto out;
}
TRIVIA((": couldn't lock coveredvp"));
goto out;
}
/* take note, but not many alternatives here */
TRIVIA((": unmount failed] "));
} else {
}
out:
TRIVIA(("] "));
return (rc);
}
static sfrc_t
{
TRIVIA(("[sf_term_cmn, action is %s, state is %s",
act_name(a), state_name(s)));
ASSERT(terminal_state(s));
TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n"));
return (rc);
}
switch (a) {
case UFA_SET:
switch (s) {
case UF_NOTFIX:
case UF_FIXED:
{ int need_lock_vfs;
if (f->uf_ufsvfsp && f->uf_vfs_lockp)
else
need_lock_vfs = 0;
TRIVIA((": tryenter(vfslockp) fail; retry]\n"));
f->uf_retry = 1;
break;
}
f->uf_retry = 0;
if (f->uf_vfs_ufsfxp)
if (need_lock_vfs)
mutex_exit(f->uf_vfs_lockp);
"%s is now accessible", fs_name(f));
if (s & UF_FIXED) {
}
(void) timeout(ufsfx_kill_fix_failure_thread,
rc = SFRC_SUCCESS;
break;
}
case UF_REPLICA:
/* not actually a replica? */
f->uf_vfs_ufsfxp->fx_current != f &&
f->uf_retry = 0;
rc = SFRC_SUCCESS;
} else {
TRIVIA((": NULL fx_current]\n"));
f->uf_retry = 1;
}
break;
default:
TRIVIA((": failed] "));
break;
}
break;
case UFA_FOUND:
/*
* XXX de-allocate these after some period?
* XXX or move to an historical list?
* XXX or have an ioctl which reaps them?
*/
/*
* For now, since we don't expect lots of failures
* to occur (to the point of memory shortages),
* just punt
*/
/* be sure we're not wasting cpu on old failures */
if (f->uf_retry != 0) {
f->uf_retry = 0;
}
rc = SFRC_SUCCESS;
break;
default:
TRIVIA((": failed] "));
break;
}
TRIVIA(("] "));
return (rc);
}
static sfrc_t
ufs_failure_t *f,
ufsa_t a,
{
TRIVIA(("[sf_panic, action is %s, prev. state is %s",
switch (a) {
case UFA_SET:
rc = SFRC_SUCCESS;
break;
case UFA_FOUND:
default:
real_panic(f, " ");
/* LINTED: warning: logical expression always true: op "||" */
break;
}
TRIVIA(("] "));
return (rc);
}
/*
* minimum state function
*/
static sfrc_t
ufs_failure_t *f,
ufsa_t a, /* LINTED argument unused in function: ignored */
{
switch (a) {
case UFA_SET:
f->uf_retry = 0;
/* FALLTHROUGH */
case UFA_FOUND:
rc = SFRC_SUCCESS;
break;
default:
TRIVIA((": failed] "));
break;
}
TRIVIA(("] "));
return (rc);
}
static int
{
ufsd_t *s;
int valid;
HIDEOUS(("[state_trans_valid"));
return (1);
s = get_state_desc(to);
/*
* extra test is necessary since we want UF_UNDEF = 0,
* (to detect freshly allocated memory)
* but can't check for that value with a bit test
*/
return (valid);
}
static int
{
ufsd_t *s;
HIDEOUS(("[terminal_state"));
s = get_state_desc(state);
}
static void
{
MINUTE(("[alloc_lockfs_comment"));
/*
* ufs_fiolfs expects a kmem_alloc'ed comment;
* it frees the comment if the lock fails
* or else when the lock is unlocked.
*/
if (f->uf_lf.lf_comment) {
char *from;
/*
* use panic string if there's no previous comment
* or if we're setting the error lock
*/
from = f->uf_panic_str;
} else {
}
} else {
}
MINUTE(("] "));
}
static int
{
int (*handle_lockfs_rc)(ufs_failure_t *);
int rc;
MINUTE(("[set_lockfs"));
if (!f->uf_ufsvfsp) {
MINUTE((": ufsvfsp is NULL]\n"));
return (0);
}
if (!f->uf_ufsvfsp->vfs_root) {
MINUTE((": vfs_root is NULL]\n"));
return (0);
}
alloc_lockfs_comment(f, lfp);
f->uf_lf_err = 0;
if (!LOCKFS_IS_ELOCK(lfp)) {
&f->uf_lf,
/* from_user */ 0,
/* from_log */ 0);
}
rc = handle_lockfs_rc(f);
MINUTE(("] "));
return (rc);
}
static int
{
int error;
TRIVIA(("[lockfs_failure"));
if (!f->uf_ufsvfsp) {
TRIVIA((": ufsvfsp is NULL]\n"));
return (0);
}
switch (error) {
/* non-transient errors: */
case EPERM: /* inode reconciliation failed; incore inode changed? */
case EIO: /* device is hard-locked or not responding */
case EROFS: /* device is write-locked */
case EDEADLK: /* can't lockfs; deadlock would result; */
/* Swapping or saving accounting records */
/* onto this fs can cause this errno. */
MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)",
fs_name(f),
error));
/*
* if can't get lock, then fallback to panic, unless
* unless unmount was requested (although unmount will
* probably fail if the lock failed, so we'll panic
* anyway
*/
if (!set_state(f, s)) {
real_panic(f, " ");
/*NOTREACHED*/
break;
}
break;
case EBUSY:
case EAGAIN:
/*
* if we didn't know that the fix had started,
* take note
*/
TRIVIA((": failed] "));
return (0);
}
}
break;
default: /* some other non-fatal error */
MINOR(("lockfs(\"%s\") of %s returned %s (%d)",
fs_name(f),
f->uf_lf_err));
break;
case EINVAL: /* unmounted? */
break;
}
TRIVIA(("] "));
return (1);
}
static int
{
TRIVIA(("[lockfs_success"));
if (!f->uf_ufsvfsp) {
TRIVIA((": ufsvfsp is NULL]\n"));
return (0);
}
case LOCKFS_ELOCK: /* error lock worked */
TRIVIA((": failed] "));
return (0);
}
break;
case LOCKFS_ULOCK: /* unlock worked */
/*
* how'd we get here?
* This should be done from fsck's unlock,
* not from this thread's context.
*/
ufsfx_unlockfs(f->uf_ufsvfsp);
break;
default:
TRIVIA((": failed] "));
return (0);
}
break;
}
TRIVIA(("] "));
return (1);
}
/*
* when fsck is running it puts its pid into the lockfs
* comment structure, prefaced by PIDSTR
*/
const char *PIDSTR = "[pid:";
static int
{
char *cp;
TRIVIA(("[fsck_active"));
ASSERT(f);
ASSERT(f->uf_ufsvfsp);
TRIVIA((": null comment or comlen <= 0, found:0]"));
return (0);
}
}
/*
 * Placeholder names that fs_name() returns when it cannot produce the
 * real mount name for a failure record.
 */
static const char unknown_fs[] =
	"<unknown fs>";
static const char null_failure[] =
	"<NULL ufs failure record; unknown fs>";
static const char mutated_vfs_bufp[] =
	"<mutated vfs_bufp, unknown fs>";
static const char mutated_vfs_fs[] =
	"<mutated vfs_fs, unknown fs>";
/*
 * fs_name
 *
 * Returns a printable name for the file system that failure record f
 * refers to:
 *	- null_failure when f is NULL
 *	- f->uf_fsname when that buffer is non-empty
 *	- mutated_vfs_bufp or mutated_vfs_fs on the "mutated" paths below
 *	- unknown_fs when nothing better is available
 * The fallback strings are const arrays cast to (char *) to fit the
 * return type, so the result must be treated as read-only.
 * NOTE(review): several statements in the vfs-lock-held section are not
 * present here (the trace calls are cut short and braces do not pair
 * up) - confirm the full logic before relying on this summary.
 */
static char *
fs_name(ufs_failure_t *f)
{
HIDEOUS(("[fs_name"));
if (!f) {
HIDEOUS((": failure ptr is NULL]\n"));
return ((char *)null_failure);
}
/* prefer the name already stored in the failure record */
if (f->uf_fsname[0] != '\0') {
HIDEOUS((": return (uf_fsname)]\n"));
return (f->uf_fsname);
}
/* the section below runs only when the vfs lock is held */
if (MUTEX_HELD(f->uf_vfs_lockp)) {
HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
return ((char *)mutated_vfs_bufp);
}
HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
return ((char *)mutated_vfs_fs);
}
HIDEOUS((": return (fs_fsmnt)]\n"));
}
}
/* nothing usable was found */
HIDEOUS((": unknown file system]\n"));
return ((char *)unknown_fs);
}
#if defined(DEBUG)
static char *
{
struct lock_description *l;
char *lname;
HIDEOUS(("[lock_name"));
break;
}
}
HIDEOUS(("]"));
return (lname);
}
static char *
{
ufsd_t *s;
HIDEOUS(("[state_name"));
s = get_state_desc(state);
HIDEOUS(("]"));
return (s->ud_name);
}
static char *
{
struct error_description *e;
HIDEOUS(("[err_name"));
HIDEOUS(("]"));
return (e->ed_name);
}
}
HIDEOUS(("]"));
}
static char *
{
struct action_description *a;
HIDEOUS(("[act_name"));
HIDEOUS(("]"));
return (a->ad_name);
}
}
HIDEOUS(("]"));
}
/*
* dump failure list
*/
static void
dump_uf_list(char *msg)
{
ufs_failure_t *f;
int i;
printf("dump_uf_list: couldn't get list lock\n");
return;
}
if (msg) {
}
printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n",
printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n",
if (!mutex_tryenter(&f->uf_mutex)) {
printf("%d.\t\"skipped - try enter failed\"\n", i);
continue;
}
dump_uf(f, i);
mutex_exit(&f->uf_mutex);
}
printf("\n");
if (!list_was_locked)
}
static void
dump_uf(ufs_failure_t *f, int i)
{
if (!f) {
printf("dump_uf: NULL failure record\n");
return;
}
printf("%d.\t\"%s\" is %s.\n",
printf("\tNext: 0x%p\t\tPrev: 0x%p\n",
if (f->uf_orig)
printf("\tOriginal failure: 0x%p \"%s\"\n",
printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n",
(void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp);
if (f->uf_bp)
else
printf("\n");
printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n",
printf("\tLockfs:\ttype: %s\terror: %s (%d)\n",
}
#endif /* DEBUG */
/*
* returns # of ufs_failures in a non-terminal state on queue
* used to coordinate with hlock thread (see ufs_thread.c)
* and to determine when the error lock thread may exit
*/
int
ufsfx_get_failure_qlen(void)
{
ufs_failure_t *f;
ufsd_t *s;
int qlen = 0;
MINUTE(("[ufsfx_get_failure_qlen"));
return (-1);
/*
* walk down failure list
*/
if (!mutex_tryenter(&f->uf_mutex))
continue;
s = get_state_desc(f->uf_s);
mutex_exit(&f->uf_mutex);
continue;
}
MINUTE((": found: %s, \"%s: %s\"\n",
qlen++;
mutex_exit(&f->uf_mutex);
}
return (qlen);
}
/*
* timeout routine
* called to shutdown fix failure thread and server daemon
*/
static void
ufsfx_kill_fix_failure_thread(void *arg)
{
int qlen;
MAJOR(("[ufsfx_kill_fix_failure_thread"));
if (qlen < 0) {
if (delta <= 0)
(void) timeout(ufsfx_kill_fix_failure_thread,
MAJOR((": rescheduled"));
} else if (qlen == 0) {
MAJOR((": killed"));
}
/*
* else
* let timeout expire
*/
MAJOR(("]\n"));
}