/*
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms are permitted
* provided that: (1) source distributions retain this entire copyright
* notice and comment, and (2) distributions including binaries display
* the following acknowledgement: ``This product includes software
* developed by the University of California, Berkeley and its contributors''
* in the documentation or other materials provided with the distribution
* and in all advertising materials mentioning features or use of this
* software. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <limits.h>
#include <sys/sysmacros.h>
#define _KERNEL
#include <pwd.h>
#include "fsck.h"
static int get_indir_offsets(int, daddr_t, int *, int *);
static int clearanentry(struct inodesc *);
static void inoflush(void);
static int lookup_dotdot_ino(fsck_ino_t);
/*
* ckinode() essentially traverses the blocklist of the provided
* inode. For each block either the caller-supplied callback (id_func
* in the provided struct inodesc) or dirscan() is invoked. Which is
* chosen is controlled by what type of traversal was requested
* (id_type) - if it was for an ADDR or ACL, use the callback,
* otherwise it is assumed to be DATA (i.e., a directory) whose
* contents need to be scanned.
*
* Note that a directory inode can get passed in with a type of ADDR;
* the type field is orthogonal to the IFMT value. This is so that
* the file aspects (no duplicate blocks, etc) of a directory can be
* verified just like is done for any other file, or the actual
* contents can be scanned so that connectivity and such can be
* investigated.
*
* The traversal is controlled by flags in the return value of
* dirscan() or the callback. Five flags are defined, STOP, SKIP,
* KEEPON, ALTERED, and FOUND. Their semantics are:
*
* STOP - the traversal should end here: either whatever the
* callback was searching for was found, or a serious
* inconsistency was encountered, or anything else
* appropriate.
*
* SKIP - something that made it impossible to continue was
* encountered, and the caller should go on to the next
* inode. This is more for i/o failures than for
* logical inconsistencies. Nothing actually looks for
* this.
*
* KEEPON - no more blocks of this inode need to be scanned, but
* nothing's wrong, so keep on going with the next
* inode. It is similar to STOP, except that
* ckinode()'s caller will typically advance to the next
* inode for KEEPON, whereas it ceases scanning through
* the inodes completely for STOP.
*
* ALTERED - a change was made to the inode. If the caller sees
* this set, it should make sure to flush out the
* changes. Note that any data blocks read in by the
* function need to be marked dirty by it directly;
* flushing of those will happen automatically later.
*
* FOUND - whatever was being searched for was located.
* Typically combined with STOP to avoid wasting time
* doing additional looking.
*
* During a traversal, some state needs to be carried around. At the
* least, the callback functions need to know what inode they're
* working on, which logical block, and whether or not fixing problems
* when they're encountered is desired. Rather than try to guess what
* else might be needed (and thus end up passing way more arguments
* than is reasonable), all the possibilities have been bundled in
* struct inodesc. About half of the fields are specific to directory
* traversals, and the rest are pretty much generic to any traversal.
*
* The general fields are:
*
* id_fix What to do when an error is found. Generally, this
* is set to DONTKNOW before a traversal. If a
* problem is encountered, it is changed to either FIX
* or NOFIX by the dofix() query function. If id_fix
* has already been set to FIX when dofix() is called, then
* it includes the ALTERED flag (see above) in its return
* value; the net effect is that the inode's buffer
* will get marked dirty and written to disk at some
* point. If id_fix is DONTKNOW, then dofix() will
* query the user. If it is NOFIX, then dofix()
* essentially does nothing. A few routines set NOFIX
* as the initial value, as they are performing a best-
* effort informational task, rather than an actual
* repair operation.
*
* id_func This is the function that will be called for every
* logical block in the file (assuming id_type is not
* DATA). The logical block may represent a hole, so
* the callback needs to be prepared to handle that
* case. Its return value is a combination of the flags
* described above (SKIP, ALTERED, etc).
*
* id_number The inode number whose block list or data is being
* scanned.
*
* id_parent When id_type is DATA, this is the inode number for
* the parent of id_number. Otherwise, it is
* available for use as an extra parameter or return
* value between the callback and ckinode()'s caller.
* Which, if either, of those is left completely up to
* the two routines involved, so nothing can generally
* be assumed about the id_parent value for non-DATA
* traversals.
*
* id_lbn This is the current logical block (not fragment)
* number being visited by the traversal.
*
* id_blkno This is the physical block corresponding to id_lbn.
*
* id_numfrags This defines how large a block is being processed in
* this particular invocation of the callback.
* Usually, it will be the same as sblock.fs_frag.
* However, if a direct block is being processed and
* it is less than a full filesystem block,
* id_numfrags will indicate just how many fragments
* (starting from id_lbn) are actually part of the
* file.
*
* id_truncto The pass 4 callback is used in several places to
* free the blocks of a file (the `FILE HAS PROBLEM
* FOO; CLEAR?' scenario). This has been generalized
* to allow truncating a file to a particular length
* rather than always completely discarding it. If
* id_truncto is -1, then the entire file is released,
* otherwise it is the logical block number to truncate
* to. This generalized interface was motivated by a
* desire to be able to discard everything after a
* hole in a directory, rather than the entire
* directory.
*
* id_type Selects the type of traversal. DATA for dirscan(),
* ADDR or ACL for using the provided callback.
*
* There are several more fields used just for dirscan() traversals:
*
* id_filesize The number of bytes in the overall directory left to
* process.
*
* id_loc Byte position within the directory block. Should always
* point to the start of a directory entry.
*
* id_entryno Which logical directory entry is being processed (0
* is `.', 1 is `..', 2 and on are normal entries).
* This field is primarily used to enable special
* checks when looking at the first two entries.
*
* The exception (there's always an exception in fsck)
* is that in pass 1, it tracks how many fragments are
* being used by a particular inode.
*
* id_firsthole The first logical block number that was found to
* be zero. As directories are not supposed to have
* holes, this marks where a directory should be
* truncated down to. A value of -1 indicates that
* no holes were found.
*
* id_dirp A pointer to the in-memory copy of the current
* directory entry (as identified by id_loc).
*
* id_name This is a directory entry name to either create
* (callback is mkentry) or locate (callback is
* chgino, findino, or findname).
*/
int
{
if (idesc->id_filesize == 0)
/*
* Our caller should be filtering out completely-free inodes
* (mode == zero), so we'll work on the assumption that what
* we're given has some basic validity.
*
* The kernel is inconsistent about MAXPATHLEN including the
* trailing \0, so allow the more-generous length for symlinks.
*/
return (KEEPON);
pwarn("I=%d Symlink longer than supported maximum\n",
return (STOP);
}
}
/*
* This was split out from ckinode() to allow it to be used
* without having to pass in kludge flags to suppress the
* wrong-for-deletion initialization and irrelevant checks.
* This feature is no longer needed, but is being kept in case
* the need comes back.
*/
static int
enum cki_action action)
{
for (i = 0; i < NDADDR; i++) {
idesc->id_numfrags =
} else {
}
idesc->id_firsthole = i;
}
continue;
}
else
/*
* Need to clear the entry, now that we're done with
* it. We depend on freeblk() ignoring a request to
* free already-free fragments to handle the problem of
* a partial block.
*/
if ((action == CKI_TRUNCATE) &&
(idesc->id_truncto >= 0) &&
/*
* The (int) cast is safe, in that if di_size won't
* fit, it'll be a multiple of any legal fs_frag,
* thus giving a zero result. That value, in turn
* means we're doing an entire block.
*/
if (frags == 0)
frags);
inodirty();
}
return (ret);
}
#ifdef lint
/*
* Cure a lint complaint of ``possible use before set''.
* Apparently it can't quite figure out the switch statement.
*/
indir_data_blks = 0;
#endif
/*
* indir_data_blks contains the number of data blocks in all
* the previous levels for this iteration. E.g., for the
* single indirect case (i = 0, di_ib[i] != 0), NDADDR's worth
* of blocks have already been covered by the direct blocks
* (di_db[]). At the triple indirect level (i = NIADDR - 1),
* it is all of the number of data blocks that were covered
* by the second indirect, single indirect, and direct block
* levels.
*/
for (i = 0; i < NIADDR; i++) {
/*
* We'll only clear di_ib[i] if the first entry (and
* therefore all of them) is to be cleared, since we
* only go through this code on the first entry of
* each level of indirection. The +1 is to account
* for the fact that we don't modify id_lbn until
* we actually start processing on a data block.
*/
action);
if ((action == CKI_TRUNCATE) &&
}
}
return (ret);
} else {
/*
* Need to know which of the file's logical blocks
* reside in the missing indirect block. However, the
* precise location is only needed for truncating
* directories, and level-of-indirection precision is
* sufficient for that.
*/
if ((indir_data_blks < ndb) &&
(idesc->id_firsthole < 0)) {
}
}
}
return (KEEPON);
}
/*
* NOTE(review): presumably get_indir_offsets(), going by the forward
* declaration near the top of the file; the name line, most of the
* parameters and the per-level arithmetic are not visible in this view.
*/
static int
int *last_blk)
{
int ilevel;
/* Only levels 0, 1 and 2 are accepted; anything else is fatal. */
switch (ilevel) {
case 0: /* SINGLE */
break;
case 1: /* DOUBLE */
break;
case 2: /* TRIPLE */
break;
default:
/*
* Translate from zero-based array to
* one-based human-style counting.
*/
errexit("panic: indirection level %d not 1, 2, or 3",
ilevel + 1);
/* NOTREACHED */
}
ndb_ilevel = ilevel;
if (ilevel == ilevel_wanted) {
}
}
/* The result is whatever ndb_ilevel holds at this point. */
return (ndb_ilevel);
}
static int
enum cki_action action)
{
int i, n;
int nif;
n = KEEPON;
case ADDR:
return (n);
break;
case ACL:
break;
case DATA:
break;
default:
/* NOTREACHED */
}
}
return (SKIP);
}
ilevel--;
/*
* Trivia note: the BSD fsck has the number of bytes remaining
* as the third argument to iblock(), so the equivalent of
* fsbperindirb starts at fs_bsize instead of one. We're
* working in units of filesystem blocks here, not bytes or
* fragments.
*/
}
/*
* nif indicates the next "free" pointer (as an array index) in this
* indirect block, based on counting the blocks remaining in the
* file after subtracting all previously processed blocks.
* This figure is based on the size field of the inode.
*
* Note that in normal operation, nif may initially be calculated
* as larger than the number of pointers in this block (as when
* there are more indirect blocks following); if that is
* the case, nif is limited to the max number of pointers per
* indirect block.
*
* Also note that if an inode is inconsistent (has more blocks
* allocated to it than the size field would indicate), the sweep
* through any indirect blocks directly pointed at by the inode
* continues. Since the block offset of any data blocks referenced
* by these indirect blocks is greater than the size of the file,
* the index nif may be computed as a negative value.
* In this case, we reset nif to indicate that all pointers in
* this retrieval block should be zeroed and the resulting
* unreferenced data and/or retrieval blocks will be recovered
* through garbage collection later.
*/
else if (nif < 0)
nif = 0;
/*
* first pass: all "free" retrieval pointers (from [nif] thru
* the end of the indirect block) should be zero. (This
* assertion does not hold for directories, which may be
* truncated without releasing their allocated space)
*/
continue;
if (preen) {
}
}
}
/*
* second pass: all retrieval pointers referring to blocks within
* a valid range [0..filesize] (both indirect and data blocks)
* are examined in the same manner as ckinode() checks the
* direct blocks in the inode. Sweep through from
* the first pointer in this retrieval block to [nif-1].
*/
for (i = 0; i < nif; i++) {
if (ilevel == 0)
if (ilevel > 0) {
/*
* Each iteration decreases "remaining block
* count" by the number of blocks accessible
* by a pointer at this indirect block level.
*/
iblks -= fsbperindirb;
} else {
/*
* If we're truncating, func will discard
* the data block for us.
*/
}
if ((action == CKI_TRUNCATE) &&
(idesc->id_truncto >= 0) &&
}
/*
* Note that truncation never gets STOP back
* under normal circumstances. Abnormal would
* be a bad acl short-circuit in iblock() or
* an out-of-range failure in pass4check().
* We still want to keep going when truncating
* under those circumstances, since the whole
* point of truncating is to get rid of all
* that.
*/
return (n);
}
} else {
(idesc->id_firsthole < 0)) {
}
/*
* No point in continuing in the indirect
* blocks of a directory, since they'll just
* get freed anyway.
*/
}
}
}
return (KEEPON);
}
/*
* Check that a block is a legal block number.
* Return 0 if in range, 1 if out of range.
*/
int
{
int c;
if (debug)
(void) printf(
"Bad fragment range: should be 1 <= %d..%d < %d\n",
return (1);
}
if (debug)
return (1);
}
if (debug)
(void) printf(
"Bad fragment position: %d..%d spans start of cg metadata\n",
return (1);
}
} else {
if (debug)
(void) printf(
"Bad frag pos: %d..%d crosses end of cg\n",
return (1);
}
}
return (0);
}
/*
* General purpose interface for reading inodes.
*/
/*
* Note that any call to ginode() can potentially invalidate any
* dinode pointers previously acquired from it. To avoid pain,
* make sure to always call inodirty() immediately after modifying
* an inode, if there's any chance of ginode() being called after
* that. Also, always call ginode() right before you need to access
* an inode, so that there won't be any surprises from functions
* called between the previous ginode() invocation and the dinode
* use.
*
* Despite all that, we aren't doing the amount of i/o that's implied,
* as we use the buffer cache that getdatablk() and friends maintain.
*/
struct dinode *
{
}
if (startinum == -1 ||
}
/*
* We don't check for errors here, because we can't
* tell our caller about it, and the zeros that will
* be in the buffer are just as good as anything we
* could fake.
*/
}
return (dp);
}
/*
* Special purpose version of ginode used to optimize first pass
* over all the inodes in numerical order. It bypasses the buffer
* system used by ginode(), etc in favour of reading the bulk of a
* cg's inodes at one time.
*/
struct dinode *
{
/*
* Will always go into the if() the first time we're called,
* so dp will always be valid.
*/
readcnt++;
currentdblk = dblk;
if (partialsize > SIZE_MAX)
"Internal error: partialsize overflow");
lastinum += partialcnt;
} else {
if (inobufsize > SIZE_MAX)
errexit("Internal error: inobufsize overflow");
}
/*
* If fsck_bread() returns an error, it will already have
* zeroed out the buffer, so we do not need to do so here.
*/
}
currentinode = dp;
return (dp++);
}
/*
* Reread the current getnext() buffer. This allows for changing inodes
* other than the current one via ginode()/inodirty()/inoflush().
*
* Just reuses all the interesting variables that getnextinode() set up
* last time it was called. This shouldn't get called often, so we don't
* try to figure out if the caller's actually touched an inode in the
* range we have cached. There could have been an arbitrary number of
* them, after all.
*/
struct dinode *
getnextrefresh(void)
{
/*
* NOTE(review): the test guarding this early NULL return is not
* visible in this view; confirm the condition against the full file.
*/
return (NULL);
}
/*
* inoflush() is called before the pointer is handed back; presumably
* it writes out any inode changed through ginode()/inodirty() -- TODO
* confirm, as inoflush()'s body is not visible here.
*/
inoflush();
/* Hand back the pointer last stored in currentinode. */
return (currentinode);
}
void
resetinodebuf(void)
{
/* Start over: zero the inode-number variables and the read counter. */
startinum = 0;
nextino = 0;
lastinum = 0;
readcnt = 0;
/*
* A non-zero partialcnt bumps readpercg by one; presumably that
* accounts for one extra, short read per cylinder group -- TODO
* confirm, the sizing computation is not visible in this view.
*/
if (partialcnt != 0) {
readpercg++;
} else {
}
/*
* NOTE(review): as shown, this errexit() is unconditional; the
* allocation and its failure test are not visible in this view.
*/
errexit("Cannot allocate space for inode buffer\n");
/*
* Step through every inode numbered below UFSROOTINO, discarding the
* results (assumes getnextinode() advances nextino -- TODO confirm).
*/
while (nextino < UFSROOTINO)
(void) getnextinode(nextino);
}
void
freeinodebuf(void)
{
/*
* NOTE(review): no statements of this routine are visible in this
* view. By its name it presumably releases the buffer obtained in
* resetinodebuf() -- confirm against the full file.
*/
}
}
/*
* Routines to maintain information about directory inodes.
* This is built during the first pass and used during the
* second and third passes.
*
* Enter inodes into the cache.
*/
void
{
errexit("Cannot increase directory list\n");
listmax += 100;
errexit("cannot increase directory list");
}
}
/*
* Look up an inode cache structure.
*/
struct inoinfo *
{
return (inp);
}
/*
* Determine whether inode is in cache.
*/
int
{
}
/*
* Clean up all of the inode cache structures.
*/
void
inocleanup(void)
{
/*
* NOTE(review): only an early return is visible here; the condition
* guarding it and the cleanup work itself are not shown in this view.
*/
return;
}
}
/*
* Routines to maintain information about acl inodes.
* This is built during the first pass and used during the
* second and third passes.
*
* Enter acl inodes into the cache.
*/
void
{
return;
aclmax += 100;
errexit("cannot increase acl list");
}
}
/*
* Generic cache search function.
* ROOT is the first entry in a hash chain (the caller is expected
* to have done the initial bucket lookup). KEY is what's being
* searched for.
*
* Returns a pointer to the entry if it is found, NULL otherwise.
*/
static struct inoinfo *
{
break;
}
return (element);
}
/*
* Callers invoke this right after changing an inode obtained from
* ginode() (see the notes preceding ginode()).
*
* NOTE(review): the body is empty in this view; presumably it flags the
* cached inode buffer as modified so that it gets written back --
* confirm against the full file.
*/
void
inodirty(void)
{
}
/*
* File-local helper; getnextrefresh() calls it before returning the
* cached inode pointer.
*
* NOTE(review): the body is empty in this view; by its name it
* presumably writes out a modified inode buffer -- confirm against the
* full file.
*/
static void
inoflush(void)
{
}
/*
* Interactive wrapper for freeino(), for those times when we're
* not sure if we should throw something away.
*/
void
{
int need_parent;
return;
if (verbose == CLRI_VERBOSE) {
}
if (preen)
(void) printf(" (CLEARED)\n");
} else if (corrupting == CLRI_NOP_CORRUPT) {
iscorrupt = 1;
}
(void) printf("\n");
}
/*
* Find the directory entry for the inode noted in id_parent (which is
* not necessarily the parent of anything, we're just using a convenient
* field).
*/
int
{
return (KEEPON);
}
/*
* Find the inode number associated with the given name.
*/
int
{
return (KEEPON);
}
return (KEEPON);
}
int
{
}
static int
{
idesc->id_entryno++;
return (KEEPON);
}
}
void
{
return;
}
static void
{
char *p;
time_t t;
(void) printf(" OWNER=");
else
if (preen)
/* ctime() ignores LOCALE, so this is safe */
p = ctime(&t);
}
void
{
(void) printf("\n");
case FSTATE:
case FZLINK:
return;
case DFOUND:
case DSTATE:
case DZLINK:
return;
case SSTATE:
return;
case FCLEAR:
case DCLEAR:
case SCLEAR:
return;
default:
/* NOTREACHED */
}
}
/*
* allocate an unused inode
*/
{
int cg;
time_t t;
errexit("assertion failed: allocino() asked for "
"inode %d instead of 0 or %d",
(int)request, (int)UFSROOTINO);
/*
* We know that we're only going to get requests for UFSROOTINO
* or 0. If UFSROOTINO is wanted, then it better be available
* because our caller is trying to recreate the root directory.
* If we're asked for 0, then which one we return doesn't matter.
* We know that inodes 0 and 1 are never valid to return, so we
* start at the lowest-legal inode number.
*
* If we got a request for UFSROOTINO, then request != 0, and
* this pair of conditionals is the only place that treats
* UFSROOTINO specially.
*/
if (request == 0)
return (0);
/*
* Doesn't do wrapping, since we know we started at
* the smallest inode.
*/
break;
return (0);
/*
* In pass5, we'll calculate the bitmaps and counts all again from
* scratch and do a comparison, but for that to work the cg has
* to know what in-memory changes we've made to it. If we have
* trouble reading the cg, cg_sanity() should kick it out so
* we can skip explicit i/o error checking here.
*/
if (reply("REPAIR") == 0)
errexit("Program terminated.");
}
cgdirty();
/*
* Don't currently support IFATTRDIR or any of the other
* types, as they aren't needed.
*/
case IFDIR:
break;
case IFREG:
case IFLNK:
break;
default:
/*
* Pretend nothing ever happened. This clears the
* dirty flag, among other things.
*/
if (debug)
(void) printf("allocino: unknown type 0%o\n",
return (0);
}
/*
* We're allocating what should be a completely-unused inode,
* so make sure we don't inherit anything from any previous
* incarnations.
*/
return (0);
}
(void) time(&t);
n_files++;
inodirty();
return (ino);
}
/*
* Release some or all of the blocks of an inode.
* Only truncates down. Assumes new_length is appropriately aligned
* to a block boundary (or a directory block boundary, if it's a
* directory).
*
* If this is a directory, discard all of its contents first, so
* we don't create a bunch of orphans that would need another fsck
* run to clean up.
*
* Even if truncating to zero length, the inode remains allocated.
*/
void
{
if (isdir) {
/*
* Go with the parent we found by chasing references,
* if we've gotten that far. Otherwise, use what the
* directory itself claims. If there's no ``..'' entry
* in it, give up trying to get the link counts right.
*/
if (update == TI_NOPARENT) {
parent = -1;
} else {
} else {
if (parent != 0) {
/*
* Make sure that the claimed
* parent actually has a
* reference to us.
*/
CKI_TRAVERSE) & FOUND) == 0)
parent = 0;
}
}
}
if (parent > 0) {
&idesc);
goto no_parent_update;
}
/*
* Currently don't have a good way to
* handle this, so throw up our hands.
* However, we know that we can still
* do some good if we continue, so
* don't actually exit yet.
*
* We don't do it for attrdirs,
* because there aren't link counts
* between them and their parents.
*/
pwarn("Could not determine former parent of "
"inode %d, link counts are possibly\n"
"incorrect. Please rerun fsck(1M) to "
"correct this.\n",
ino);
iscorrupt = 1;
}
/*
* ...else if it's a directory with parent == -1, then
* we've not gotten far enough to know connectivity,
* and it'll get handled automatically later.
*/
}
inodirty();
/*
* This has to be done after ckinode(), so that all of
* the fragments get visited. Note that we assume we're
* always truncating to a block boundary, rather than a
* fragment boundary.
*/
/*
* Clear now-obsolete pointers.
*/
}
}
inodirty();
}
/*
* Release an inode's resources, then release the inode itself.
*/
void
{
int cg;
n_files--;
/*
* We need to make sure that the file is really a large file.
* Everything bigger than UFS_MAXOFFSET_T is treated as a file with
* negative size, which shall be cleared. (see verify_inode() in
* pass1.c)
*/
}
}
clearinode(dp);
inodirty();
/*
* Keep the disk in sync with us so that pass5 doesn't get
* upset about spurious inconsistencies.
*/
cgdirty();
sbdirty();
}
void
{
inp->i_blkssize);
}
/*
* Return the inode number in the ".." entry of the provided
* directory inode.
*/
static int
{
}
return (0);
}
/*
* Convenience wrapper around ckinode(findino()).
*/
int
{
}
return (0);
}
/*
* Marks inodes that are being orphaned and might need to be reconnected
* by pass4(). The inode we're traversing is the directory whose
* contents will be reconnected later. id_parent is the lfn at which
* to start looking at said contents.
*/
static int
{
return (KEEPON);
}
}
return (KEEPON);
}
static void
{
}
/*
* Clear the i_oeftflag/extended attribute pointer from INO.
*/
/*
* NOTE(review): the declarator line and the lookup that sets dp are not
* visible in this view; the name is taken from the debug message below.
*/
void
{
/*
* Debug aid only: mention when the flag is already zero, i.e. there
* is nothing to clear.
*/
if (debug) {
if (dp->di_oeftflag == 0)
(void) printf("clearattref: no attr to clear on %d\n",
ino);
}
/*
* Zero the field regardless of its previous value, then call
* inodirty() as is required after any inode change.
*/
dp->di_oeftflag = 0;
inodirty();
}