dir.c revision b9a41fd39fb451c441a90e8959cb2dc2db84b497
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms are permitted
* provided that: (1) source distributions retain this entire copyright
* notice and comment, and (2) distributions including binaries display
* the following acknowledgement: ``This product includes software
* developed by the University of California, Berkeley and its contributors''
* in the documentation or other materials provided with the distribution
* and in all advertising materials mentioning features or use of this
* software. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/mntent.h>
#include <string.h>
#include <stdarg.h>
#include <sys/fs/ufs_fs.h>
#include <sys/vnode.h>
#include <sys/fs/ufs_inode.h>
#define _KERNEL
#include <sys/fs/ufs_fsdir.h>
#undef _KERNEL
#include "fsck.h"
struct rc_queue {
struct rc_queue *rc_next;
fsck_ino_t rc_orphan;
fsck_ino_t rc_parent;
caddr_t rc_name;
};
caddr_t lfname = "lost+found"; /* name to use for l+f dir */
static int lfmode = 01700; /* mode to use when creating l+f dir */
static struct dirtemplate emptydir = { 0, DIRBLKSIZ };
static struct dirtemplate dirhead = {
0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".."
};
static void lftempname(char *, fsck_ino_t);
static int do_reconnect(fsck_ino_t, fsck_ino_t, caddr_t);
static caddr_t mkuniqname(caddr_t, caddr_t, fsck_ino_t, fsck_ino_t);
static int chgino(struct inodesc *);
static int dircheck(struct inodesc *, struct direct *);
static int expanddir(fsck_ino_t, char *);
static void freedir(fsck_ino_t, fsck_ino_t);
static struct direct *fsck_readdir(struct inodesc *);
static struct bufarea *getdirblk(daddr32_t, size_t);
static int mkentry(struct inodesc *);
static fsck_ino_t newdir(fsck_ino_t, fsck_ino_t, int, caddr_t);
static fsck_ino_t reallocdir(fsck_ino_t, fsck_ino_t, int, caddr_t);
/*
* Propagate connected state through the tree.
*/
void
propagate(void)
{
struct inoinfo **inpp, *inp;
struct inoinfo **inpend;
int change, inorphan;
inpend = &inpsort[inplast];
do {
change = 0;
for (inpp = inpsort; inpp < inpend; inpp++) {
inp = *inpp;
if (inp->i_parent == 0)
continue;
if (statemap[inp->i_parent] == DFOUND &&
INO_IS_DUNFOUND(inp->i_number)) {
inorphan = statemap[inp->i_number] & INORPHAN;
statemap[inp->i_number] = DFOUND | inorphan;
change++;
}
}
} while (change > 0);
}
/*
* Scan each entry in a directory block.
*/
int
dirscan(struct inodesc *idesc)
{
struct direct *dp;
struct bufarea *bp;
uint_t dsize, n;
size_t blksiz;
union { /* keep lint happy about alignment */
char dbuf[DIRBLKSIZ];
struct direct dir;
} u;
if (idesc->id_type != DATA)
errexit("wrong type to dirscan %d\n", idesc->id_type);
if (idesc->id_entryno == 0 &&
(idesc->id_filesize & (DIRBLKSIZ - 1)) != 0)
idesc->id_filesize = roundup(idesc->id_filesize, DIRBLKSIZ);
blksiz = idesc->id_numfrags * sblock.fs_fsize;
if (chkrange(idesc->id_blkno, idesc->id_numfrags)) {
idesc->id_filesize -= (offset_t)blksiz;
return (SKIP);
}
idesc->id_loc = 0;
for (dp = fsck_readdir(idesc); dp != NULL; dp = fsck_readdir(idesc)) {
/*
* If we were just passed a corrupt directory entry with
* d_reclen > DIRBLKSIZ, we don't want to memmove() all over
* our stack. This directory gets cleaned up later.
*/
dsize = MIN(dp->d_reclen, sizeof (u.dbuf));
(void) memmove((void *)u.dbuf, (void *)dp, (size_t)dsize);
idesc->id_dirp = &u.dir;
if ((n = (*idesc->id_func)(idesc)) & ALTERED) {
/*
* We can ignore errors from getdirblk() here,
* as the block is still in memory thanks to
* buffering and fsck_readdir(). If there was
* an error reading it before, then all decisions
* leading to getting us here were based on the
* resulting zeros. As such, we have nothing
* to worry about at this point.
*/
bp = getdirblk(idesc->id_blkno, blksiz);
(void) memmove((void *)(bp->b_un.b_buf +
idesc->id_loc - dsize),
(void *)u.dbuf, (size_t)dsize);
dirty(bp);
sbdirty();
}
if (n & STOP)
return (n);
}
return (idesc->id_filesize > 0 ? KEEPON : STOP);
}
/*
* Get current entry in a directory (and peek at the next entry).
*/
static struct direct *
fsck_readdir(struct inodesc *idesc)
{
struct direct *dp, *ndp = 0;
struct bufarea *bp;
ushort_t size; /* of directory entry */
size_t blksiz;
int dofixret;
int salvaged; /* when to report SALVAGED in preen mode */
int origloc = idesc->id_loc;
blksiz = idesc->id_numfrags * sblock.fs_fsize;
/*
* Sanity check id_filesize and id_loc fields. The latter
* has to be within the block we're looking at, as well as
* aligned to a four-byte boundary. The alignment is due to
* a struct direct containing four-byte integers. It's
* unfortunate that the four is a magic number, but there's
* really no good way to derive it from the ufs header files.
*/
if ((idesc->id_filesize <= 0) || (idesc->id_loc >= blksiz) ||
((idesc->id_loc & 3) != 0))
return (NULL);
/*
* We don't have to worry about holes in the directory's
* block list, because that was checked for when the
* inode was first encountered during pass1. We never
* scan a directory until after we've vetted its block list.
*/
/*
* We can ignore errors from getdirblk() here, as dircheck()
* will reject any entries that would have been in the bad
* sectors (fsck_bread() fills in zeros on failures). The main
* reject keys are that d_reclen would be zero and/or that it
* is less than the minimal size of a directory entry. Since
* entries can't span sectors, there's no worry about having
* a good beginning in one sector and the rest in the next,
* where that second sector was unreadable and therefore
* replaced with zeros.
*/
bp = getdirblk(idesc->id_blkno, blksiz);
/* LINTED b_buf is aligned and id_loc was verified above */
dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc);
/*
* Check the current entry in the directory.
*/
if (dircheck(idesc, dp) == 0) {
/*
* If we are in here, then either the current directory
* entry is bad or the next directory entry is bad.
*/
next_is_bad:
/*
* Find the amount of space left to the end of the
* directory block for either directory entry.
*/
size = DIRBLKSIZ - (idesc->id_loc & (DIRBLKSIZ - 1));
/*
* Advance to the end of the directory block.
*/
idesc->id_loc += size;
idesc->id_filesize -= (offset_t)size;
/*
* Ask the question before we fix the in-core directory
* block because dofix() may reuse the buffer.
*/
salvaged = (idesc->id_fix == DONTKNOW);
dofixret = dofix(idesc, "DIRECTORY CORRUPTED");
/*
* If there was an error reading the block, then that
* same error can reasonably be expected to have occurred
* when it was read previously. As such, the decision
* to come here was based on the results of that partially-
* zerod block, and so anything we change should be
* based on it as well. Upshot: no need to check for
* errors here.
*/
bp = getdirblk(idesc->id_blkno, blksiz);
/* LINTED b_buf is aligned and id_loc/origloc was verified */
dp = (struct direct *)(bp->b_un.b_buf + origloc);
/*
* This is the current directory entry and since it is
* corrupt we cannot trust the rest of the directory
* block so change the current directory entry to
* contain nothing and encompass the rest of the block.
*/
if (ndp == NULL) {
dp->d_reclen = size;
dp->d_ino = 0;
dp->d_namlen = 0;
dp->d_name[0] = '\0';
}
/*
* This is the next directory entry, i.e., we got here
* via a "goto next_is_bad". That directory entry is
* corrupt. However, the current directory entry is okay
* so if we are in fix mode, just extend its record size
* to encompass the rest of the block.
*/
else if (dofixret) {
dp->d_reclen += size;
}
/*
* If the user said to fix the directory corruption, then
* mark the block as dirty. Otherwise, our "repairs" only
* apply to the in-core copy so we don't hand back trash
* to the caller.
*
* Note: It is possible that saying "no" to a change in
* one part of the I/O buffer and "yes" to a later change
* in the same I/O buffer may still flush the change to
* which we said "no". This is the pathological case and
* no fix is planned at this time.
*/
if (dofixret) {
dirty(bp);
if (preen && salvaged)
(void) printf(" (SALVAGED)\n");
if (idesc->id_number == lfdir)
lfdir = 0;
}
/*
* dp points into bp, which will get re-used at some
* arbitrary time in the future. We rely on the fact
* that we're singled-threaded, and that we'll be done
* with this directory entry by the time the next one
* is needed.
*/
return (dp);
}
/*
* The current directory entry checked out so advance past it.
*/
idesc->id_loc += dp->d_reclen;
idesc->id_filesize -= (offset_t)dp->d_reclen;
/*
* If we are not at the directory block boundary, then peek
* at the next directory entry and if it is bad we can add
* its space to the current directory entry (compression).
* Again, we sanity check the id_loc and id_filesize fields
* since we modified them above.
*/
if ((idesc->id_loc & (DIRBLKSIZ - 1)) && /* not at start */
(idesc->id_loc < blksiz) && /* within block */
((idesc->id_loc & 3) == 0) && /* properly aligned */
(idesc->id_filesize > 0)) { /* data follows */
/* LINTED b_buf is aligned and id_loc verified to be ok */
ndp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc);
if (dircheck(idesc, ndp) == 0)
goto next_is_bad;
}
/*
* See comment above about dp pointing into bp.
*/
return (dp);
}
/*
* Verify that a directory entry is valid.
* This is a superset of the checks made in the kernel.
*/
static int
dircheck(struct inodesc *idesc, struct direct *dp)
{
size_t size;
char *cp;
int spaceleft;
/*
* Recall that id_filesize is the number of bytes left to
* process in the directory. We check id_filesize >= size
* instead of id_filesize >= d_reclen because all that the
* directory is actually required to contain is the entry
* itself (and it's how the kernel does the allocation).
*
* We indirectly check for d_reclen going past the end of
* the allocated space by comparing it against spaceleft.
*/
size = DIRSIZ(dp);
spaceleft = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ);
if (dp->d_ino < maxino &&
dp->d_reclen != 0 &&
(int)dp->d_reclen <= spaceleft &&
(dp->d_reclen & 0x3) == 0 &&
(int)dp->d_reclen >= size &&
idesc->id_filesize >= (offset_t)size &&
dp->d_namlen <= MAXNAMLEN) {
if (dp->d_ino == 0)
return (1);
for (cp = dp->d_name, size = 0; size < (size_t)dp->d_namlen;
size++, cp++)
if ((*cp == '\0') || (*cp == '/'))
goto bad;
if (*cp == '\0')
return (1);
}
bad:
if (debug) {
(void) printf("Bad dir in inode %d at lbn %d, loc %d:\n",
idesc->id_number, idesc->id_lbn, idesc->id_loc);
(void) printf(" ino %d reclen %d namlen %d name `%s'\n",
dp->d_ino, dp->d_reclen, dp->d_namlen, dp->d_name);
}
return (0);
}
void
adjust(struct inodesc *idesc, int lcnt)
{
struct dinode *dp;
caddr_t flow;
int saveiscorrupt;
struct inodesc lcidesc;
dp = ginode(idesc->id_number);
if (dp->di_nlink == lcnt) {
/*
* If we have not hit any unresolved problems, are running
* in preen mode, and are on a file system using logging,
* then just toss any partially allocated files, as they are
* an expected occurrence.
*/
if (!iscorrupt && preen && islog) {
clri(idesc, "UNREF", CLRI_VERBOSE, CLRI_NOP_OK);
return;
} else {
/*
* The file system can be considered clean even if
* a file is not linked up, but is cleared. In
* other words, the kernel won't panic over it.
* Hence, iscorrupt should not be set when
* linkup is answered no, but clri is answered yes.
*
* If neither is answered yes, then we have a
* non-panic-inducing known corruption that the
* user needs to be reminded of when we exit.
*/
saveiscorrupt = iscorrupt;
if (linkup(idesc->id_number, (fsck_ino_t)0,
NULL) == 0) {
iscorrupt = saveiscorrupt;
clri(idesc, "UNREF", CLRI_QUIET, CLRI_NOP_OK);
if (statemap[idesc->id_number] != USTATE)
iscorrupt = 1;
return;
}
dp = ginode(idesc->id_number);
}
lcnt = lncntp[idesc->id_number];
}
/*
* It doesn't happen often, but it's possible to get a true
* excess of links (especially if a lot of directories got
* orphaned and reattached to lost+found). Instead of wrapping
* around, do something semi-useful (i.e., give progress towards
* a less-broken filesystem) when this happens.
*/
LINK_RANGE(flow, dp->di_nlink, -lcnt);
if (flow != NULL) {
LINK_CLEAR(flow, idesc->id_number, dp->di_mode, &lcidesc);
if (statemap[idesc->id_number] == USTATE)
return;
}
dp = ginode(idesc->id_number);
if (lcnt && dp->di_nlink != lcnt) {
pwarn("LINK COUNT %s",
file_id(idesc->id_number, dp->di_mode));
pinode(idesc->id_number);
dp = ginode(idesc->id_number);
(void) printf(" COUNT %d SHOULD BE %d",
dp->di_nlink, dp->di_nlink - lcnt);
/*
* Even lost+found is subject to this, as whenever
* we modify it, we update both the in-memory and
* on-disk counts. Thus, they should still be in
* sync.
*/
if (preen) {
if (lcnt < 0) {
(void) printf("\n");
if ((dp->di_mode & IFMT) == IFSHAD)
pwarn("LINK COUNT INCREASING");
else
pfatal("LINK COUNT INCREASING");
}
}
if (preen || reply("ADJUST") == 1) {
dp->di_nlink -= lcnt;
inodirty();
if (preen)
(void) printf(" (ADJUSTED)\n");
} else if (((dp->di_mode & IFMT) == IFDIR) ||
((dp->di_mode & IFMT) == IFATTRDIR)) {
/*
* File counts can be off relatively harmlessly,
* but a bad directory count can cause the
* kernel to lose its mind.
*/
iscorrupt = 1;
}
}
}
static int
mkentry(struct inodesc *idesc)
{
struct direct *dirp = idesc->id_dirp;
struct direct newent;
int newlen, oldlen;
newent.d_namlen = strlen(idesc->id_name);
newlen = DIRSIZ(&newent);
if (dirp->d_ino != 0)
oldlen = DIRSIZ(dirp);
else
oldlen = 0;
if ((int)dirp->d_reclen - oldlen < newlen)
return (KEEPON);
newent.d_reclen = dirp->d_reclen - (ushort_t)oldlen;
dirp->d_reclen = (ushort_t)oldlen;
/* LINTED dirp is aligned and DIRSIZ() forces oldlen to be aligned */
dirp = (struct direct *)(((char *)dirp) + oldlen);
dirp->d_ino = idesc->id_parent; /* ino to be entered is in id_parent */
dirp->d_reclen = newent.d_reclen;
dirp->d_namlen = newent.d_namlen;
(void) memmove(dirp->d_name, idesc->id_name,
(size_t)newent.d_namlen + 1);
return (ALTERED|STOP);
}
static int
chgino(struct inodesc *idesc)
{
struct direct *dirp = idesc->id_dirp;
if (memcmp(dirp->d_name, idesc->id_name,
(size_t)dirp->d_namlen + 1) != 0)
return (KEEPON);
dirp->d_ino = idesc->id_parent;
return (ALTERED|STOP);
}
int
linkup(fsck_ino_t orphan, fsck_ino_t parentdir, caddr_t name)
{
int rval;
struct dinode *dp;
int lostdir;
int lostshadow;
fsck_ino_t oldlfdir;
fsck_ino_t *intree;
struct inodesc idesc;
init_inodesc(&idesc);
dp = ginode(orphan);
lostdir = (((dp->di_mode & IFMT) == IFDIR) ||
((dp->di_mode & IFMT) == IFATTRDIR));
if (debug && lostdir && dp->di_nlink <= 0 && lncntp[orphan] == -1)
(void) printf(
"old fsck would have left inode %d for reclaim thread\n",
orphan);
lostshadow = (dp->di_mode & IFMT) == IFSHAD;
pwarn("UNREF %s ", file_id(orphan, dp->di_mode));
pinode(orphan);
if (lostshadow || (dp->di_size == 0 && dp->di_oeftflag == 0))
return (0);
if (!preen && (reply("RECONNECT") == 0))
goto noconnect;
if (lfdir == 0) {
dp = ginode(UFSROOTINO);
idesc.id_name = lfname;
idesc.id_type = DATA;
idesc.id_func = findino;
idesc.id_number = UFSROOTINO;
idesc.id_fix = DONTKNOW;
if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) != 0) {
lfdir = idesc.id_parent;
} else {
pwarn("NO %s DIRECTORY", lfname);
if (preen || reply("CREATE") == 1) {
lfdir = newdir(UFSROOTINO, (fsck_ino_t)0,
lfmode, lfname);
if (lfdir != 0) {
if (preen)
(void) printf(" (CREATED)\n");
else
(void) printf("\n");
statemap[lfdir] |= INFOUND;
/*
* XXX What if we allocate an inode
* that's already been scanned? Then
* we need to leave lnctnp[] alone.
*/
TRACK_LNCNTP(UFSROOTINO,
lncntp[UFSROOTINO]++);
}
}
}
if (lfdir == 0) {
pfatal("SORRY. CANNOT CREATE %s DIRECTORY\n", lfname);
pwarn("Could not reconnect inode %d\n", orphan);
goto noconnect;
} else {
/*
* We searched for it via the namespace, so by
* definition it's been found. We have to do this
* because it is possible that we're called before
* the full namespace mapping is complete (especially
* from pass 1, if it encounters a corrupt directory
* that has to be cleared).
*/
statemap[lfdir] |= INFOUND;
}
}
dp = ginode(lfdir);
if ((dp->di_mode & IFMT) != IFDIR) {
pfatal("%s IS NOT A DIRECTORY", lfname);
if (reply("REALLOCATE") == 0) {
iscorrupt = 1;
goto noconnect;
}
oldlfdir = lfdir;
lfdir = reallocdir(UFSROOTINO, (fsck_ino_t)0, lfmode, lfname);
if (lfdir == 0) {
iscorrupt = 1;
pfatal("SORRY. CANNOT CREATE %s DIRECTORY\n\n",
lfname);
goto noconnect;
}
inodirty();
statemap[lfdir] |= INFOUND;
freeino(oldlfdir, TI_PARENT);
}
if (statemap[lfdir] != DFOUND) {
/*
* Not a consistency problem of the sort that'll
* cause the kernel heartburn, so don't set iscorrupt.
*/
if (debug)
(void) printf("lfdir %d is in state 0x%x\n",
lfdir, (int)statemap[lfdir]);
lfdir = 0;
pfatal("SORRY. %s DIRECTORY DISAPPEARED\n\n", lfname);
pwarn("Could not reconnect inode %d\n", orphan);
goto noconnect;
}
rval = do_reconnect(orphan, parentdir, name);
return (rval);
/*
* Leaving things unconnected is harmless as far as trying to
* use the filesystem later, so don't set iscorrupt yet (it's
* just lost blocks and inodes, after all).
*
* Lost directories get noted for reporting after all checks
* are done - they may get cleared later.
*/
noconnect:
if (lostdir) {
intree = tsearch((void *)orphan, &limbo_dirs,
ino_t_cmp);
if (intree == NULL)
errexit("linkup: out of memory");
}
return (0);
}
/*
* Connect an orphaned inode to lost+found.
*
* Returns non-zero for success, zero for failure.
*/
static int
do_reconnect(fsck_ino_t orphan, fsck_ino_t parentdir, caddr_t name)
{
caddr_t flow_msg;
struct dinode *dp;
int lostdir;
mode_t mode;
fsck_ino_t *intree;
struct inodesc idesc;
dp = ginode(orphan);
mode = dp->di_mode & IFMT;
lostdir = (mode == IFDIR) || (mode == IFATTRDIR);
name = mkuniqname(name, lfname, lfdir, orphan);
if (name == NULL)
goto noconnect;
if (makeentry(lfdir, orphan, name) == 0) {
pfatal("SORRY. NO SPACE IN %s DIRECTORY\n", lfname);
pwarn("Could not reconnect inode %d\n", orphan);
goto noconnect;
}
dp = ginode(orphan);
LINK_RANGE(flow_msg, lncntp[orphan], -1);
if (flow_msg != NULL) {
LINK_CLEAR(flow_msg, orphan, dp->di_mode, &idesc);
if (statemap[orphan] == USTATE)
goto noconnect;
}
TRACK_LNCNTP(orphan, lncntp[orphan]--);
/*
* Make sure that anything we put into the normal namespace
* looks like it belongs there. Attributes can only be in
* attribute directories, not the normal directory lost+found.
*/
maybe_convert_attrdir_to_dir(orphan);
if (lostdir) {
/*
* Can't be creating a duplicate entry with makeentry(),
* because changeino() will succeed if ".." already
* exists.
*/
if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 &&
parentdir != (fsck_ino_t)-1)
(void) makeentry(orphan, lfdir, "..");
/*
* If we were half-detached, don't try to get
* inode 0 later on.
*/
if (parentdir == 0)
parentdir = -1;
/*
* Fix up link counts.
*
* XXX This section is getting pretty byzantine, espcially
* when combined with changeino()/chgino()'s link manipulation.
*/
LFDIR_LINK_RANGE_RVAL(flow_msg, lncntp[lfdir], 1, &idesc, 0);
TRACK_LNCNTP(lfdir, lncntp[lfdir]--);
pwarn("DIR I=%lu CONNECTED. ", (long)orphan);
reattached_dir = 1;
if (parentdir != (fsck_ino_t)-1) {
/*
* Have to clear the parent's reference. Otherwise,
* if it's an orphan, then we may clear this orphan
* in pass 4 even though we've reconnected it.
*
* We already have the reference count
* allowing for a parent link, so undo the
* adjustment done above. Otherwise we come
* out high by one.
*/
(void) printf("PARENT WAS I=%lu\n", (long)parentdir);
(void) cleardirentry(parentdir, orphan);
}
if (!preen)
(void) printf("\n");
} else if (preen) {
(void) printf(" (RECONNECTED)\n");
}
statemap[orphan] &= ~INDELAYD;
return (1);
/*
* Leaving things unconnected is harmless as far as trying to
* use the filesystem later, so don't set iscorrupt yet (it's
* just lost blocks and inodes, after all).
*
* Lost directories get noted for reporting after all checks
* are done - they may get cleared later.
*/
noconnect:
if (lostdir) {
intree = tsearch((void *)orphan, &limbo_dirs,
ino_t_cmp);
if (intree == NULL)
errexit("linkup: out of memory");
}
return (0);
}
/*
* fix an entry in a directory.
*/
int
changeino(fsck_ino_t dir, char *name, fsck_ino_t newnum)
{
struct inodesc idesc;
init_inodesc(&idesc);
idesc.id_type = DATA;
idesc.id_func = chgino;
idesc.id_number = dir;
idesc.id_fix = DONTKNOW;
idesc.id_name = name;
idesc.id_parent = newnum; /* new value for name */
return (ckinode(ginode(dir), &idesc, CKI_TRAVERSE));
}
/*
* make an entry in a directory
*/
int
makeentry(fsck_ino_t parent, fsck_ino_t ino, char *name)
{
int repeat;
struct dinode *dp;
struct inoinfo *iip;
struct inodesc idesc;
char pathbuf[MAXPATHLEN + 1];
if (parent < UFSROOTINO || parent >= maxino ||
ino < UFSROOTINO || ino >= maxino)
return (0);
init_inodesc(&idesc);
idesc.id_type = DATA;
idesc.id_func = mkentry;
idesc.id_number = parent;
idesc.id_parent = ino; /* this is the inode to enter */
idesc.id_fix = DONTKNOW;
idesc.id_name = name;
repeat = 0;
again:
dp = ginode(parent);
if ((dp->di_size % DIRBLKSIZ) != 0) {
dp->di_size = roundup(dp->di_size, DIRBLKSIZ);
inodirty();
iip = getinoinfo(ino);
if (iip != NULL)
iip->i_isize = dp->di_size;
}
if ((ckinode(dp, &idesc, CKI_TRAVERSE) & ALTERED) != 0) {
iip = getinoinfo(ino);
if (iip != NULL)
iip->i_isize = dp->di_size;
return (1);
}
if (repeat == 0) {
getpathname(pathbuf, parent, parent);
if (expanddir(parent, pathbuf) == 0)
return (0);
repeat = 1;
goto again;
}
return (0);
}
/*
* Attempt to expand the size of a directory
*/
static int
expanddir(fsck_ino_t ino, char *name)
{
struct bufarea *bpback, *bp[2];
daddr32_t nxtibn, nxtbn;
daddr32_t newblk[2];
struct dinode *dp;
char *cp;
int bc, f;
int n;
int allocIndir;
int frag2blks;
int lffragsz = 0;
int c = 0;
int retval = 0;
bp[0] = bp[1] = NULL;
dp = ginode(ino);
if (dp->di_size == 0) {
goto bail;
}
nxtbn = lblkno(&sblock, dp->di_size - 1) + 1;
/*
* Check that none of the nominally in-use direct block
* addresses for the directory are bogus.
*/
for (bc = 0; ((nxtbn > 0) && (bc < nxtbn) && (bc < NDADDR)); bc++) {
if (dp->di_db[bc] == 0) {
goto bail;
}
}
/*
* Determine our data block allocation needs. We always need to
* allocate at least one data block. We may need a second, the
* indirect block itself.
*/
allocIndir = 0;
nxtibn = -1;
n = 0;
if (nxtbn <= NDADDR) {
/*
* Still in direct blocks. Check for the unlikely
* case where the last block is a frag rather than
* a full block. This would only happen if someone had
* created a file in lost+found, and then that caused
* the dynamic directory shrinking capabilities of ufs
* to kick in.
*
* Note that we test nxtbn <= NDADDR, as it's the
* next block (i.e., one greater than the current/
* actual block being examined).
*/
lffragsz = dp->di_size % sblock.fs_bsize;
}
if (nxtbn >= NDADDR && !lffragsz) {
n = sblock.fs_bsize / sizeof (daddr32_t);
nxtibn = nxtbn - NDADDR;
/*
* Only go one level of indirection
*/
if (nxtibn >= n) {
goto bail;
}
/*
* First indirect block means we need to pick up
* the actual indirect pointer block as well.
*/
if (nxtibn == 0)
allocIndir++;
}
/*
* Allocate all the new blocks we need.
*/
if ((newblk[0] = allocblk(sblock.fs_frag)) == 0) {
goto bail;
}
c++;
if (allocIndir) {
if ((newblk[1] = allocblk(sblock.fs_frag)) == 0) {
goto bail;
}
c++;
}
/*
* Take care of the block that will hold new directory entries.
* This one is always allocated.
*/
bp[0] = getdirblk(newblk[0], (size_t)sblock.fs_bsize);
if (bp[0]->b_errs) {
goto bail;
}
if (lffragsz) {
/*
* Preserve the partially-populated existing directory.
*/
bpback = getdirblk(dp->di_db[nxtbn - 1],
(size_t)dblksize(&sblock, dp, nxtbn - 1));
if (!bpback->b_errs) {
(void) memmove(bp[0]->b_un.b_buf, bpback->b_un.b_buf,
(size_t)lffragsz);
}
}
/*
* Initialize the new fragments. lffragsz is zero if this
* is a completely-new block.
*/
for (cp = &(bp[0]->b_un.b_buf[lffragsz]);
cp < &(bp[0]->b_un.b_buf[sblock.fs_bsize]);
cp += DIRBLKSIZ) {
(void) memmove(cp, (char *)&emptydir,
sizeof (emptydir));
}
dirty(bp[0]);
/*
* If we allocated the indirect block, zero it out. Otherwise
* read it in if we're using one.
*/
if (allocIndir) {
bp[1] = getdatablk(newblk[1], (size_t)sblock.fs_bsize);
if (bp[1]->b_errs) {
goto bail;
}
(void) memset(bp[1]->b_un.b_buf, 0, sblock.fs_bsize);
dirty(bp[1]);
} else if (nxtibn >= 0) {
/* Check that the indirect block pointer looks okay */
if (dp->di_ib[0] == 0) {
goto bail;
}
bp[1] = getdatablk(dp->di_ib[0], (size_t)sblock.fs_bsize);
if (bp[1]->b_errs) {
goto bail;
}
for (bc = 0; ((bc < nxtibn) && (bc < n)); bc++) {
/* LINTED pointer cast alignment */
if (((daddr32_t *)bp[1]->b_un.b_buf)[bc] == 0) {
goto bail;
}
}
}
/*
* Since the filesystem's consistency isn't affected by
* whether or not we actually do the expansion, iscorrupt
* is left alone for any of the approval paths.
*/
pwarn("NO SPACE LEFT IN %s", name);
if (!preen && (reply("EXPAND") == 0))
goto bail;
/*
* Now that everything we need is gathered up and the
* necessary approvals acquired, we can make our provisional
* changes permanent.
*/
if (lffragsz) {
/*
* We've saved the data from the old end fragment(s) in
* our new block, so we can just swap the new one in.
* Make sure the size reflects the expansion of the
* final fragments/block.
*/
frag2blks = roundup(lffragsz, sblock.fs_fsize);
freeblk(ino, dp->di_db[nxtbn - 1],
frag2blks / sblock.fs_fsize);
frag2blks = btodb(frag2blks);
dp->di_size -= (u_offset_t)lffragsz;
dp->di_blocks = dp->di_blocks - frag2blks;
dp->di_db[nxtbn - 1] = newblk[0];
dp->di_size += (u_offset_t)sblock.fs_bsize;
dp->di_blocks += btodb(sblock.fs_bsize);
inodirty();
retval = 1;
goto done;
}
/*
* Full-block addition's much easier. It's just an append.
*/
dp->di_size += (u_offset_t)sblock.fs_bsize;
dp->di_blocks += btodb(sblock.fs_bsize);
if (allocIndir) {
dp->di_blocks += btodb(sblock.fs_bsize);
}
inodirty();
if (nxtibn < 0) {
/*
* Still in direct blocks
*/
dp->di_db[nxtbn] = newblk[0];
} else {
/*
* Last indirect is always going to point at the
* new directory buffer
*/
if (allocIndir)
dp->di_ib[0] = newblk[1];
/* LINTED pointer case alignment */
((daddr32_t *)bp[1]->b_un.b_buf)[nxtibn] = newblk[0];
dirty(bp[1]);
}
if (preen)
(void) printf(" (EXPANDED)\n");
retval = 1;
goto done;
bail:
for (f = 0; f < c; f++)
freeblk(ino, newblk[f], sblock.fs_frag);
done:
/*
* bp[0] is handled by the directory cache's auto-release.
*/
if (bp[1] != NULL)
brelse(bp[1]);
return (retval);
}
static fsck_ino_t
newdir(fsck_ino_t parent, fsck_ino_t request, int mode, caddr_t name)
{
fsck_ino_t dino;
char pname[BUFSIZ];
/*
* This function creates a new directory and populates it with
* "." and "..", then links to it as NAME in PARENT.
*/
dino = allocdir(parent, request, mode, 1);
if (dino != 0) {
getpathname(pname, parent, parent);
name = mkuniqname(name, pname, parent, dino);
/*
* We don't touch numdirs, because it's just a cache of
* what the filesystem claimed originally and is used
* to calculate hash keys.
*/
if (makeentry(parent, dino, name) == 0) {
freedir(dino, parent);
dino = 0;
}
}
return (dino);
}
/*
* Replace whatever NAME refers to in PARENT with a new directory.
* Note that if the old inode REQUEST is a directory, all of its
* contents will be freed and reaped.
*/
static fsck_ino_t
reallocdir(fsck_ino_t parent, fsck_ino_t request, int mode, caddr_t name)
{
int retval;
fsck_ino_t newino;
if ((request != 0) && (statemap[request] != USTATE))
freeino(request, TI_PARENT);
newino = allocdir(parent, request, mode, 0);
if (newino != 0) {
retval = changeino(parent, name, newino);
if ((retval & ALTERED) == 0) {
/*
* No change made, so name doesn't exist, so
* unwind allocation rather than leak it.
*/
freedir(newino, parent);
newino = 0;
}
}
return (newino);
}
/*
* allocate a new directory
*/
fsck_ino_t
allocdir(fsck_ino_t parent, fsck_ino_t request, int mode, int update_parent)
{
fsck_ino_t ino;
caddr_t cp;
caddr_t flow;
struct dinode *dp;
struct bufarea *bp;
struct inoinfo *inp;
struct inodesc idesc;
struct dirtemplate *dirp;
ino = allocino(request, IFDIR|mode);
if (ino == 0)
return (0);
dirp = &dirhead;
dirp->dot_ino = ino;
dirp->dotdot_ino = parent;
dp = ginode(ino);
bp = getdirblk(dp->di_db[0], (size_t)sblock.fs_fsize);
if (bp->b_errs) {
freeino(ino, TI_PARENT);
return (0);
}
(void) memmove(bp->b_un.b_buf, (void *)dirp,
sizeof (struct dirtemplate));
for (cp = &bp->b_un.b_buf[DIRBLKSIZ];
cp < &bp->b_un.b_buf[sblock.fs_fsize];
cp += DIRBLKSIZ)
(void) memmove(cp, (void *)&emptydir, sizeof (emptydir));
dirty(bp);
dp->di_nlink = 2;
inodirty();
if (!inocached(ino)) {
cacheino(dp, ino);
} else {
/*
* re-using an old directory inode
*/
inp = getinoinfo(ino);
if (inp == NULL) {
if (debug)
errexit("allocdir got NULL from getinoinfo "
"for existing entry I=%d\n",
ino);
cacheino(dp, ino);
} else {
init_inoinfo(inp, dp, ino);
inp->i_parent = parent;
inp->i_dotdot = parent;
}
}
/*
* Short-circuit all the dancing around below if it's the
* root inode. The net effect's the same.
*/
if (ino == UFSROOTINO) {
TRACK_LNCNTP(ino, lncntp[ino] = dp->di_nlink);
return (ino);
}
if (!update_parent)
return (ino);
/*
* We never create attribute directories, which can have
* non-directory parents. So, the parent of the directory
* we're creating must itself be a directory.
*/
if (!INO_IS_DVALID(parent)) {
freeino(ino, TI_PARENT);
return (0);
}
/*
* Make sure the parent can handle another link.
* Since we might only update one version of the
* count (disk versus in-memory), we have to check both.
*/
LINK_RANGE(flow, lncntp[parent], -1);
if (flow == NULL)
LINK_RANGE(flow, (int)dp->di_nlink, 1);
if (flow != NULL) {
LINK_CLEAR(flow, parent, dp->di_mode, &idesc);
if (statemap[parent] == USTATE) {
/*
* No parent any more, so bail out. Callers
* are expected to handle this possibility.
* Since most just throw up their hands if
* we return 0, this just happens to work.
*/
freeino(ino, TI_PARENT);
return (0);
}
}
/*
* We've created a directory with two entries, "." and "..",
* and a link count of two ("." and one from its parent). If
* the parent's not been scanned yet, which means this inode
* will get scanned later as well, then make our in-core count
* match what we pushed out to disk. Similarly, update the
* parent. On the other hand, if the parent's already been
* looked at (statemap[ino] == DFOUND), the discrepancy
* between lncntp[] and di_nlink will be noted later, with
* appropriate reporting and propagation, in pass2.
*
* We're explicitly skipping where the parent was DZLINK or
* DFOUND. If it has zero links, it can't be gotten to, so
* we want a discrepancy set up that will be caught in pass2.
* DFOUND was discussed above.
*
* Regarding the claim of a link from the parent: we've not
* done anything to create such a link here. We depend on the
* semantics of our callers attaching the inode we return to
* an existing entry in the directory or creating the entry
* themselves, but in either case, not modifying the link
* count.
*
* Note that setting lncntp[ino] to zero means that both claimed
* links have been ``found''.
*/
statemap[ino] = statemap[parent];
if (INO_IS_DVALID(parent)) {
TRACK_LNCNTP(ino, lncntp[ino] = 0);
TRACK_LNCNTP(parent, lncntp[parent]--);
}
dp = ginode(parent);
dp->di_nlink++;
inodirty();
return (ino);
}
/*
* free a directory inode
*/
static void
freedir(fsck_ino_t ino, fsck_ino_t parent)
{
struct inoinfo *iip;
if (ino != parent) {
/*
* Make sure that the desired parent gets a link
* count update from freeino()/truncino(). If
* we can't look it up, then it's not really a
* directory, so there's nothing to worry about.
*/
iip = getinoinfo(ino);
if (iip != NULL)
iip->i_parent = parent;
}
freeino(ino, TI_PARENT);
}
/*
* generate a temporary name for use in the lost+found directory.
*/
static void
lftempname(char *bufp, fsck_ino_t ino)
{
fsck_ino_t in;
caddr_t cp;
int namlen;
cp = bufp + 2;
for (in = maxino; in > 0; in /= 10)
cp++;
*--cp = '\0';
/* LINTED difference will not overflow an int */
namlen = cp - bufp;
if ((namlen > BUFSIZ) || (namlen > MAXPATHLEN)) {
errexit("buffer overflow in lftempname()\n");
}
in = ino;
while (cp > bufp) {
*--cp = (in % 10) + '0';
in /= 10;
}
*cp = '#';
}
/*
* Get a directory block.
* Insure that it is held until another is requested.
*
* Our callers are expected to check for errors and/or be
* prepared to handle blocks of zeros in the middle of a
* directory.
*/
static struct bufarea *
getdirblk(daddr32_t blkno, size_t size)
{
if (pdirbp != 0) {
brelse(pdirbp);
}
pdirbp = getdatablk(blkno, size);
return (pdirbp);
}
/*
* Create a unique name for INODE to be created in directory PARENT.
* Use NAME if it is provided (non-NULL) and doesn't already exist.
* Returning NULL indicates no unique name could be generated.
*
* If we were given a name, and it conflicts with an existing
* entry, use our usual temp name instead. Without this check,
* we could end up creating duplicate entries for multiple
* orphaned directories in lost+found with the same name (but
* different parents). Of course, our usual name might already
* be in use as well, so be paranoid.
*
* We could do something like keep tacking something onto the
* end of tempname until we come up with something that's not
* in use, but that has liabilities as well. This is a
* sufficiently rare case that it's not worth going that
* overboard for.
*/
static caddr_t
mkuniqname(caddr_t name, caddr_t pname, fsck_ino_t parent, fsck_ino_t inode)
{
fsck_ino_t oldino;
struct dinode *dp;
caddr_t flow_msg;
struct inodesc idesc;
static char tempname[BUFSIZ];
lftempname(tempname, inode);
if ((name != NULL) &&
(lookup_named_ino(parent, name) != 0)) {
name = NULL;
}
if (name == NULL) {
/*
* No name given, or it wasn't unique.
*/
name = tempname;
if ((oldino = lookup_named_ino(parent, name)) != 0) {
pfatal(
"Name ``%s'' for inode %d already exists in %s \n",
name, oldino, pname);
if (reply("REMOVE OLD ENTRY") == 0) {
if (parent == lfdir)
pwarn(
"Could not reconnect inode %d\n\n",
inode);
else
pwarn(
"Could not create entry for %d\n\n",
inode);
name = NULL;
goto noconnect;
}
(void) changeino(parent, name, inode);
LINK_RANGE(flow_msg, lncntp[oldino], 1);
if (flow_msg != NULL) {
/*
* Do a best-effort, but if we're not
* allowed to do the clear, the fs is
* corrupt in any case, so just carry on.
*/
dp = ginode(oldino);
LINK_CLEAR(flow_msg, oldino, dp->di_mode,
&idesc);
if (statemap[oldino] != USTATE)
iscorrupt = 1;
} else {
TRACK_LNCNTP(oldino, lncntp[oldino]++);
}
}
}
noconnect:
return (name);
}