ufs_dir.c revision d5dbd18d69de8954ab5ceb588e99d43fc9b21d46
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Directory manipulation routines.
*
* When manipulating directories, the i_rwlock provides serialization
* since directories cannot be mmapped. The i_contents lock is redundant.
*/
#include <sys/sysmacros.h>
/*
* This is required since we're using P2ROUNDUP_TYPED on DIRBLKSIZ
*/
#error "DIRBLKSIZ not a power of 2"
#endif
/*
* Template for a virgin (newly created) directory.
* NOTE(review): only one initializer row (0, 12, 1, ".") is visible here;
* the meaning of each value depends on the layout of struct dirtemplate,
* which is not declared in this part of the file -- confirm against that
* declaration.
*/
static struct dirtemplate mastertemplate = {
0, 12, 1, ".",
};
#define MAX_DIR_NAME_LEN(len) \
/*
* The dnlc directory cache allows a 64 bit handle for directory entries.
* For ufs we squeeze both the 32 bit inumber and a 32 bit disk offset
* into the handle. Note, a 32 bit offset allows a 4GB directory, which
* is way beyond what could be cached in memory by the directory
* caching routines. So we are quite safe with this limit.
* The macros below pack and unpack the handle.
*/
/*
* The average size of a typical on-disk directory entry is about 16 bytes,
* which defines AV_DIRECT_SHIFT as log2(16).
* This define is only used to approximate the number of entries
* in a directory. This is needed for dnlc_dir_start(), which will immediately
* return an error if the value is not within its acceptable range of
* number of files in a directory.
*/
#define AV_DIRECT_SHIFT 4
/*
* If the directory size (from i_size) is greater than the ufs_min_dir_cache
* tunable then we request dnlc directory caching.
* This has been found to be profitable after 1024 file names.
*/
/*
* dirchk enables the full (slow) validation of directory entries; it
* defaults to on in DEBUG builds and off otherwise.
*/
#ifdef DEBUG
int dirchk = 1;
#else /* !DEBUG */
int dirchk = 0;
#endif /* DEBUG */
/* NOTE(review): presumably enables negative dnlc entries -- confirm at the use sites. */
int ufs_negative_cache = 1;
/*
* Forward declarations of the file-local helper routines. They are declared
* without parameter lists, so the compiler does not check call arguments
* against these declarations.
*/
static void dirbad();
static int ufs_dircheckforname();
static int ufs_dirrename();
static int ufs_diraddentry();
static int ufs_dirempty();
static int ufs_dirscan();
static int ufs_dirclrdotdot();
static int ufs_dirfixdotdot();
static int ufs_dirpurgedotdot();
static int dirprepareentry();
static int ufs_dirmakedirect();
static int dirbadname();
static int dirmangled();
/*
* Look for a given name in a directory. On successful return, *ipp
* will point to the VN_HELD inode.
*/
int
char *namep,
int skipdnlc) /* skip the 1st level dnlc */
{
int entryoffsetinblock; /* offset of ep in addr's buffer */
int numdirpasses; /* strategy for directory search */
int namlen; /* length of name */
int err;
int doingchk;
int i;
int caching;
/*
* Check accessibility of directory.
*/
return (ENOTDIR);
return (err);
/*
* Check the directory name lookup cache, first for individual files
* then for complete directories.
*/
/* vp is already held from dnlc_lookup */
if (vp == DNLC_NO_VNODE) {
return (ENOENT);
}
return (0);
}
/*
* Grab the reader lock on the directory data before checking
* the dnlc to avoid a race with ufs_dirremove() & friends.
*/
case DFOUND:
return (0);
}
/*
* release the lock on the dir we are searching
* to avoid a deadlock when grabbing the
* i_contents lock in ufs_iget_alloced().
*/
/*
* must recheck as we dropped dp->i_rwlock
*/
return (0);
}
/* check failed, read the actual directory */
if (!err) {
}
goto restart;
}
/* usual case of not "." nor ".." */
if (err) {
return (err);
}
return (0);
case DNOENT:
}
return (ENOENT);
default:
break;
}
doingchk = 0;
chkino = 0;
caching = 0;
/*
* Attempt to cache any directories greater than
* the tunable ufs_min_dir_cache.
*/
case DNOMEM:
case DTOOBIG:
dp->i_cachedir = 0;
break;
case DOK:
caching = 1;
break;
default:
break;
}
}
/*
* If caching we don't stop when the file has been
* found, but need to know later, so clear *ipp now
*/
if (caching) {
offset = 0;
entryoffsetinblock = 0;
numdirpasses = 1;
} else {
/*
* Take care to look at dp->i_diroff only once, as it
* may be changed concurrently by other threads.
*/
offset = 0;
}
if (offset == 0) {
entryoffsetinblock = 0;
numdirpasses = 1;
} else {
if (entryoffsetinblock != 0) {
if (err)
goto bad;
}
numdirpasses = 2;
}
}
last_offset = 0;
/*
* If offset is on a block boundary,
* read the next directory block.
* Release previous if it exists.
*/
}
if (err)
goto bad;
entryoffsetinblock = 0;
}
/*
* If the offset to the next entry is invalid or if the
* next entry is a zero length record or if the record
* length is invalid, then skip to the next directory
* block. Complete validation checks are done if the
* record length is invalid.
*
* Full validation checks are slow so they are disabled
* by default. Complete checks can be run by patching
* "dirchk" to be true.
*
* We have to check the validity of entryoffsetinblock
* here because it can be set to i_diroff above.
*/
offset += i;
entryoffsetinblock += i;
if (caching) {
caching = 0;
}
continue;
}
/*
* Add named entries and free space into the directory cache
*/
if (caching) {
dp->i_cachedir = 0;
caching = 0;
}
} else {
/*
* entries hold the previous offset except the
* 1st which holds the offset + 1
*/
off2 = last_offset;
} else {
}
}
}
}
/*
* Check for a name match.
* We have the parent inode read locked with i_rwlock.
*/
/*
* We have to release the fbp early here to avoid
* a possible deadlock situation where we have the
* fbp and want the directory inode and someone doing
* a ufs_direnter_* has the directory inode and wants
* the fbp. XXX - is this still needed?
*/
/*
* Atomic update (read lock held)
*/
if (caching) {
caching = 0;
}
if (doingchk) {
/*
* if the inumber didn't change
* continue with already found inode.
*/
goto checkok;
else {
/* *ipp is nulled at restart */
goto restart;
}
}
/*
* release the lock on the dir we are searching
* to avoid a deadlock when grabbing the
* i_contents lock in ufs_iget_alloced().
*/
cr);
if (err)
goto bad;
/*
* Since we released the lock on the directory,
* we must check that the same inode is still
* the ".." entry for this directory.
*/
/*CSTYLED*/
/*
* Modification time changed on the
* directory, we must go check if
* the inumber changed for ".."
*/
doingchk = 1;
entryoffsetinblock = 0;
if (caching) {
/*
* Forget directory caching
* for this rare case
*/
caching = 0;
}
goto recheck;
}
if (caching) {
caching = 0;
}
} else {
cr);
if (err)
goto bad;
}
/*
* If we are not caching then just return the entry
* otherwise complete loading up the cache
*/
if (!caching) {
return (0);
}
if (err)
goto bad;
}
}
/*
* If we started in the middle of the directory and failed
* to find our target, we must check the beginning as well.
*/
if (numdirpasses == 2) {
numdirpasses--;
offset = 0;
goto searchloop;
}
/*
* If whole directory caching is on (or was originally on) then
* the entry may have been found.
*/
}
}
if (caching) {
caching = 0;
}
bad:
/*
* err and *ipp can both be set if we were attempting to
* cache the directory, and we found the entry, then later
* while trying to complete the directory cache encountered
* an error (e.g. reading a directory sector).
*/
}
if (fbp)
if (caching)
return (err);
}
/*
* If ufs_dircheckforname() fails to find an entry with the given name,
* this "slot" structure holds state for ufs_direnter_*() as to where
* there is space to put an entry with that name.
* If ufs_dircheckforname() finds an entry with the given name, this structure
* holds state for ufs_dirrename() and ufs_dirremove() as to where the
* entry is. "status" indicates what ufs_dircheckforname() found:
*    NONE    name not found, large enough free slot not found
*    FOUND   name not found, large enough free slot found
*    EXIST   name found
* If ufs_dircheckforname() fails due to an error, this structure is not
* filled in.
*
* After ufs_dircheckforname() succeeds the values are:
*    status  offset          size         fbp, ep
*    ------  ------          ----         -------
*    NONE    end of dir      needed       not valid
*    FOUND   start of entry  of ent       both valid if fbp != NULL
*    EXIST   start of entry  of prev ent  valid
*
* "endoff" is set to 0 if an entry with the given name is found, or if no
* free slot could be found or made; this means that the directory should not
* be truncated. If the entry was found, the search terminates so
* ufs_dircheckforname() didn't find out where the last valid entry in the
* directory was, so it doesn't know where to cut the directory off; if no free
* slot could be found or made, the directory has to be extended to make room
* for the new entry, so there's nothing to cut off.
* Otherwise, "endoff" is set to the larger of the offset of the last
* non-empty entry in the directory and the offset at which the new entry will
* be placed. This is used by ufs_diraddentry(); if a new
* entry is to be added to the directory, any complete directory blocks at the
* end of the directory that contain no non-empty entries are lopped off the
* end, thus shrinking the directory dynamically.
*/
struct slot {
int size; /* size of area at slotoffset */
int cached; /* cached directory */
};
/*
* Write a new directory entry for DE_CREATE or DE_MKDIR operations.
*/
int
char *namep, /* name of entry */
int flags) /* no entry exists */
{
char *s;
int namlen; /* length of name */
int err; /* error number */
int do_rele_nip = 0; /* release nip */
return (EINVAL);
/* don't allow '/' characters in pathname component */
if (*s == '/')
return (EACCES);
/*
* If name is "." or ".." then if this is a create look it up
* and return EEXIST.
*/
if (namep[0] == '.' &&
/*
* ufs_dirlook will acquire the i_rwlock
*/
return (err);
}
return (EEXIST);
}
/*
* If target directory has not been removed, then we can consider
* allowing file to be created.
*/
return (ENOENT);
}
/*
* Check accessibility of directory.
*/
return (ENOTDIR);
}
/*
* Execute access is required to search the directory.
*/
return (err);
}
/*
* Search for the entry. Return VN_HELD tip if found.
*/
if (err)
goto out;
if (tip) {
} else {
/*
* The entry does not exist. Check write permission in
* directory to see if entry can be created.
*/
goto out;
/*
* Make new inode and directory entry.
*/
do_rele_nip = 1;
goto out;
}
/*
* Unmake the inode we just made.
*/
}
do_rele_nip = 1;
} else {
}
}
out:
/*
* Drop vfs_dqrwlock before calling VN_RELE() on nip to
* avoid deadlock since ufs_delete() grabs vfs_dqrwlock as reader.
*/
if (do_rele_nip) {
}
return (err);
}
/*
* Write a new directory entry for DE_LINK, DE_SYMLINK or DE_RENAME operations.
* If tvpp is non-null, return with the pointer to the target vnode.
*/
int
char *namep, /* name of entry */
{
char *s;
int namlen; /* length of name */
int err; /* error number */
/* don't allow '/' characters in pathname component */
if (*s == '/')
return (EACCES);
/*
* If name is "." or ".." then if this is a create look it up
* and return EEXIST. Rename or link TO "." or ".." is forbidden.
*/
if (namep[0] == '.' &&
return (EINVAL); /* *SIGH* should be ENOTEMPTY */
}
return (EEXIST);
}
/*
* For link and rename lock the source entry and check the link count
* to see if it has been removed while it was unlocked. If not, we
* increment the link count and force the inode to disk to make sure
* that it is there before any directory entry that points to it.
*
* In the case of a symbolic link, we are dealing with a new inode
* which does not yet have any links. We've created it with a link
* count of 1, and we don't want to increment it since this will be
* its first link.
*
* We are about to push the inode to disk. We make sure
* that the inode's data blocks are flushed first so the
* inode and its data blocks are always in sync. This
* adds some robustness in the event of a power failure
* or panic where sync fails. If we panic before the
* inode is updated, then the inode still refers to the
* old data blocks (or none for a new file). If we panic
* after the inode is updated, then the inode refers to
* the new data blocks.
*
* We do this before grabbing the i_contents lock because
* ufs_syncip() will want that lock. We could do the data
* syncing after the removal checks, but upon return from
* the data sync we would have to repeat the removal
* checks.
*/
return (err);
}
return (ENOENT);
}
return (EMLINK);
}
/*
* Sync the indirect blocks associated with the file
* for the same reasons as described above. Since this
* call wants the i_contents lock held for it we can do
* this here with no extra work.
*/
return (err);
}
if (op != DE_SYMLINK)
/*
* If target directory has not been removed, then we can consider
* allowing file to be created.
*/
goto out2;
}
/*
* Check accessibility of directory.
*/
goto out2;
}
/*
* Execute access is required to search the directory.
*/
goto out2;
}
/*
* Search for the entry. Return VN_HELD tip if found.
*/
if (err)
goto out;
if (tip) {
switch (op) {
case DE_RENAME:
break;
case DE_LINK:
case DE_SYMLINK:
/*
* Can't link to an existing file.
*/
break;
default:
break;
}
} else {
/*
* The entry does not exist. Check write permission in
* directory to see if entry can be created.
*/
goto out;
cr);
}
out:
/*
* Drop vfs_dqrwlock before calling VN_RELE() on tip to
* avoid deadlock since ufs_delete() grabs vfs_dqrwlock as reader.
*/
/*
* If we renamed a file over the top of an existing file,
* or linked a file to an existing file (or tried to),
* then set *tvpp to the target vnode, if tvpp is non-null
* otherwise, release and delete (or just release) the inode.
*
* N.B., by returning the target's vnode pointer to the caller,
* that caller becomes responsible for doing the VN_RELE.
*/
if (tip) {
} else {
}
}
out2:
if (err) {
/*
* Undo bumped link count.
*/
if (op != DE_SYMLINK) {
}
}
return (err);
}
/*
* Check for the existence of a name in a directory (unless noentry
* is set), or else of an empty slot in which an entry may be made.
* If the requested name is found,
* then on return *ipp points at the inode and *offp contains
* its offset in the directory. If the name is not found, then *ipp
* will be NULL and *slotp will contain information about a directory slot in
* which an entry may be made (either an empty slot, or the first position
* past the end of the directory).
* The target directory inode (tdp) is supplied write locked (i_rwlock).
*
* This may not be used on "." or "..", but aliases of "." are ok.
*/
static int
char *namep, /* name we're checking for */
int namlen, /* length of name, excluding null */
int noentry) /* noentry - just look for space */
{
int entryoffsetinblk; /* offset of ep in fbp's buffer */
int i; /* length of mangled entry */
int needed;
int err;
int first;
int caching;
int stat;
/*
* First check if there is a complete cache of the directory.
*/
if (noentry) {
/*
* We know from the 1st level dnlc cache that the entry
* doesn't exist, so don't bother searching the directory
* cache, but just look for space (possibly in the directory
* cache).
*/
} else {
}
switch (stat) {
case DFOUND:
} else {
if (err)
return (err);
}
first = 0;
if (offset & 1) {
/* This is the first entry in the block */
first = 1;
offset -= 1;
}
if (err) {
return (err);
}
/*
* Check the validity of the entry.
* If it's bad, then throw away the cache and
* continue without it. The dirmangled() routine
* will then be called upon it.
*/
break;
}
/*
* Remember the returned offset is the offset of the
* preceding record (unless this is the 1st record
* in the DIRBLKSIZ sized block (disk sector)), then it's
* offset + 1. Note, no real offsets are on odd boundaries.
*/
if (first) {
} else {
/* get the next entry */
/*
* Check the validity of this entry as well
* If it's bad, then throw away the cache and
* continue without it. The dirmangled() routine
* will then be called upon it.
*/
break;
}
}
return (0);
case DNOENT:
/*
* The caller gets to set the initial slot status to
* indicate whether it's interested in getting an
* empty slot. For example, the status can be set
* to FOUND when an entry is being deleted.
*/
return (0);
}
&handle)) {
case DFOUND:
if (err) {
return (err);
}
/*
* Check the validity of the entry.
* If it's bad, then throw away the cache and
* continue without it. The dirmangled() routine
* will then be called upon it.
*/
break;
}
/*
* Remember the returned offset is the offset of the
* containing record.
*/
/*
* Set end offset to 0. Truncation is handled
* because the dnlc cache will blow away the
* cached directory when an entry is removed
* that drops the entries left to less than half
* the minimum number (dnlc_min_dir_cache).
*/
return (0);
case DNOENT:
return (0);
default:
break;
}
break;
}
/*
* Attempt to cache any directories greater than
* the tunable ufs_min_dir_cache.
*/
switch (dnlc_dir_start(dcap,
case DNOMEM:
case DTOOBIG:
tdp->i_cachedir = 0;
break;
case DOK:
caching = 1;
break;
default:
break;
}
}
}
/*
* No point in using i_diroff since we must search whole directory
*/
enduseful = 0;
offset = last_offset = 0;
entryoffsetinblk = 0;
/*
* If offset is on a block boundary,
* read the next directory block.
* Release previous if it exists.
*/
if (err) {
if (caching) {
}
return (err);
}
entryoffsetinblk = 0;
}
/*
* If still looking for a slot, and at a DIRBLKSIZ
* boundary, have to start looking for free space
* again.
*/
}
/*
* If the next entry is a zero length record or if the
* record length is invalid, then skip to the next
* directory block. Complete validation checks are
* done if the record length is invalid.
*
* Full validation checks are slow so they are disabled
* by default. Complete checks can be run by patching
* "dirchk" to be true.
*
* We do not have to check the validity of
* entryoffsetinblk here because it starts out as zero
* and is only incremented by d_reclen values that we
* validate here.
*/
offset += i;
entryoffsetinblk += i;
if (caching) {
caching = 0;
}
continue;
}
/*
* Add named entries and free space into the directory cache
*/
if (caching) {
caching = 0;
}
} else {
/*
* entries hold the previous offset if
* not the 1st one
*/
off2 = last_offset;
} else {
}
}
}
}
/*
* If an appropriate sized slot has not yet been found,
* check to see if one is available.
*/
if (size > 0) {
if (noentry) {
return (0);
}
}
}
}
/*
* Check for a name match.
*/
} else {
if (err) {
if (caching)
return (err);
}
}
if (caching)
return (0);
}
}
if (fbp) {
}
if (caching) {
return (0);
}
(void) dnlc_dir_rem_space_by_handle(dcap,
return (0);
}
}
/*
* We didn't find a slot; the new directory entry should be put
* at the end of the directory. Return an indication of where
* this is, and set "endoff" to zero; since we're going to have
* to extend the directory, we're certainly not going to
* truncate it.
*/
} else {
/*
* We found a slot, and will return an indication of where that
* slot is, as any new directory entry will be put there.
* Since that slot will become a useful entry, if the last
* useful entry we found was before this one, update the offset
* of the last useful entry.
*/
}
return (0);
}
/*
* Rename the entry in the directory tdp so that it points to
* sip instead of tip.
*/
static int
char *namep, /* entry we are trying to change */
{
int err;
int doingdirectory;
/*
* Short circuit rename of something to itself.
*/
return (ESAME); /* special KLUDGE error code */
}
/*
* We're locking 2 peer level locks, so must use tryenter
* on the 2nd to avoid deadlocks that would occur
* if we renamed a->b and b->a concurrently.
*/
/*
* drop tip and wait (sleep) until we stand a chance
* of holding sip
*/
/*
* Reverse the lock grabs in case we have heavy
* contention on the 2nd lock.
*/
goto retry;
}
}
/*
* Check that everything is on the same filesystem.
*/
goto out;
}
/*
* Must have write permission to rewrite target entry.
* Perform additional checks for sticky directories.
*/
goto out;
/*
* Ensure source and target are compatible (both directories
* or both not directories). If target is a directory it must
* be empty and have no links to it; in addition it must not
* be a mount point, and both the source and target must be
* writable.
*/
if (!doingdirectory) {
goto out;
}
/*
* vn_vfswlock will prevent mounts from using the directory
* until we are done.
*/
goto out;
}
goto out;
}
goto out;
}
} else if (doingdirectory) {
goto out;
}
/*
* Rewrite the inode pointer for target name entry
* from the target inode (tip) to the source inode (sip).
* This prevents the target entry from disappearing
* during a crash. Mark the directory inode to reflect the changes.
*/
} else {
}
}
if (err)
else
if (err) {
if (doingdirectory)
goto out;
}
/*
* Decrement the link count of the target inode.
* Fix the ".." entry in sip to point to dp.
* This is done after the new entry is on the disk.
*/
if (doingdirectory) {
/*
* The entry for tip no longer exists so I can unlock the
* vfslock.
*/
/*
* Decrement target link count once more if it was a directory.
*/
"ufs_dirrename: target directory link count != 0 (%s)",
return (err);
}
/*
* Renaming a directory with the parent different
* requires that ".." be rewritten. The window is
* still there for ".." to be inconsistent, but this
* is unavoidable, and a lot shorter than when it was
* done in a user process. We decrement the link
* count in the new parent as appropriate to reflect
* the just-removed target. If the parent is the
* same, this is appropriate since the original
* directory is going away. If the new parent is
* different, ufs_dirfixdotdot() will bump the link count
* back.
*/
return (err);
}
} else
out:
return (err);
}
/*
* Fix the ".." entry of the child directory so that it points
* to the new parent directory instead of the old one. Routine
* assumes that dp is a directory and that all the inodes are on
* the same file system.
*/
static int
{
struct dirtemplate *dirp;
int err;
/*
* We hold the child directory's i_contents lock before calling
* blkatoff so that we honor correct locking protocol which is
* i_contents lock and then page lock. (blkatoff will call
* ufs_getpage where we want the page lock)
* We hold the child directory's i_rwlock before i_contents (as
* per the locking protocol) since we are modifying the ".." entry
* of the child directory.
* We hold the i_rwlock and i_contents lock until we record
* this directory delta to the log (via ufs_trans_dir) and have
* done fbrelse.
*/
if (err)
goto bad;
goto bad;
}
goto bad;
}
/*
* Increment the link count in the new parent inode and force it out.
*/
goto bad;
}
/*
* Rewrite the child ".." entry and force it out.
*/
if (err)
else
if (err)
goto bad;
/*
* Decrement the link count of the old parent inode and force it out.
*/
return (0);
bad:
if (fbp)
return (err);
}
/*
* Enter the file sip in the directory tdp with name namep.
*/
static int
char *namep,
int namlen,
{
int err;
/*
* Prepare a new entry. If the caller has not supplied an
* existing inode, make a new one.
*/
if (err) {
}
return (err);
}
/*
* Check inode to be linked to see if it is in the
* same filesystem.
*/
goto bad;
}
/*
* If renaming a directory then fix up the ".." entry in the
* directory to point to the new parent.
*/
if (err)
goto bad;
}
/*
* Fill in entry data.
*/
/*
* Note the offset supplied for any named entry is
* the offset of the previous one, unless it's the 1st.
* slotp->size is used to pass the length to
* the previous entry.
*/
} else {
}
/*
* Add back any usable unused space to the dnlc directory
* cache.
*/
}
/* adjust the previous offset of the next entry */
/*
* Not a new block.
*
* Check the validity of the next entry.
* If it's bad, then throw away the cache, and
* continue as before directory caching.
*/
== DNOENT) {
}
}
}
/*
* Write out the directory block.
*/
if (err)
else
/*
* If this is a rename of a directory, then we have already
* fixed the ".." entry to refer to the new parent. If err
* is true at this point, we have failed to update the new
* parent to refer to the renamed directory.
* XXX - we need to unwind the ".." fix.
*/
if (err)
return (err);
/*
* Mark the directory inode to reflect the changes.
* Truncate the directory to chop off blocks of empty entries.
*/
/*
* If the directory grew then dirprepareentry() will have
* set IATTCHG in tdp->i_flag, then the directory inode must
* be flushed out. This is because if fsync() is used later
* the directory size must be correct, otherwise a crash would
* cause fsck to move the file to lost+found. Also because later
* a file may be linked in more than one directory, then there
* is no way to flush the original directory. So it must be
* flushed out on creation. See bug 4293809.
*/
}
cr);
}
}
return (0);
bad:
return (err);
}
/*
* Clear out entry prepared by dirprepareentry().
*/
/*
* Don't touch err so we don't clobber the real error that got us here.
*/
else
return (err);
}
/*
* Prepare a directory slot to receive an entry.
*/
static int
{
int err;
/*
* If we didn't find a slot, then indicate that the
* new slot belongs at the end of the directory.
* If we found a slot, then the new entry can be
* put at slotp->offset.
*/
"dirprepareentry: bad fs_fsize, DIRBLKSIZ: %d"
" > dp->i_fs->fs_fsize: %d (%s)",
return (err);
}
/*
* Allocate the new block.
*/
if (err) {
return (err);
}
/*
* Adjust directory size, if needed. This should never
* push the size past a new multiple of DIRBLKSIZ.
* This is an artifact of the old (4.2BSD) way of initializing
* directory sizes to be less than DIRBLKSIZ.
*/
}
/*
* Get the block containing the space for the new directory entry.
*/
if (err) {
return (err);
}
}
switch (status) {
case NONE:
/*
* No space in the directory. slotp->offset will be on a
* directory block boundary and we will write the new entry
* into a fresh block.
*/
break;
case FOUND:
/*
* An entry of the required size has been found. Use it.
*/
/* this is the 1st record in a block */
} else {
}
break;
default:
break;
}
return (0);
}
/*
* Allocate and initialize a new inode that will go into directory tdp.
* This routine is called from ufs_symlink(), as well as within this file.
*/
int
{
int imode; /* mode and format as in inode */
int err;
op == DE_SYMLINK);
/*
* Allocate a new inode.
*/
} else {
}
if (op == DE_ATTRDIR)
else
if (err)
return (err);
/*
* We don't need to grab vfs_dqrwlock here because it is held
* in ufs_direnter_*() above us.
*/
"ufs_dirmakeinode, ip->i_dquot != NULL: dquot (%s)",
return (err);
}
/*
* Don't allow a special file to be created with a
* dev_t that cannot be represented by this filesystem
* format on disk.
*/
goto fail;
}
} else {
}
}
} else {
}
if (op == DE_ATTRDIR) {
} else
/*
* To determine the group-id of the created file:
* 1) If the gid is set in the attribute list (non-Sun & pre-4.0
* clients are not likely to set the gid), then use it if
* the process is privileged, belongs to the target group,
* or the group is the same as the parent directory.
* 2) If the filesystem was not mounted with the Old-BSD-compatible
* GRPID option, and the directory's set-gid bit is clear,
* then use the process's gid.
* 3) Otherwise, set the group-id to the gid of the parent directory.
*/
secpolicy_vnode_create_gid(cr) == 0)) {
/*
* XXX - is this only the case when a 4.0 NFS client, or a
* client derived from that code, makes a call over the wire?
*/
} else
/*
* For SunOS 5.0->5.4, the lines below read:
*
* ip->i_suid = (ip->i_uid > MAXUID) ? UID_LONG : ip->i_uid;
* ip->i_sgid = (ip->i_gid > MAXUID) ? GID_LONG : ip->i_gid;
*
* where MAXUID was set to 60002. See notes on this in ufs_inode.c
*/
/*
* If we're creating a directory, and the parent directory has the
* set-GID bit set, set it on the new directory.
* Otherwise, if the user is neither privileged nor a member of the
* file's new group, clear the file's set-GID bit.
*/
else {
}
goto fail;
}
/*
* Extended attribute directories are not subject to quotas.
*/
if (op != DE_ATTRDIR)
else
if (err)
goto fail;
}
/*
* generate the shadow inode and attach it to the new object
*/
if (err) {
/*
* clean up parent directory
*
* tdp->i_contents already locked from
* ufs_direnter_*()
*/
}
goto fail;
}
}
/*
* If explicit time settings were supplied, then use them
* instead of using the current high resolution time.
*/
} else
gethrestime(&now);
/*
* In 2038, ctime sticks forever..
*/
} else {
}
} else
} else
/*
* If this is an attribute, tag it as one.
*/
}
/*
* push inode before its name appears in a directory
*/
return (0);
fail:
/* Throw away inode we just allocated. */
return (err);
}
/*
* Write a prototype directory into the empty inode ip, whose parent is dp.
*/
static int
int attrdir,
{
struct dirtemplate *dirp;
int err;
/*
* Allocate space for the directory we're creating.
*/
if (err)
return (err);
"ufs_dirmakedirect: bad fs_fsize, DIRBLKSIZ: %d > dp->i_fs->fs_fsize: %d (%s)",
return (err);
}
/*
* Update the tdp link count and write out the change.
* This reflects the ".." entry we'll soon write.
*/
return (EMLINK);
if (attrdir == 0)
/*
* Initialize directory with "."
* and ".." from static template.
*
* Since the parent directory is locked, we don't have to
* worry about anything changing when we drop the write
* lock on (ip).
*
*/
if (err) {
goto fail;
}
/*
* Now initialize the directory we're creating
* with the "." and ".." entries.
*/
if (err) {
goto fail;
}
if (err) {
goto fail;
}
return (0);
fail:
if (attrdir == 0)
return (err);
}
/*
* Delete a directory entry. If oip is nonzero the entry is checked
* to make sure it still reflects oip.
*
* If vpp is non-null, return the ptr of the (held) vnode associated with
* the removed name. The caller is responsible for doing the VN_RELE().
*/
int
char *namep,
{
int namlen;
int err;
int mode;
if (namlen == 0)
/*
* return error when removing . and ..
*/
if (namep[0] == '.') {
if (namlen == 1)
return (EINVAL);
return (EEXIST); /* SIGH should be ENOTEMPTY */
}
}
/*
* Check accessibility of directory.
*/
return (ENOTDIR);
}
/*
* Execute access is required to search the directory.
* Access for write is interpreted as allowing
* deletion of files in the directory.
*/
return (err);
}
if (err)
goto out_novfs;
goto out_novfs;
}
goto out_novfs;
}
/*
* vn_vfswlock() prevents races between mount and rmdir.
*/
if (vn_vfswlock(vp)) {
goto out_novfs;
}
goto out;
}
/*
* If we are removing a directory, get a lock on it.
* Taking a writer lock prevents a parallel ufs_dirlook from
* incorrectly entering a negative cache vnode entry in the dnlc.
* If the directory is empty, it will stay empty until
* we can remove it.
*/
/*
* It is possible that a thread in rename would have
* acquired this rwlock. To prevent a deadlock we
* do a rw_tryenter. If we fail to get the lock
* we drop all the locks we have acquired, wait
* for 2 ticks and reacquire the
* directory's (dp) i_rwlock and try again.
* If we don't drop dp's i_rwlock then we will panic
* with a "Deadlock: cycle in blocking chain"
* since in ufs_dircheckpath we want dp's i_rwlock.
* dp is guaranteed to exist since ufs_dirremove is
* called after a VN_HOLD(dp) has been done.
*/
delay(2);
goto retry;
}
}
/*
* Now check the restrictions that apply on sticky directories.
*/
goto out;
}
/*
* For rmdir(2), some special checks are required.
* (a) Don't remove any alias of the parent (e.g. ".").
* (b) Don't remove the current directory.
* (c) Make sure the entry is (still) a directory.
* (d) Make sure the directory is empty.
*/
}
if (err) {
goto out;
}
/*
* unlink(2) requires a different check: allow only
* privileged users to unlink a directory.
*/
goto out;
}
}
/*
* Remove the cached entry, if any.
*/
}
/*
* Collapse new free space into previous entry.
* Note, the previous entry has already been
* validated in ufs_dircheckforname().
*/
goto nocache;
}
} else {
}
(void) dnlc_dir_rem_space_by_handle(dcap,
}
/* adjust the previous pointer in the next entry */
/*
* Not a new block.
*
* Check the validity of the entry.
* If it's bad, then throw away the cache and
* continue.
*/
}
}
} else {
}
} else {
/*
* If the entry isn't the first in the directory, we must
* reclaim the space of the now empty record by adding
* the record size to the size of the previous entry.
*/
/*
* Collapse new free space into previous entry.
*/
}
}
if (err)
else
/*
* If we were removing a directory, it is 'gone' now, but we cannot
* unlock it as a thread may be waiting for the lock in ufs_create. If
* we did, it could then create a file in a deleted directory.
*/
if (err) {
goto out;
}
/*
* Now dispose of the inode.
*/
/*
* This is not done for IFATTRDIR's because they don't
* have entries in the dnlc and the link counts are
* not incremented when they are created.
*/
/*
* Decrement by 2 because we're trashing the "."
* entry as well as removing the entry in dp.
* Clear the directory entry, but there may be
* other hard links so don't free the inode.
* Decrement the dp linkcount because we're
* trashing the ".." entry.
*/
/*
* XXX need to discard negative cache entries
* for vp. See comment in ufs_delete().
*/
/*
* The return value is ignored here because if
* the directory purge fails we don't want to
* stop the delete. If ufs_dirpurgedotdot fails
* the delete will continue with the preexisting
* behavior.
*/
} else {
}
}
out:
}
/*
* If no error and vpp is non-NULL, return the vnode ptr to the caller.
* The caller becomes responsible for the VN_RELE(). Otherwise,
* Release (and delete) the inode after we drop vfs_dqrwlock to
* avoid deadlock since ufs_delete() grabs vfs_dqrwlock as reader.
*/
if (ip) {
} else {
}
}
return (err);
}
/*
* Return buffer with contents of block "offset"
* from the beginning of directory "ip". If "res"
* is non-zero, fill it in with a pointer to the
* remaining space in the directory.
*
*/
int
char **res,
{
int err;
if (err) {
return (err);
}
if (res)
return (0);
}
/*
* Do consistency checking:
* record length must be multiple of 4
* entry must fit in rest of its DIRBLKSIZ block
* record must be large enough to contain entry
* name is not longer than MAXNAMLEN
* name must be as long as advertised, and null terminated
* NOTE: record length must not be zero (should be checked previously).
* This routine is only called if dirchk is true.
* It would be nice to set the FSBAD flag in the super-block when
* this routine fails so that a fsck is forced on next reboot,
* but locking is a problem.
*/
static int
int entryoffsetinblock,
{
int i;
return (1);
}
return (0);
}
static void
{
}
/*
 * Validate the name bytes of a directory entry.
 *
 * sp points at the name and l is its advertised length.  The name is
 * acceptable only if none of its first l bytes is a NUL and the byte
 * immediately following them is the terminating NUL.
 *
 * Returns 0 for a good name.  Returns 1 if a NUL is found inside the
 * first l bytes; otherwise returns the (non-zero) byte found where the
 * terminator should have been.
 */
static int
dirbadname(char *sp, int l)
{
	const char *cp;
	int left;

	/* An embedded NUL means the name is shorter than advertised. */
	for (cp = sp, left = l; left != 0; left--, cp++) {
		if (*cp == '\0')
			return (1);
	}

	/* cp now sits just past the name: this byte must be the NUL. */
	return (*cp);
}
/*
* Check if a directory is empty or not.
*/
static int
{
}
/*
* clear the .. directory entry.
*/
static int
{
}
/*
* Scan the directory. If clr_dotdot is true clear the ..
* directory else check to see if the directory is empty.
*
* Using a struct dirtemplate here is not precisely
* what we want, but better than using a struct direct.
*
* clr_dotdot is used as a flag to tell us if we need
* to clear the dotdot entry
*
* N.B.: does not handle corrupted directories.
*/
static int
int clr_dotdot)
{
struct dirtemplate dbuf;
/*
* Since we read MINDIRSIZ, residual must
* be 0 unless we're at end of file.
*/
empty = 0;
break;
}
/* skip empty entries */
continue;
/* accept only "." and ".." */
empty = 0;
break;
}
/*
* At this point d_namlen must be 1 or 2.
* 1 implies ".", 2 implies ".." if second
* char is also "."
*/
continue;
/*
* If we're doing a purge we need to check for
* the . and .. entries and clear the d_ino for ..
*
* if clr_dotdot is set ufs_dirscan does not
* check for an empty directory.
*/
if (clr_dotdot) {
/*
* Have to actually zap the ..
* entry in the directory, as
* otherwise someone might have
* dp as its cwd and try to
* open .., which now points to
* an unallocated inode.
*/
break;
} else {
continue;
}
}
empty = 0;
break;
}
return (empty);
}
/*
* Check if source directory inode is in the path of the target directory.
* Target is supplied locked.
*
* The source and target inodes should be different upon entry.
*/
int
{
struct dirtemplate *dirp;
int err;
goto out;
}
err = 0;
goto out;
}
/*
* Search back through the directory tree, using the ".." entries.
* Fail any attempt to move a directory into an ancestor directory.
*/
for (;;) {
if (err)
break;
break;
}
break;
}
if (dotdotino == source_ino) {
break;
}
if (dotdotino == UFSROOTINO)
break;
if (fbp) {
}
}
/*
* Race to get the inode.
*/
break;
}
/*
* If the directory of the source inode (also a directory)
* is the same as this next entry up the chain, then
* we know the source directory itself can't be in the
* chain. This also prevents a panic because we already
* have sdp->i_rwlock locked.
*/
break;
}
/*
* If someone has set the WRITE_WANTED bit in this lock and if
* this happens to be a sdp or tdp of another parallel rename
* which is executing the same code and in similar situation
* we end up in a 4 way deadlock. We need to make sure that
* the WRITE_WANTED bit is not set.
*/
/*
* If the lock is held as WRITER that's fine, but if it
* has WRITE_WANTED bit set we might end up in a
* deadlock. If WRITE_WANTED is set we return
* with EAGAIN else we just go back and try.
*/
if (fbp) {
}
return (err);
} else {
/*
* The lock is being write held. We could
* just do a rw_enter here but there is a
* window between the check and now, where
* the status could have changed, so to
* avoid looping we backoff and go back to
* try for the lock.
*/
goto retry_lock;
}
}
}
if (fbp) {
}
out:
if (ip) {
}
}
return (err);
}
int
{
struct dirtemplate dbuf;
/*
* Since we read MINDIRSIZ, residual must
* be 0 unless we're at end of file.
*/
empty = 0;
break;
}
/* skip empty entries */
continue;
/*
* At this point d_namlen must be 1 or 2.
* 1 implies ".", 2 implies ".." if second
* char is also "."
*/
continue;
continue;
}
empty = 0;
break;
}
return (empty);
}
/*
* Allocate and initialize a new shadow inode to contain extended attributes.
*/
int
int flags,
{
int err;
int retry = 1;
int issync;
int trans_size;
int dorwlock; /* 0 = not yet taken, */
/* 1 = taken outside the transaction, */
/* 2 = taken inside the transaction */
/*
* Validate permission to create attribute directory
*/
return (err);
}
return (EROFS);
/*
* No need to re-init err after again:, since it's set before
* the next use of it.
*/
dorwlock = 0;
} else {
}
if (err)
return (err);
/*
* Acquire i_rwlock before TRANS_BEGIN_CSYNC() if this is a file.
* This follows the protocol for read()/write().
*/
dorwlock = 1;
}
if (ulp) {
}
/*
* Acquire i_rwlock after TRANS_BEGIN_CSYNC() if this is a directory.
* This follows the protocol established by
*/
if (dorwlock == 0) {
dorwlock = 2;
}
/*
* Suppress out of inodes messages if we will retry.
*/
if (retry)
if (err)
goto fail;
if (flags) {
/*
* Now attach it to src file.
*/
}
if (dorwlock == 2)
if (ulp) {
int terr = 0;
if (err == 0)
}
if (dorwlock == 1)
return (err);
fail:
if (dorwlock == 2)
if (ulp) {
}
if (dorwlock == 1)
/*
* No inodes? See if any are tied up in pending deletions.
* This has to be done outside of any of the above, because
* the draining operation can't be done from inside a transaction.
*/
retry = 0;
goto again;
}
return (err);
}
/*
* clear the dotdot directory entry.
* Used by ufs_dirscan when clr_dotdot
* flag is set and we're deleting a
* directory.
*/
static int
{
int err = 0;
if (err) {
return (err);
}
}
if (err) {
} else {
}
}
} else {
err = -1;
}
return (err);
}