zfs_dir.c revision afefc7e4027e02291d2f534f969cfd38b9dd1db3
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/pathname.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/extdirent.h>
/*
* zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups
* of names after deciding which is the appropriate lookup interface.
*/
static int
{
int error;
if (rpnp) {
}
if (exact)
/*
* In the non-mixed case we only expect there would ever
* be one match, but we need to use the normalizing lookup.
*/
} else {
}
return (error);
}
/*
* Lock a directory entry. A dirlock on <dzp, name> protects that name
* in dzp's directory zap object. As long as you hold a dirlock, you can
* assume two things: (1) dzp cannot be reaped, and (2) no other thread
* can change the zap entry for (i.e. link or unlink) this name.
*
* Input arguments:
* dzp - znode for directory
* name - name of entry to lock
* flag - ZNEW: if the entry already exists, fail with EEXIST.
* ZEXISTS: if the entry does not exist, fail with ENOENT.
* ZSHARED: allow concurrent access with other ZSHARED callers.
* ZXATTR: we want dzp's xattr directory
* ZCILOOK: On a mixed sensitivity file system,
* this lookup should be case-insensitive.
* ZCIEXACT: On a purely case-insensitive file system,
* this lookup should be case-sensitive.
* ZRENAMING: we are locking for renaming, force narrow locks
* ZHAVELOCK: Don't grab the z_name_lock for this call. The
* current thread already holds it.
*
* Output arguments:
* zpp - pointer to the znode for the entry (NULL if there isn't one)
* dlpp - pointer to the dirlock for this entry (NULL on error)
* direntflags - (case-insensitive lookup only)
* flags if multiple case-sensitive matches exist in directory
* realpnp - (case-insensitive lookup only)
* actual name matched within the directory
*
* Return value: 0 on success or errno on failure.
*
* NOTE: Always checks for, and rejects, '.' and '..'.
* NOTE: For case-insensitive file systems we take wide locks (see below),
* but return znode pointers to a single match.
*/
int
{
int error = 0;
int cmpflags;
/*
* Verify that we are not trying to lock '.', '..', or '.zfs'
*/
if (name[0] == '.' &&
return (EEXIST);
/*
* Case sensitivity and normalization preferences are set when
* the file system is created. These are stored in the
* zfsvfs->z_case and zfsvfs->z_norm fields. These choices
* affect what vnodes can be cached in the DNLC, how we
* perform zap lookups, and the "width" of our dirlocks.
*
* A normal dirlock locks a single name. Note that with
* normalization a name can be composed multiple ways, but
* when normalized, these names all compare equal. A wide
* dirlock locks multiple names. We need these when the file
* system is supporting mixed-mode access. It is sometimes
* necessary to lock all case permutations of a file name at
* once so that simultaneous case-insensitive/case-sensitive
* access behaves as rationally as possible.
*/
/*
* Decide if exact matches should be requested when performing
* a zap lookup on file systems supporting case-insensitive
* access.
*/
exact =
/*
* Only look in or update the DNLC if we are looking for the
* name on a file system that does not require normalization
* or case folding. We can also look there if we happen to be
* on a non-normalizing, mixed sensitivity file system IF we
* are looking for the exact name.
*
* Perhaps a TO-UPPERed version of the name could be added to the
* DNLC in the ci-only case as a performance improvement?
*/
/*
* ZRENAMING indicates we are in a situation where we should
* take narrow locks regardless of the file system's
* preferences for normalizing and case folding. This will
* prevent us deadlocking trying to grab the same wide lock
* twice if the two names happen to be case-insensitive
* matches.
*/
cmpflags = 0;
else
/*
* Wait until there are no locks on this name.
*
* Don't grab the lock if it is already held. However, we cannot
* have both ZSHARED and ZHAVELOCK together.
*/
for (;;) {
if (dzp->z_unlinked) {
return (ENOENT);
}
break;
}
if (error != 0) {
return (ENOENT);
}
/*
* Allocate a new dirlock and add it to the list.
*/
dl->dl_sharecnt = 0;
dl->dl_namelock = 0;
dl->dl_namesize = 0;
break;
}
break;
}
/*
* If the z_name_lock was NOT held for this dirlock, record it.
*/
/*
* We're the second shared reference to dl. Make a copy of
* dl_name in case the first thread goes away before we do.
* Note that we initialize the new name before storing its
* pointer into dl_name, because the first thread may load
* dl->dl_name at any time. He'll either see the old value,
* which is his, or the new shared copy; either is OK.
*/
}
/*
* We have a dirlock on the name. (Note that it is the dirlock,
* not the dzp's z_lock, that protects the name in the zap object.)
* See if there's an object by this name; if so, put a hold on it.
*/
} else {
if (update)
if (vp == DNLC_NO_VNODE) {
} else if (vp) {
return (EEXIST);
}
return (0);
} else {
}
}
if (error) {
return (error);
}
} else {
return (EEXIST);
}
if (error) {
return (error);
}
}
return (0);
}
/*
* Unlock this directory entry and wake anyone who was waiting for it.
*/
void
{
if (!dl->dl_namelock)
dl->dl_sharecnt--;
return;
}
if (dl->dl_namesize != 0)
}
/*
* Look up an entry in a directory.
*
* NOTE: '.' and '..' are handled as special cases because
* no directory entries are actually stored for them. If this is
* the root of a filesystem, then '.zfs' is also treated as a
* special pseudo-directory.
*/
int
{
int error = 0;
/*
* If we are a snapshot mounted under .zfs, return
* the vp for the snapshot directory.
*/
return (error);
}
if (error == 0)
} else {
int zf;
if (flags & FIGNORECASE)
if (error == 0) {
}
}
return (error);
}
/*
* Unlinked Set (formerly known as the "delete queue") Error Handling
*
* When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
* don't specify the name of the entry that we will be manipulating. We
* also fib and say that we won't be adding any new entries to the
* unlinked set, even though we might (this is to lower the minimum file
* size that can be deleted in a full filesystem). So on the small
* chance that the nlink list is using a fat zap (i.e. has more than
* 2000 entries), we *may* not pre-read a block that's needed.
* Therefore it is remotely possible for some of the assertions
* regarding the unlinked set below to fail due to i/o error. On a
* nondebug system, this will result in the space being leaked.
*/
void
{
VERIFY3U(0, ==,
}
/*
* Clean up any znodes that had no links when we either crashed or
* (force) umounted the file system.
*/
void
{
int error;
/*
* Iterate over the contents of the unlinked set.
*/
zap_cursor_advance(&zc)) {
/*
* See what kind of object we have in list
*/
if (error != 0)
continue;
/*
* We need to re-mark these list entries for deletion,
* so we pull them back into core and set zp->z_unlinked.
*/
/*
* We may pick up znodes that are already marked for deletion.
* This could happen during the purge of an extended attribute
* directory. All we need to do is skip over them, since they
* are already in the system marked z_unlinked.
*/
if (error != 0)
continue;
}
}
/*
* Delete the entire contents of a directory. Return a count
* of the number of entries that could not be deleted. If we encounter
* an error, return a count of at least one so that the directory stays
* in the unlinked set.
*
* NOTE: this function assumes that the directory is inactive,
* so there is no need to lock its entries before deletion.
* Also, it assumes the directory contents are *only* regular
* files.
*/
static int
{
int skipped = 0;
int error;
zap_cursor_advance(&zc)) {
if (error) {
skipped += 1;
continue;
}
if (error) {
skipped += 1;
continue;
}
if (error)
skipped += 1;
}
skipped += 1;
return (skipped);
}
void
{
int error;
/*
* If this is an attribute directory, purge its contents.
*/
if (zfs_purgedir(zp) != 0) {
/*
* Not enough space to delete some xattrs.
* Leave it in the unlinked set.
*/
return;
}
}
/*
* Free up all the data in the file.
*/
if (error) {
/*
* Not enough space. Leave the file in the unlinked set.
*/
return;
}
/*
* If the file has extended attributes, we're going to unlink
* the xattr dir.
*/
}
/*
* Set up the final transaction.
*/
if (xzp) {
}
if (acl_obj)
if (error) {
/*
* Not enough space to delete the file. Leave it in the
* unlinked set, leaking it until the fs is remounted (at
* which point we'll call zfs_unlinked_drain() to process it).
*/
goto out;
}
if (xzp) {
}
/* Remove this znode from the unlinked set */
VERIFY3U(0, ==,
out:
if (xzp)
}
static uint64_t
{
return (de);
}
/*
* Link zp into dl. Can only fail if zp has been unlinked.
*/
int
{
int error;
return (ENOENT);
}
}
return (0);
}
/*
* Unlink zp from dl, and mark zp for deletion if this was the last link.
* Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
* If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
* If it's non-NULL, we use it to indicate whether the znode needs deletion,
* and it's the caller's job to do it.
*/
int
{
int error;
return (EBUSY);
return (EBUSY);
}
return (EEXIST);
}
zfs_panic_recover("zfs: link count on %s is %u, "
"should be at least %u",
zp_is_dir + 1);
}
} else {
}
}
else
} else {
}
else if (unlinked)
return (0);
}
/*
* Indicate whether the directory is empty. Works with or without z_lock
* held, but can only be considered a hint in the latter case. Returns true
* if only "." and ".." remain and there's no work in progress.
*/
{
}
int
{
int error;
return (error);
&acl_ids)) != 0)
return (error);
return (EDQUOT);
}
if (fuid_dirtied)
if (error) {
return (error);
}
if (fuid_dirtied)
return (0);
}
/*
* Return a znode for the extended attribute directory for zp.
* ** If the directory does not already exist, it is created **
*
* IN: zp - znode to obtain attribute directory from
* cr - credentials of caller
* flags - flags from the VOP_LOOKUP call
*
* OUT: xzpp - pointer to extended attribute znode
*
* RETURN: 0 on success
* error number on failure
*/
int
{
int error;
top:
if (error)
return (error);
return (0);
}
if (!(flags & CREATE_XATTR_DIR)) {
return (ENOENT);
}
return (EROFS);
}
/*
* The ability to 'create' files in an attribute
* directory comes from the write_xattr permission on the base file.
*
* The ability to 'search' an attribute directory requires
* read_xattr permission on the base file.
*
* Once in a directory, the ability to read/write attributes
* is controlled by the permissions on the attribute file.
*/
/* NB: we already did dmu_tx_wait() if necessary */
goto top;
}
return (error);
}
/*
* Decide whether it is okay to remove within a sticky directory.
*
* In sticky directories, write access is not sufficient;
* you can remove entries from a directory only if:
*
* you own the directory,
* you own the entry,
* the entry is a plain file and you have write access,
* or you are privileged (checked in secpolicy...).
*
* The function returns 0 if remove access is granted.
*/
int
{
return (0);
return (0);
return (0);
else
return (secpolicy_vnode_remove(cr));
}