zfs_vfsops.c revision 4ccbb6e737373468bb9dc1709618384cce4c9f92
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <sys/pathname.h>
#include <sys/vfs_opreg.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_i18n.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_deleg.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/bootconf.h>
#include <sys/dmu_objset.h>
int zfsfstype;
static kmutex_t zfs_dev_mtx;
static const fs_operation_def_t zfs_vfsops_template[] = {
};
static const fs_operation_def_t zfs_vfsops_eio_template[] = {
};
/*
* We need to keep a count of active filesystems.
* This is necessary to prevent our module
* from being unloaded after a umount -f.
*/
static uint32_t zfs_active_fs_count = 0;
/*
* MO_DEFAULT is not used since the default value is determined
* by the equivalent property.
*/
};
static mntopts_t zfs_mntopts = {
};
/*ARGSUSED*/
int
{
/*
* Data integrity is job one. We don't want a compromised kernel
* writing to the storage pool, so we never sync during panic.
*/
if (panicstr)
return (0);
/*
* SYNC_ATTR is used by fsflush() to force old filesystems like UFS
* to sync metadata, which they would otherwise cache indefinitely.
* Semantically, the only requirement is that the sync be initiated.
* The DMU syncs out txgs frequently, so there's nothing to do.
*/
return (0);
/*
* Sync a specific filesystem.
*/
else
} else {
/*
* Sync all ZFS filesystems. This is what happens when you
* run sync(1M). Unlike other filesystems, ZFS honors the
* request by waiting for all pools to commit all dirty data.
*/
}
return (0);
}
static int
{
do {
do {
/*
* If we're still using the real major, keep out of the
* /dev/zfs and /dev/zvol minor number space. If we're using
* a getudev()'ed major number, we can use all of its minors.
*/
else
zfs_minor = 0;
} else {
zfs_minor++;
}
/*
* We are using all ~262,000 minor numbers for the
* current major number. Create a new major number.
*/
"zfs_mount: Can't get unique major "
"device number.");
return (-1);
}
zfs_minor = 0;
} else {
break;
}
/* CONSTANTCONDITION */
} while (1);
return (0);
}
static void
{
} else {
}
}
static void
{
/* XXX locking on vfs_flag? */
} else {
/* XXX locking on vfs_flag? */
}
}
static void
{
if (newval < SPA_MINBLOCKSIZE ||
}
static void
{
if (newval) {
/* XXX locking on vfs_flag? */
} else {
/* XXX locking on vfs_flag? */
}
}
static void
{
} else {
}
}
static void
{
} else {
}
}
static void
{
} else {
}
}
/*
* The nbmand mount option can only be changed at mount time.
* We can't allow it to be toggled on live file systems, or incorrect
* behavior may be seen from CIFS clients.
*
* This property isn't registered via dsl_prop_register(), but this callback
* will be called when a file system is first mounted.
*/
static void
{
} else {
}
}
static void
{
}
static void
{
}
static void
{
}
static void
{
}
static int
{
int error;
return (0);
if (error)
goto normquit;
switch ((int)pval) {
case ZFS_NORMALIZE_NONE:
break;
case ZFS_NORMALIZE_C:
break;
case ZFS_NORMALIZE_KC:
break;
case ZFS_NORMALIZE_D:
break;
case ZFS_NORMALIZE_KD:
break;
default:
break;
}
if (error)
goto normquit;
if (pval)
else
if (error)
goto normquit;
switch ((int)pval) {
case ZFS_CASE_SENSITIVE:
break;
case ZFS_CASE_INSENSITIVE:
break;
case ZFS_CASE_MIXED:
break;
default:
break;
}
return (error);
}
static int
{
int error = 0;
/*
* The act of registering our callbacks will destroy any mount
* options we may have. In order to enable temporary overrides
* of mount options, we stash away the current values and
* restore them after we register the callbacks.
*/
}
do_devices = B_TRUE;
} else {
do_devices = B_TRUE;
do_devices = B_TRUE;
}
}
}
}
}
}
/*
* nbmand is a special property. It can only be changed at
* mount time.
*
* This is weird, but it is documented to only be changeable
* at mount time.
*/
} else {
char osname[MAXNAMELEN];
NULL))
return (error);
}
/*
* Register property callbacks.
*
* It would probably be fine to just check for i/o error from
* the first prop_register(), but I guess I like to go
* overboard...
*/
if (error)
goto unregister;
/*
* Invoke our callbacks to restore temporary mount options.
*/
if (do_readonly)
if (do_setuid)
if (do_exec)
if (do_devices)
if (do_xattr)
if (do_atime)
return (0);
/*
* We may attempt to unregister some callbacks that are not
* registered, but this is OK; it will simply return ENOMSG,
* which we will ignore.
*/
zfsvfs);
return (error);
}
static int
{
int error;
if (error)
return (error);
/*
* Set the objset user_ptr to track its zfsvfs.
*/
/*
* If we are not mounting (i.e., online recv), then we don't
* have to worry about replaying the log as we blocked all
* operations out since we closed the ZIL.
*/
if (mounting) {
/*
* During replay we remove the read only flag to
* allow replays to succeed.
*/
if (readonly != 0)
else
/*
* Parse and replay the intent log.
*
* Because of ziltest, this must be done after
* zfs_unlinked_drain(). (Further note: ziltest doesn't
* use readonly mounts, where zfs_unlinked_drain() isn't
* called.) This is because ziltest causes spa_sync()
* to think it's committed, but actually it is not, so
* the intent log contains many txg's worth of changes.
*
* In particular, if object N is in the unlinked set in
* the last txg to actually sync, then it could be
* actually freed in a later txg and then reallocated in
* a yet later txg. This would write a "create object
* N" record to the intent log. Normally, this would be
* fine because the spa_sync() would have written out
* the fact that object N is free, before we could write
* the "create object N" intent log record.
*
* But when we are in ziltest mode, we advance the "open
* txg" without actually spa_sync()-ing the changes to
* disk. So we would see that object N is still
* allocated and in the unlinked set, and there is an
* intent log record saying to allocate it.
*/
}
if (!zil_disable)
return (0);
}
static int
{
int error = 0;
int mode;
/*
* Initialize the zfs-specific filesystem structure.
* Should probably make this a kmem cache, shuffle fields,
* and just bzero up to z_hold_mtx[].
*/
/* Initialize the generic filesystem structure. */
vfsp->vfs_bcount = 0;
goto out;
}
NULL))
goto out;
goto out;
if (readonly)
else
}
if (error)
goto out;
goto out;
/* The call to zfs_init_fs leaves the vnode held, release it here. */
/*
* Set features for file system.
*/
if (zfsvfs->z_use_fuids) {
}
/*
* Set normalization regardless of whether or not the object
* set is a snapshot. Snapshots and clones need to have
* identical normalization as did the file system they
* originated from.
*/
goto out;
goto out;
} else {
}
out:
if (error) {
} else {
}
return (error);
}
void
{
struct dsl_dataset *ds;
/*
* Unregister properties.
*/
if (!dmu_objset_is_snapshot(os)) {
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
acl_inherit_changed_cb, zfsvfs) == 0);
vscan_changed_cb, zfsvfs) == 0);
}
}
/*
* Convert a decimal digit string to a uint64_t integer.
*/
static int
{
while (*str) {
return (EINVAL);
}
return (0);
}
/*
* The boot path passed from the boot loader is in the form of
* "rootpool-name/root-filesystem-object-number". Convert this
* string to a dataset name: "rootpool-name/root-filesystem-name".
*/
static int
{
char *slashp;
int error;
return (EINVAL);
/* if no '/', just return the pool name */
return (0);
}
return (error);
*slashp = '\0';
*slashp = '/';
return (error);
}
static int
{
int error = 0;
int ret = 0;
static int zfsrootdone = 0;
char *zfs_bootpath;
/*
* The filesystem that we mount as root is defined in the
* "zfs-bootfs" property.
*/
if (zfsrootdone++)
return (EBUSY);
return (EIO);
if (error)
return (error);
return (error);
goto out;
goto out;
/*
* The zfs_zget call above returns with a hold on vp, we release
* it here.
*/
/*
* Mount root as readonly initially; it will be remounted read/write later.
*/
out:
return (ret);
} else if (why == ROOT_REMOUNT) {
/* refresh mount options */
return (zfs_register_callbacks(vfsp));
} else if (why == ROOT_UNMOUNT) {
return (0);
}
/*
* If "why" is anything other than ROOT_INIT, ROOT_REMOUNT,
* or ROOT_UNMOUNT, we do not support it.
*/
return (ENOTSUP);
}
/*ARGSUSED*/
static int
{
char *osname;
int error = 0;
int canwrite;
return (ENOTDIR);
return (EBUSY);
}
/*
* ZFS does not support passing unparsed data in via MS_DATA.
* Users should use the MS_OPTIONSTR interface; this means
* that all option parsing is already done and the options struct
* can be interrogated.
*/
return (EINVAL);
/*
* Get the objset name (the "special" mount argument).
*/
return (error);
/*
* Check for mount privilege?
*
* If we don't have privilege then see if
* we have local permission to allow it
*/
if (error) {
if (error == 0) {
/*
* Make sure user is the owner of the mount point
* or has sufficient privileges.
*/
goto out;
}
goto out;
}
goto out;
}
} else {
goto out;
}
}
/*
* Refuse to mount a filesystem if we are in a local zone and the
* dataset is not visible.
*/
if (!INGLOBALZONE(curproc) &&
goto out;
}
/*
* When doing a remount, we simply refresh our temporary properties
* according to those options set in the current VFS options.
*/
/* refresh mount options */
goto out;
}
out:
return (error);
}
static int
{
/*
* The underlying storage pool actually uses multiple block sizes.
* We report the fragsize as the smallest block size we support,
* and we report our blocksize as the filesystem's maximum blocksize.
*/
/*
* The following report "total" blocks of various kinds in the
* file system, but reported in terms of f_frsize - the
* "fragment" size.
*/
/*
* statvfs() should really be called statufs(), because it assumes
* static metadata. ZFS doesn't preallocate files, so the best
* we can do is report the max that could possibly fit in f_files,
* and that minus the number actually used in f_ffree.
* For f_ffree, report the smaller of the number of objects available
* and the number of blocks (each object will take at least a block).
*/
/*
* We're a zfs filesystem.
*/
/*
* We have all of 32 characters to stuff a string here.
*/
return (0);
}
static int
{
int error;
if (error == 0)
return (error);
}
/*
* Teardown the zfsvfs::z_os.
*
* Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
* and 'z_teardown_inactive_lock' held.
*/
static int
{
if (!unmounting) {
/*
* We purge the parent filesystem's vfsp as the parent
* filesystem and all of its snapshots have their vnode's
* v_vfsp set to the parent's filesystem's vfsp. Note,
* 'z_parent' is self referential for non-snapshots.
*/
}
/*
* Close the zil. NB: Can't close the zil while zfs_inactive
* threads are blocked as zil_close can call zfs_inactive.
*/
}
/*
* If we are not unmounting (i.e., online recv) and someone already
* unmounted this file system while we were doing the switcheroo,
* or a reopen of z_os failed then just bail out now.
*/
return (EIO);
}
/*
* At this point there are no vops active, and any new vops will
* fail with EIO since we have z_teardown_lock for writer (only
* relevant for forced unmount).
*
* Release all holds on dbufs.
* Note, the dmu can still callback via znode_pageout_func()
* which can zfs_znode_free() the znode. So we lock
* z_all_znodes; search the list for a held dbuf; drop the lock
* (we know zp can't disappear if we hold a dbuf lock) then
* regrab the lock and restart.
*
* Since we have to restart the search after finding each held dbuf,
* we do two things to speed up searching: we insert a dummy znode
* ('markerzp') to detect the original tail of the list, and move
* non-held znodes to the end of the list. Once we hit 'markerzp',
* we know we've looked at each znode and can break out.
*/
/* dbufs should only be held when force unmounting */
/* Start again */
} else {
}
}
/*
* If we are unmounting, set the unmounted flag and let new vops
* unblock. zfs_inactive will have the unmounted behavior, and all
* other vops will fail with EIO.
*/
if (unmounting) {
}
/*
* z_os will be NULL if there was an error in attempting to reopen
* zfsvfs, so just return as the properties had already been
* unregistered and cached data had been evicted before.
*/
return (0);
/*
* Unregister properties.
*/
/*
* Evict cached data
*/
if (dmu_objset_evict_dbufs(os)) {
(void) dmu_objset_evict_dbufs(os);
}
return (0);
}
/*ARGSUSED*/
static int
{
int ret;
if (ret) {
if (ret)
return (ret);
}
/*
* We purge the parent filesystem's vfsp as the parent filesystem
* and all of its snapshots have their vnode's v_vfsp set to the
* parent's filesystem's vfsp. Note, 'z_parent' is self
* referential for non-snapshots.
*/
/*
* Unmount any snapshots mounted under .zfs before unmounting the
* dataset itself.
*/
return (ret);
}
/*
* Check the number of active vnodes in the file system.
* Our count is maintained in the vfs structure, but the
* number is off by 1 to indicate a hold on the vfs
* structure itself.
*
* The '.zfs' directory maintains a reference of its
* own, and any active references underneath are
* reflected in the vnode count.
*/
return (EBUSY);
} else {
return (EBUSY);
}
}
/*
* z_os will be NULL if there was an error in
* attempting to reopen zfsvfs.
*/
/*
* Unset the objset user_ptr.
*/
/*
* Finally close the objset
*/
}
/*
* We can now safely destroy the '.zfs' directory node.
*/
return (0);
}
static int
{
int i, err;
if (err)
return (EINVAL);
}
} else {
return (EINVAL);
}
/* A zero fid_gen means we are in the .zfs control directories */
if (fid_gen == 0 &&
if (object == ZFSCTL_INO_SNAPDIR) {
} else {
}
return (0);
}
return (err);
}
if (zp_gen == 0)
zp_gen = 1;
return (EINVAL);
}
return (0);
}
/*
* Block out VOPs and close zfsvfs_t::z_os
*
* Note, if successful, then we return with the 'z_teardown_lock' and
* 'z_teardown_inactive_lock' write held.
*/
int
{
int error;
return (error);
return (0);
}
/*
* Reopen zfsvfs_t::z_os and release VOPs.
*/
int
{
int err;
if (err) {
} else {
/*
* Attempt to re-establish all the active znodes with
* their dbufs. If a zfs_rezget() fails, then we'll let
* any potential callers discover that via ZFS_ENTER_VERIFY_VP
* when they try to use their znode.
*/
(void) zfs_rezget(zp);
}
}
/* release the VOPs */
if (err) {
/*
* Since we couldn't reopen zfsvfs::z_os, force
* unmount this file system.
*/
}
return (err);
}
static void
{
int i;
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
}
/*
* VFS_INIT() initialization. Note that there is no VFS_FINI(),
* so we can't safely do any non-idempotent initialization here.
* Leave that to zfs_init() and zfs_fini(), which are called
* from the module's _init() and _fini() entry points.
*/
/*ARGSUSED*/
static int
{
int error;
/*
* Setup vfsops and vnodeops tables.
*/
if (error != 0) {
}
if (error) {
(void) vfs_freevfsops_by_type(zfsfstype);
return (error);
}
/*
* Unique major number for all zfs mounts.
* If we run out of 32-bit minors, we'll getudev() another major.
*/
return (0);
}
/*
* Module-level ZFS filesystem initialization. Takes no arguments and
* returns nothing. Per the VFS_INIT() notes in this file, zfs_init() is
* called from the module's _init() entry point, so non-idempotent setup
* belongs here rather than in the VFS_INIT() routine.
*/
void
zfs_init(void)
{
/*
* Initialize .zfs directory structures
*/
zfsctl_init();
/*
* Initialize znode cache, vnode ops, etc...
*/
/*
* NOTE(review): no statement follows the comment above in this view;
* confirm the znode cache / vnode ops setup is actually performed here.
*/
}
/*
* Module-level ZFS filesystem teardown; the counterpart of zfs_init().
* Takes no arguments and returns nothing. Per the VFS_INIT() notes in
* this file, zfs_fini() is called from the module's _fini() entry point.
*/
void
zfs_fini(void)
{
/* Tear down the .zfs control directory state set up by zfsctl_init(). */
zfsctl_fini();
}
/*
 * Report whether any ZFS filesystems are currently counted as active.
 *
 * Returns 1 when zfs_active_fs_count is non-zero and 0 when it is zero.
 */
int
zfs_busy(void)
{
	if (zfs_active_fs_count == 0)
		return (0);

	return (1);
}
int
{
int error;
return (error);
}
int
{
int error;
/*
* XXX for now, require that the filesystem be unmounted. Would
* be nice to find the zfsvfs_t and just update that if
* possible.
*/
return (EINVAL);
if (error)
return (error);
if (error)
goto out;
goto out;
}
if (error) {
goto out;
}
dmu_objset_id(os));
out:
return (error);
}
};
struct modlfs zfs_modlfs = {
};