zfs_vfsops.c revision ecd6cf800b63704be73fb264c3f5b6e0dafc068d
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <sys/pathname.h>
#include <sys/vfs_opreg.h>
#include <sys/zfs_znode.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_deleg.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_ctldir.h>
#include <sys/bootconf.h>
int zfsfstype;
static kmutex_t zfs_dev_mtx;
static const fs_operation_def_t zfs_vfsops_template[] = {
};
static const fs_operation_def_t zfs_vfsops_eio_template[] = {
};
/*
* We need to keep a count of active fs's.
* This is necessary to prevent our module
* from being unloaded after a umount -f
*/
static uint32_t zfs_active_fs_count = 0;
/*
* MNTOPT_DEFAULT was removed from MNTOPT_XATTR, since the
* default value is now determined by the xattr property.
*/
};
static mntopts_t zfs_mntopts = {
};
/*ARGSUSED*/
int
{
/*
* Data integrity is job one. We don't want a compromised kernel
* writing to the storage pool, so we never sync during panic.
*/
if (panicstr)
return (0);
/*
* SYNC_ATTR is used by fsflush() to force old filesystems like UFS
* to sync metadata, which they would otherwise cache indefinitely.
* Semantically, the only requirement is that the sync be initiated.
* The DMU syncs out txgs frequently, so there's nothing to do.
*/
return (0);
/*
* Sync a specific filesystem.
*/
else
} else {
/*
* Sync all ZFS filesystems. This is what happens when you
* run sync(1M). Unlike other filesystems, ZFS honors the
* request by waiting for all pools to commit all dirty data.
*/
}
return (0);
}
static int
{
do {
do {
/*
* If we're still using the real major, keep out of the
* /dev/zfs and /dev/zvol minor number space. If we're using
* a getudev()'ed major number, we can use all of its minors.
*/
else
zfs_minor = 0;
} else {
zfs_minor++;
}
/*
* We are using all ~262,000 minor numbers for the
* current major number. Create a new major number.
*/
"zfs_mount: Can't get unique major "
"device number.");
return (-1);
}
zfs_minor = 0;
} else {
break;
}
/* CONSTANTCONDITION */
} while (1);
return (0);
}
static void
{
} else {
}
}
static void
{
/* XXX locking on vfs_flag? */
} else {
/* XXX locking on vfs_flag? */
}
}
static void
{
if (newval < SPA_MINBLOCKSIZE ||
}
static void
{
if (newval) {
/* XXX locking on vfs_flag? */
} else {
/* XXX locking on vfs_flag? */
}
}
static void
{
} else {
}
}
static void
{
} else {
}
}
static void
{
} else {
}
}
static void
{
}
static void
{
}
static void
{
}
static int
{
/*
* Remount operations default to "rw" unless "ro" is explicitly
* specified.
*/
} else {
return (EROFS);
}
} else {
}
return (0);
}
static int
{
int error = 0;
/*
* The act of registering our callbacks will destroy any mount
* options we may have. In order to enable temporary overrides
* of mount options, we stash away the current values and
* restore them after we register the callbacks.
*/
}
do_devices = B_TRUE;
} else {
do_devices = B_TRUE;
do_devices = B_TRUE;
}
}
}
}
}
/*
* Register property callbacks.
*
* It would probably be fine to just check for i/o error from
* the first prop_register(), but I guess I like to go
* overboard...
*/
if (error)
goto unregister;
/*
* Invoke our callbacks to restore temporary mount options.
*/
if (do_readonly)
if (do_setuid)
if (do_exec)
if (do_devices)
if (do_xattr)
return (0);
/*
* We may attempt to unregister some callbacks that are not
* registered, but this is OK; it will simply return ENOMSG,
* which we will ignore.
*/
zfsvfs);
return (error);
}
static int
{
int error = 0;
int mode;
/*
* Initialize the zfs-specific filesystem structure.
* Should probably make this a kmem cache, shuffle fields,
* and just bzero up to z_hold_mtx[].
*/
/* Initialize the generic filesystem structure. */
vfsp->vfs_bcount = 0;
goto out;
}
NULL))
goto out;
goto out;
if (readonly)
else
}
if (error)
goto out;
goto out;
/* The call to zfs_init_fs leaves the vnode held, release it here. */
goto out;
} else {
if (error)
goto out;
/*
* Parse and replay the intent log.
*/
if (!zil_disable)
}
out:
if (error) {
} else {
}
return (error);
}
void
{
struct dsl_dataset *ds;
/*
* Unregister properties.
*/
if (!dmu_objset_is_snapshot(os)) {
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
zfsvfs) == 0);
acl_inherit_changed_cb, zfsvfs) == 0);
}
}
/*
* Convert a decimal digit string to a uint64_t integer.
*/
static int
{
while (*str) {
return (EINVAL);
}
return (0);
}
/*
* The boot path passed from the boot loader is in the form of
* "rootpool-name/root-filesystem-object-number". Convert this
* string to a dataset name: "rootpool-name/root-filesystem-name".
*/
static int
{
char *slashp;
int error;
return (EINVAL);
/* if no '/', just return the pool name */
return (0);
}
return (error);
*slashp = '\0';
*slashp = '/';
return (error);
}
static int
{
int error = 0;
int ret = 0;
static int zfsrootdone = 0;
char *zfs_bootpath;
/*
* The filesystem that we mount as root is defined in the
* "zfs-bootfs" property.
*/
if (zfsrootdone++)
return (EBUSY);
return (EIO);
if (error)
return (error);
return (error);
goto out;
goto out;
/*
* The zfs_zget call above returns with a hold on vp, we release
* it here.
*/
/*
* Mount root as readonly initially; it will be remounted later.
*/
out:
return (ret);
} else if (why == ROOT_REMOUNT) {
return (zfs_refresh_properties(vfsp));
} else if (why == ROOT_UNMOUNT) {
return (0);
}
/*
* If "why" is anything other than ROOT_INIT, ROOT_REMOUNT,
* or ROOT_UNMOUNT, we do not support it.
*/
return (ENOTSUP);
}
/*ARGSUSED*/
static int
{
char *osname;
int error = 0;
int canwrite;
return (ENOTDIR);
return (EBUSY);
}
/*
* ZFS does not support passing unparsed data in via MS_DATA.
* Users should use the MS_OPTIONSTR interface; this means
* that all option parsing is already done and the options struct
* can be interrogated.
*/
return (EINVAL);
/*
* When doing a remount, we simply refresh our temporary properties
* according to those options set in the current VFS options.
*/
return (zfs_refresh_properties(vfsp));
}
/*
* Get the objset name (the "special" mount argument).
*/
return (error);
/*
* Check for mount privilege?
*
* If we don't have privilege then see if
* we have local permission to allow it
*/
if (error) {
if (error == 0) {
/*
* Make sure user is the owner of the mount point
* or has sufficient privileges.
*/
goto out;
}
goto out;
}
goto out;
}
} else {
goto out;
}
}
/*
* Refuse to mount a filesystem if we are in a local zone and the
* dataset is not visible.
*/
if (!INGLOBALZONE(curproc) &&
goto out;
}
out:
return (error);
}
static int
{
/*
* The underlying storage pool actually uses multiple block sizes.
* We report the fragsize as the smallest block size we support,
* and we report our blocksize as the filesystem's maximum blocksize.
*/
/*
* The following report "total" blocks of various kinds in the
* file system, but reported in terms of f_frsize - the
* "fragment" size.
*/
/*
* statvfs() should really be called statufs(), because it assumes
* static metadata. ZFS doesn't preallocate files, so the best
* we can do is report the max that could possibly fit in f_files,
* and that minus the number actually used in f_ffree.
* For f_ffree, report the smaller of the number of objects available
* and the number of blocks (each object will take at least a block).
*/
/*
* We're a zfs filesystem.
*/
/*
* We have all of 32 characters to stuff a string here.
*/
return (0);
}
static int
{
int error;
if (error == 0)
return (error);
}
/*ARGSUSED*/
static int
{
int ret;
if (ret) {
if (ret)
return (ret);
}
(void) dnlc_purge_vfsp(vfsp, 0);
/*
* Unmount any snapshots mounted under .zfs before unmounting the
* dataset itself.
*/
return (ret);
}
/*
* Ensure that z_unmounted1 reaches global visibility
* before z_op_cnt.
*/
/*
* Wait for all zfs threads to leave zfs.
* Grabbing a rwlock as reader in all vops and
* as writer here doesn't work because it is too easy to get
* multiple reader enters as zfs can re-enter itself.
* This can lead to deadlock if there is an intervening
* rw_enter as writer.
* So a file system threads ref count (z_op_cnt) is used.
* A polling loop on z_op_cnt may seem inefficient, but
* - this saves all threads on exit from having to grab a
* mutex in order to cv_signal
* - only occurs on forced unmount in the rare case when
* there are outstanding threads within the file system.
*/
delay(1);
}
return (0);
}
/*
* Check the number of active vnodes in the file system.
* Our count is maintained in the vfs structure, but the number
* is off by 1 to indicate a hold on the vfs structure itself.
*
* The '.zfs' directory maintains a reference of its own, and any active
* references underneath are reflected in the vnode count.
*/
return (EBUSY);
} else {
return (EBUSY);
}
}
return (0);
}
static int
{
int i, err;
if (err)
return (EINVAL);
}
} else {
return (EINVAL);
}
/* A zero fid_gen means we are in the .zfs control directories */
if (fid_gen == 0 &&
if (object == ZFSCTL_INO_SNAPDIR) {
} else {
}
return (0);
}
return (err);
}
if (zp_gen == 0)
zp_gen = 1;
return (EINVAL);
}
return (0);
}
static void
{
/*
* For forced unmount, at this point all vops except zfs_inactive
* are erroring EIO. We need to now suspend zfs_inactive threads
* while we are freeing dbufs before switching zfs_inactive
* to use behaviour without an objset.
*/
/*
* Release all holds on dbufs
* Note, although we have stopped all other vop threads and
* zfs_inactive(), the dmu can callback via znode_pageout_func()
* which can zfs_znode_free() the znode.
* So we lock z_all_znodes; search the list for a held
* dbuf; drop the lock (we know zp can't disappear if we hold
* a dbuf lock); then regrab the lock and restart.
*/
if (zp->z_dbuf_held) {
/* dbufs should only be held when force unmounting */
zp->z_dbuf_held = 0;
/* Start again */
}
}
/*
* Unregister properties.
*/
if (!dmu_objset_is_snapshot(os))
/*
* Switch zfs_inactive to behaviour without an objset.
* It just tosses cached pages and frees the znode & vnode.
* Then re-enable zfs_inactive threads in that new behaviour.
*/
/*
* Close the zil. Can't close the zil while zfs_inactive
* threads are blocked as zil_close can call zfs_inactive.
*/
}
/*
* Evict all dbufs so that cached znodes will be freed
*/
(void) dmu_objset_evict_dbufs(os, 0);
}
/*
* Finally close the objset
*/
/*
* We can now safely destroy the '.zfs' directory node.
*/
}
static void
{
}
/*
* VFS_INIT() initialization. Note that there is no VFS_FINI(),
* so we can't safely do any non-idempotent initialization here.
* Leave that to zfs_init() and zfs_fini(), which are called
* from the module's _init() and _fini() entry points.
*/
/*ARGSUSED*/
static int
{
int error;
/*
* Setup vfsops and vnodeops tables.
*/
if (error != 0) {
}
if (error) {
(void) vfs_freevfsops_by_type(zfsfstype);
return (error);
}
/*
* Unique major number for all zfs mounts.
* If we run out of 32-bit minors, we'll getudev() another major.
*/
return (0);
}
/*
 * Module-wide ZFS setup.
 *
 * Takes no arguments and returns nothing.  The only work visible here
 * is bringing up the '.zfs' control directory support through
 * zfsctl_init().
 *
 * NOTE(review): the previous version of this function ended with a
 * comment about initializing the znode cache and vnode ops, but no
 * call followed it.  Confirm that no initialization step is missing.
 */
void
zfs_init(void)
{
	/* Set up the .zfs directory structures. */
	zfsctl_init();
}
/*
 * Module-wide ZFS teardown.
 *
 * Takes no arguments and returns nothing.  The only work visible here
 * is the zfsctl_fini() call, the counterpart of the zfsctl_init() call
 * made by zfs_init().
 */
void
zfs_fini(void)
{
	/* Tear down the .zfs control directory support. */
	zfsctl_fini();
}
/*
 * Report whether any ZFS filesystem is still counted as active.
 *
 * Returns 1 when zfs_active_fs_count is non-zero and 0 when it is
 * zero.  That counter is kept so the module is not unloaded after a
 * umount -f.
 */
int
zfs_busy(void)
{
	/* The counter is unsigned, so "not zero" is the only busy state. */
	return (zfs_active_fs_count == 0 ? 0 : 1);
}
};
struct modlfs zfs_modlfs = {
};