zfs_ctldir.c revision d39ee142a97a7c58f60f7b52c62409f2ff64b234
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * CDDL HEADER START
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * The contents of this file are subject to the terms of the
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Common Development and Distribution License (the "License").
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * You may not use this file except in compliance with the License.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * See the License for the specific language governing permissions
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * and limitations under the License.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * When distributing Covered Code, include this CDDL HEADER in each
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * If applicable, add the following below this CDDL HEADER, with the
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * fields enclosed by brackets "[]" replaced with your own identifying
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * information: Portions Copyright [yyyy] [name of copyright owner]
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * CDDL HEADER END
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * ZFS control directory (a.k.a. ".zfs")
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * This directory provides a common location for all ZFS meta-objects.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Currently, this is only the 'snapshot' directory, but this may expand in the
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * future. The elements are built using the GFS primitives, as the hierarchy
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * does not actually exist on disk.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * For 'snapshot', we don't want to have all snapshots always mounted, because
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * this would take up a huge amount of space in /etc/mnttab. We have three
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * types of objects:
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * ctldir ------> snapshotdir -------> snapshot
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * mounted fs
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * The 'snapshot' node contains just enough information to look up '..' and act
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * as a mountpoint for the snapshot. Whenever we look up a specific snapshot, we
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * perform an automount of the underlying filesystem and return the
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * corresponding vnode.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * All mounts are handled automatically by the kernel, but unmounts are
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * (currently) handled from user land. The main reason is that there is no
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * reliable way to auto-unmount the filesystem when it's "no longer in use".
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * When the user unmounts a filesystem, we call zfsctl_unmount(), which
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * unmounts any snapshots within the snapshot directory.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * The '.zfs', '.zfs/snapshot', and all directories created under
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * '.zfs/snapshot' (i.e. '.zfs/snapshot/<snapname>') are all GFS nodes and
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * share the same vfs_t as the head filesystem (what '.zfs' lives under).
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * (i.e. snapshots) are ZFS nodes and have their own unique vfs_t.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * However, vnodes within these mounted-on file systems have their v_vfsp
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * fields set to the head filesystem to make NFS happy (see
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * so that it cannot be freed until all snapshots have been unmounted.
d0e518695adc90b82233b99af7dffbb3d3f92c00amwtypedef struct zfsctl_node {
d0e518695adc90b82233b99af7dffbb3d3f92c00amw timestruc_t zc_cmtime; /* ctime and mtime, always the same */
d0e518695adc90b82233b99af7dffbb3d3f92c00amwtypedef struct zfsctl_snapdir {
d0e518695adc90b82233b99af7dffbb3d3f92c00amwtypedef struct {
d0e518695adc90b82233b99af7dffbb3d3f92c00amwsnapentry_compare(const void *a, const void *b)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (-1);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw else if (ret > 0)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (1);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (0);
d0e518695adc90b82233b99af7dffbb3d3f92c00amwstatic vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
d0e518695adc90b82233b99af7dffbb3d3f92c00amwstatic int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw { ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
d0e518695adc90b82233b99af7dffbb3d3f92c00amw { ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
d0e518695adc90b82233b99af7dffbb3d3f92c00amw { ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir },
d0e518695adc90b82233b99af7dffbb3d3f92c00amw { ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares },
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Root directory elements. We only have two entries:
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * snapshot and shares.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw/* include . and .. in the calculation */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Initialize the various GFS pieces we'll need to create and manipulate .zfs
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * directories. This is called from the ZFS init routine, and initializes the
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * vnode ops vectors that we'll be using.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Remove vfsctl vnode ops
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Return the inode number associated with the 'snapshot' or
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * 'shares' directory.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw/* ARGSUSED */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Create the '.zfs' directory. This directory is cached as part of the VFS
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * structure. This results in a hold on the vfs_t. The code in zfs_umount()
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * therefore checks against a vfs_count of 2 instead of 1. This reference
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * is removed when the ctldir is destroyed in the unmount.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
d0e518695adc90b82233b99af7dffbb3d3f92c00amw VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * We're only faking the fact that we have a root of a filesystem for
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * the sake of the GFS interfaces. Undo the flag manipulation it did
d0e518695adc90b82233b99af7dffbb3d3f92c00amw vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Destroy the '.zfs' directory. Only called when the filesystem is unmounted.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * There might still be more references if we were force unmounted, but only
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * new zfs_inactive() calls can occur and they don't reference .zfs
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Given a root znode, retrieve the associated .zfs directory.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Add a hold to the vnode and return it.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Common open routine. Disallow any write access.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw/* ARGSUSED */
d0e518695adc90b82233b99af7dffbb3d3f92c00amwzfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (0);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Common close routine. Nothing to do here.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw/* ARGSUSED */
d0e518695adc90b82233b99af7dffbb3d3f92c00amwzfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (0);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Common access routine. Disallow writes.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw/* ARGSUSED */
d0e518695adc90b82233b99af7dffbb3d3f92c00amwzfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr,
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (0);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Common getattr function. Fill in basic information.
d0e518695adc90b82233b99af7dffbb3d3f92c00amwstatic void
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * We are a purely virtual object, so we have no
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * blocksize or allocated blocks.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * We live in the now (for atime).
d0e518695adc90b82233b99af7dffbb3d3f92c00amw/*ARGSUSED*/
d0e518695adc90b82233b99af7dffbb3d3f92c00amwzfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* .zfs znodes always have a generation number of 0 */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (0);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw/*ARGSUSED*/
d0e518695adc90b82233b99af7dffbb3d3f92c00amwzfsctl_shares_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * .zfs inode namespace
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * We need to generate unique inode numbers for all files and directories
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * within the .zfs pseudo-filesystem. We use the following scheme:
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * ENTRY ZFSCTL_INODE
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * .zfs/snapshot/<snap> objectid(snap)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Get root directory attributes.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw/* ARGSUSED */
d0e518695adc90b82233b99af7dffbb3d3f92c00amwzfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (0);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Special case the handling of "..".
d0e518695adc90b82233b99af7dffbb3d3f92c00amw/* ARGSUSED */
d0e518695adc90b82233b99af7dffbb3d3f92c00amwzfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
d0e518695adc90b82233b99af7dffbb3d3f92c00amw int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * No extended attributes allowed under .zfs
d0e518695adc90b82233b99af7dffbb3d3f92c00amwzfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * We only care about ACL_ENABLED so that libsec can
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * display ACL correctly and not default to POSIX draft.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (0);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw { VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } },
d0e518695adc90b82233b99af7dffbb3d3f92c00amw { VOPNAME_GETATTR, { .vop_getattr = zfsctl_root_getattr } },
d0e518695adc90b82233b99af7dffbb3d3f92c00amw { VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } },
d0e518695adc90b82233b99af7dffbb3d3f92c00amw { VOPNAME_READDIR, { .vop_readdir = gfs_vop_readdir } },
d0e518695adc90b82233b99af7dffbb3d3f92c00amw { VOPNAME_LOOKUP, { .vop_lookup = zfsctl_root_lookup } },
d0e518695adc90b82233b99af7dffbb3d3f92c00amw { VOPNAME_INACTIVE, { .vop_inactive = gfs_vop_inactive } },
d0e518695adc90b82233b99af7dffbb3d3f92c00amw { VOPNAME_PATHCONF, { .vop_pathconf = zfsctl_pathconf } },
d0e518695adc90b82233b99af7dffbb3d3f92c00amwzfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (0);
d0e518695adc90b82233b99af7dffbb3d3f92c00amwzfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* this will be dropped by dounmount() */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * We can't use VN_RELE(), as that will try to invoke
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * zfsctl_snapdir_inactive(), which would cause us to destroy
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * the sd_lock mutex held by our caller.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (0);
d0e518695adc90b82233b99af7dffbb3d3f92c00amwstatic void
d0e518695adc90b82233b99af7dffbb3d3f92c00amwzfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
int err;
if (err == 0) {
return (err);
if (!err)
if (!err)
if (err)
return (err);
return (EINVAL);
return (ENOENT);
if (err == 0)
return (err);
int err;
if (err == 0) {
return (err);
if (!err)
if (err)
return (err);
if (sep) {
if (err)
return (err);
int err;
return (EILSEQ);
if (err)
return (err);
if (err == 0) {
if (err)
return (err);
return (err);
char *mountpoint;
int err;
return (EINVAL);
return (ENOENT);
if (err == 0) {
return (err);
if (realpnp)
if (err) {
goto domount;
return (err);
if (err) {
return (ENOENT);
if (err == 0) {
if (err == 0) {
if (err) {
return (err);
int error;
return (ENOTSUP);
return (error);
int error;
if (error) {
return (error);
int error;
return (ENOTSUP);
return (error);
vnode_t *
return (vp);
vnode_t *
return (vp);
int error;
return (ENOTSUP);
return (error);
void *private;
{ NULL }
{ NULL }
static vnode_t *
return (vp);
int error;
if (error != 0)
return (error);
if (error == 0) {
return (error);
int error;
if (error != 0)
return (error);
if (error) {
return (error);