zfs_ctldir.c revision fa9e4066f08beec538e775443c5be79dd423fcab
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER START
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The contents of this file are subject to the terms of the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Common Development and Distribution License, Version 1.0 only
fa9e4066f08beec538e775443c5be79dd423fcabahrens * (the "License"). You may not use this file except in compliance
fa9e4066f08beec538e775443c5be79dd423fcabahrens * with the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
fa9e4066f08beec538e775443c5be79dd423fcabahrens * See the License for the specific language governing permissions
fa9e4066f08beec538e775443c5be79dd423fcabahrens * and limitations under the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * When distributing Covered Code, include this CDDL HEADER in each
fa9e4066f08beec538e775443c5be79dd423fcabahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If applicable, add the following below this CDDL HEADER, with the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * fields enclosed by brackets "[]" replaced with your own identifying
fa9e4066f08beec538e775443c5be79dd423fcabahrens * information: Portions Copyright [yyyy] [name of copyright owner]
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER END
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Use is subject to license terms.
fa9e4066f08beec538e775443c5be79dd423fcabahrens#pragma ident "%Z%%M% %I% %E% SMI"
fa9e4066f08beec538e775443c5be79dd423fcabahrens * ZFS control directory (a.k.a. ".zfs")
fa9e4066f08beec538e775443c5be79dd423fcabahrens * This directory provides a common location for all ZFS meta-objects.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Currently, this is only the 'snapshot' directory, but this may expand in the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * future. The elements are built using the GFS primitives, as the hierarchy
fa9e4066f08beec538e775443c5be79dd423fcabahrens * does not actually exist on disk.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * For 'snapshot', we don't want to have all snapshots always mounted, because
fa9e4066f08beec538e775443c5be79dd423fcabahrens * this would take up a huge amount of space in /etc/mnttab. We have three
fa9e4066f08beec538e775443c5be79dd423fcabahrens * types of objects:
fa9e4066f08beec538e775443c5be79dd423fcabahrens * ctldir ------> snapshotdir -------> snapshot
fa9e4066f08beec538e775443c5be79dd423fcabahrens * mounted fs
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The 'snapshot' node contains just enough information to lookup '..' and act
fa9e4066f08beec538e775443c5be79dd423fcabahrens * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
fa9e4066f08beec538e775443c5be79dd423fcabahrens * perform an automount of the underlying filesystem and return the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * corresponding vnode.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * All mounts are handled automatically by the kernel, but unmounts are
fa9e4066f08beec538e775443c5be79dd423fcabahrens * (currently) handled from user land. The main reason is that there is no
fa9e4066f08beec538e775443c5be79dd423fcabahrens * reliable way to auto-unmount the filesystem when it's "no longer in use".
fa9e4066f08beec538e775443c5be79dd423fcabahrens * When the user unmounts a filesystem, we call zfsctl_unmount(), which
fa9e4066f08beec538e775443c5be79dd423fcabahrens * unmounts any snapshots within the snapshot directory.
fa9e4066f08beec538e775443c5be79dd423fcabahrenstypedef struct {
fa9e4066f08beec538e775443c5be79dd423fcabahrenssnapentry_compare(const void *a, const void *b)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (-1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens else if (ret > 0)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrensstatic const fs_operation_def_t zfsctl_tops_snapdir[];
fa9e4066f08beec538e775443c5be79dd423fcabahrensstatic const fs_operation_def_t zfsctl_tops_snapshot[];
fa9e4066f08beec538e775443c5be79dd423fcabahrensstatic vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
fa9e4066f08beec538e775443c5be79dd423fcabahrens { ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
fa9e4066f08beec538e775443c5be79dd423fcabahrens { ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
fa9e4066f08beec538e775443c5be79dd423fcabahrenstypedef struct zfsctl_node {
fa9e4066f08beec538e775443c5be79dd423fcabahrenstypedef struct zfsctl_snapdir {
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Root directory elements. We have only a single static entry, 'snapshot'.
fa9e4066f08beec538e775443c5be79dd423fcabahrens { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* include . and .. in the calculation */
fa9e4066f08beec538e775443c5be79dd423fcabahrens#define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Initialize the various GFS pieces we'll need to create and manipulate .zfs
fa9e4066f08beec538e775443c5be79dd423fcabahrens * directories. This is called from the ZFS init routine, and initializes the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * vnode ops vectors that we'll be using.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Remove zfsctl vnode ops
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Return the inode number associated with the 'snapshot' directory.
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Create the '.zfs' directory. This directory is cached as part of the VFS
fa9e4066f08beec538e775443c5be79dd423fcabahrens * structure. This results in a hold on the vfs_t. The code in zfs_umount()
fa9e4066f08beec538e775443c5be79dd423fcabahrens * therefore checks against a vfs_count of 2 instead of 1. This reference
fa9e4066f08beec538e775443c5be79dd423fcabahrens * is removed when the ctldir is destroyed in the unmount.
fa9e4066f08beec538e775443c5be79dd423fcabahrens vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
fa9e4066f08beec538e775443c5be79dd423fcabahrens zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * We're only faking the fact that we have a root of a filesystem for
fa9e4066f08beec538e775443c5be79dd423fcabahrens * the sake of the GFS interfaces. Undo the flag manipulation it did
fa9e4066f08beec538e775443c5be79dd423fcabahrens vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Destroy the '.zfs' directory. Only called when the filesystem is
fa9e4066f08beec538e775443c5be79dd423fcabahrens * unmounted, and there are no more references. Release the vnode,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * which will release the hold on the vfs structure.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Given a root znode, retrieve the associated .zfs directory.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Add a hold to the vnode and return it.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Common open routine. Disallow any write access.
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Common close routine. Nothing to do here.
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Common access routine. Disallow writes.
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Common getattr function. Fill in basic information.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * We are a purely virtual object, so we have no
fa9e4066f08beec538e775443c5be79dd423fcabahrens * blocksize or allocated blocks.
fa9e4066f08beec538e775443c5be79dd423fcabahrens vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
fa9e4066f08beec538e775443c5be79dd423fcabahrens * We live in the now.
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* .zfs znodes always have a generation number of 0 */
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * .zfs inode namespace
fa9e4066f08beec538e775443c5be79dd423fcabahrens * We need to generate unique inode numbers for all files and directories
fa9e4066f08beec538e775443c5be79dd423fcabahrens * within the .zfs pseudo-filesystem. We use the following scheme:
fa9e4066f08beec538e775443c5be79dd423fcabahrens * ENTRY ZFSCTL_INODE
fa9e4066f08beec538e775443c5be79dd423fcabahrens * .zfs/snapshot/<snap> objectid(snap)
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Get root directory attributes.
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Special case the handling of "..".
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
fa9e4066f08beec538e775443c5be79dd423fcabahrensstatic const fs_operation_def_t zfsctl_tops_root[] = {
fa9e4066f08beec538e775443c5be79dd423fcabahrens { VOPNAME_INACTIVE, (fs_generic_func_p) gfs_vop_inactive },
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
fa9e4066f08beec538e775443c5be79dd423fcabahrens objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_unmount_snap(vnode_t *dvp, const char *name, int force, cred_t *cr)
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL)
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* this will be dropped by dounmount() */
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((err = dounmount(vn_mountedvfs(sep->se_root), force, kcred)) != 0)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
fa9e4066f08beec538e775443c5be79dd423fcabahrens const char *oldpath;
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Change the name in the AVL tree.
fa9e4066f08beec538e775443c5be79dd423fcabahrens VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Change the current mountpoint info:
fa9e4066f08beec538e775443c5be79dd423fcabahrens * - update the tail of the mntpoint path
fa9e4066f08beec538e775443c5be79dd423fcabahrens * - update the tail of the resource path
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT((tail - oldpath) + strlen(nm) + 2 < MAXNAMELEN);
fa9e4066f08beec538e775443c5be79dd423fcabahrens (void) strncpy(newpath, oldpath, tail - oldpath + 1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT((tail - oldpath) + strlen(nm) + 2 < MAXNAMELEN);
fa9e4066f08beec538e775443c5be79dd423fcabahrens (void) strncpy(newpath, oldpath, tail - oldpath + 1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
fa9e4066f08beec538e775443c5be79dd423fcabahrens VERIFY(zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from) == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Cannot move snapshots out of the snapdir.
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens VERIFY(zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to) == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr)
fa9e4066f08beec538e775443c5be79dd423fcabahrens VERIFY(zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname) == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Lookup entry point for the 'snapshot' directory. Try to open the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Perform a mount of the associated dataset on top of the vnode.
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If we get a recursive call, that means we got called
fa9e4066f08beec538e775443c5be79dd423fcabahrens * from the domount() code while it was trying to look up the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * spec (which looks like a local path for zfs). We need to
fa9e4066f08beec538e775443c5be79dd423fcabahrens * add some flag to domount() to tell it not to do this lookup.
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If the snapshot was unmounted behind our backs, remount it.
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The requested snapshot is not currently mounted, look it up.
fa9e4066f08beec538e775443c5be79dd423fcabahrens VERIFY(zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname) == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
fa9e4066f08beec538e775443c5be79dd423fcabahrens *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
fa9e4066f08beec538e775443c5be79dd423fcabahrens mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
fa9e4066f08beec538e775443c5be79dd423fcabahrens (void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Fix up the root vnode.
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_snapdir_readdir_cb(vnode_t *vp, struct dirent64 *dp, int *eofp,
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
fa9e4066f08beec538e775443c5be79dd423fcabahrens vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrensstatic const fs_operation_def_t zfsctl_tops_snapdir[] = {
fa9e4066f08beec538e775443c5be79dd423fcabahrens { VOPNAME_INACTIVE, (fs_generic_func_p) zfsctl_snapdir_inactive },
fa9e4066f08beec538e775443c5be79dd423fcabahrens zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * These VP's should never see the light of day. They should always
fa9e4066f08beec538e775443c5be79dd423fcabahrens * be covered.
fa9e4066f08beec538e775443c5be79dd423fcabahrensstatic const fs_operation_def_t zfsctl_tops_snapshot[] = {
fa9e4066f08beec538e775443c5be79dd423fcabahrens VOPNAME_INACTIVE, (fs_generic_func_p) zfsctl_snapshot_inactive,
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
fa9e4066f08beec538e775443c5be79dd423fcabahrens error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Unmount any snapshots for the given filesystem. This is called from
fa9e4066f08beec538e775443c5be79dd423fcabahrens * zfs_umount() - if we have a ctldir, then go through and unmount all the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * snapshots.
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
fa9e4066f08beec538e775443c5be79dd423fcabahrens error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If this snapshot is not mounted, then it must
fa9e4066f08beec538e775443c5be79dd423fcabahrens * have just been unmounted by somebody else, and
fa9e4066f08beec538e775443c5be79dd423fcabahrens * will be cleaned up by zfsctl_snapdir_inactive().
fa9e4066f08beec538e775443c5be79dd423fcabahrens * We can't use VN_RELE(), as that will try to
fa9e4066f08beec538e775443c5be79dd423fcabahrens * invoke zfsctl_snapdir_inactive(), and that
fa9e4066f08beec538e775443c5be79dd423fcabahrens * would lead to an attempt to re-grab the sd_lock.