vfs.c revision 56c0b1dab35897a2c09f93b2a70746ba47df7523
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER START
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The contents of this file are subject to the terms of the
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * Common Development and Distribution License (the "License").
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * You may not use this file except in compliance with the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
fa9e4066f08beec538e775443c5be79dd423fcabahrens * See the License for the specific language governing permissions
fa9e4066f08beec538e775443c5be79dd423fcabahrens * and limitations under the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * When distributing Covered Code, include this CDDL HEADER in each
fa9e4066f08beec538e775443c5be79dd423fcabahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If applicable, add the following below this CDDL HEADER, with the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * fields enclosed by brackets "[]" replaced with your own identifying
fa9e4066f08beec538e775443c5be79dd423fcabahrens * information: Portions Copyright [yyyy] [name of copyright owner]
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER END
b2634b9c57bbcfa01bb5dec2e196aec32957925fEric Taylor * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Use is subject to license terms.
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* All Rights Reserved */
fa9e4066f08beec538e775443c5be79dd423fcabahrens * University Copyright- Copyright (c) 1982, 1986, 1988
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The Regents of the University of California
fa9e4066f08beec538e775443c5be79dd423fcabahrens * All Rights Reserved
fa9e4066f08beec538e775443c5be79dd423fcabahrens * University Acknowledgment- Portions of this document are derived from
fa9e4066f08beec538e775443c5be79dd423fcabahrens * software developed by the University of California, Berkeley, and its
fa9e4066f08beec538e775443c5be79dd423fcabahrens * contributors.
99653d4ee642c6528e88224f12409a5f23060994eschrock/* Private interfaces to create vopstats-related data structures */
99653d4ee642c6528e88224f12409a5f23060994eschrockextern vopstats_t *get_fstype_vopstats(struct vfs *, struct vfssw *);
99653d4ee642c6528e88224f12409a5f23060994eschrockextern vsk_anchor_t *get_vskstat_anchor(struct vfs *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_clearmntopt_nolock(mntopts_t *, const char *, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_setmntopt_nolock(mntopts_t *, const char *,
99653d4ee642c6528e88224f12409a5f23060994eschrock const char *, int, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic int vfs_optionisset_nolock(const mntopts_t *, const char *, char **);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_swapopttbl(mntopts_t *, mntopts_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_createopttbl_extend(mntopts_t *, const char *,
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic char **vfs_copycancelopt_extend(char **const, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_freecancelopt(char **);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void getrootfs(char **, char **);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_mnttabvp_setup(void);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic kmem_cache_t *vfs_cache; /* Pointer to VFS kmem cache */
99653d4ee642c6528e88224f12409a5f23060994eschrock * VFS global data.
99653d4ee642c6528e88224f12409a5f23060994eschrockvnode_t *rootdir; /* pointer to root inode vnode. */
99653d4ee642c6528e88224f12409a5f23060994eschrockvnode_t *devicesdir; /* pointer to inode of devices root */
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahlchar *server_rootpath; /* root path for diskless clients */
99653d4ee642c6528e88224f12409a5f23060994eschrockchar *server_hostname; /* hostname of diskless server */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarksstruct vfs *rootvfs = &root; /* pointer to root vfs; head of VFS list. */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarksrvfs_t *rvfs_list; /* array of vfs ptrs for vfs hash list */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarksint vfshsz = 512; /* # of heads/locks in vfs hash arrays */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks /* must be power of 2! */
99653d4ee642c6528e88224f12409a5f23060994eschrocktimespec_t vfs_mnttab_ctime; /* mnttab created time */
99653d4ee642c6528e88224f12409a5f23060994eschrocktimespec_t vfs_mnttab_mtime; /* mnttab last modified time */
99653d4ee642c6528e88224f12409a5f23060994eschrockstruct pollhead vfs_pollhd; /* for mnttab pollers */
99653d4ee642c6528e88224f12409a5f23060994eschrockstruct vnode *vfs_mntdummyvp; /* to fake mnttab read/write for file events */
99653d4ee642c6528e88224f12409a5f23060994eschrockint mntfstype; /* will be set once mnt fs is mounted */
99653d4ee642c6528e88224f12409a5f23060994eschrock * Table for generic options recognized in the VFS layer and acted
99653d4ee642c6528e88224f12409a5f23060994eschrock * on at this level before parsing file system specific options.
99653d4ee642c6528e88224f12409a5f23060994eschrock * The nosuid option is stronger than any of the devices and setuid
99653d4ee642c6528e88224f12409a5f23060994eschrock * options, so those are canceled when nosuid is seen.
3bb79bece53191f2cf27aa61a72ea1784a7ce700eschrock * All options which are added here need to be added to the
3bb79bece53191f2cf27aa61a72ea1784a7ce700eschrock * list of standard options in usr/src/cmd/fs.d/fslib.c as well.
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahl * VFS Mount options table
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahlstatic char *nosuid_cancel[] = { MNTOPT_SUID, MNTOPT_DEVICES, MNTOPT_NODEVICES,
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *devices_cancel[] = { MNTOPT_NODEVICES, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *nodevices_cancel[] = { MNTOPT_DEVICES, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *setuid_cancel[] = { MNTOPT_NOSETUID, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *nosetuid_cancel[] = { MNTOPT_SETUID, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *nbmand_cancel[] = { MNTOPT_NONBMAND, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *nonbmand_cancel[] = { MNTOPT_NBMAND, NULL };
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor * option name cancel options default arg flags
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor MO_NODISPLAY, (void *)0 },
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
2f8aaab38e6371ad39ed90a1211ba8921acbb4d5eschrock (void *)0 },
fa94a07fd0519b8abfd871ad8fe60e6bebe1e2bbbrendan (void *)0 },
e7cbe64f7a72dae5cb44f100db60ca88f3313c65gw (void *)0 },
15e6edf145a9c2bb0e0272cf8debe823bb97529bgw (void *)0 },
15e6edf145a9c2bb0e0272cf8debe823bb97529bgw (void *)0 },
842727c2f41f01b380de4f5e787d905702870f23Chris Kirby * File system operation dispatch functions.
ca45db4129beff691dc46576c328149443788af2Chris Kirbyfsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
99653d4ee642c6528e88224f12409a5f23060994eschrock return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
99653d4ee642c6528e88224f12409a5f23060994eschrock return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr);
99653d4ee642c6528e88224f12409a5f23060994eschrock * Make sure this root has a path. With lofs, it is possible to have
99653d4ee642c6528e88224f12409a5f23060994eschrock * a NULL mountpoint.
99653d4ee642c6528e88224f12409a5f23060994eschrock if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) {
99653d4ee642c6528e88224f12409a5f23060994eschrock return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr);
99653d4ee642c6528e88224f12409a5f23060994eschrock * In order to handle system attribute fids in a manner
fa9e4066f08beec538e775443c5be79dd423fcabahrens * transparent to the underlying fs, we embed the fid for
99653d4ee642c6528e88224f12409a5f23060994eschrock * the sysattr parent object in the sysattr fid and tack on
99653d4ee642c6528e88224f12409a5f23060994eschrock * some extra bytes that only the sysattr layer knows about.
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling * This guarantees that sysattr fids are larger than other fids
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling * for this vfs. If the vfs supports sysattrs (implied
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling * by VFSFT_XVATTR support), we cannot have a size collision
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling * with XATTR_FIDSZ.
99653d4ee642c6528e88224f12409a5f23060994eschrock return (*(vfsp)->vfs_op->vfs_vget)(vfsp, vpp, fidp);
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_mountroot(vfs_t *vfsp, enum whymountroot reason)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason);
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate)
99653d4ee642c6528e88224f12409a5f23060994eschrock return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate));
99653d4ee642c6528e88224f12409a5f23060994eschrock if (ALLOCATED_VFSSW(&vfssw[fstype]) && VFS_INSTALLED(&vfssw[fstype]))
99653d4ee642c6528e88224f12409a5f23060994eschrock return (*vfssw[fstype].vsw_vfsops.vfs_sync) (NULL, flag, cr);
99653d4ee642c6528e88224f12409a5f23060994eschrock * File system initialization. vfs_setfsops() must be called from a file
99653d4ee642c6528e88224f12409a5f23060994eschrock * system's init routine.
ece3d9b3bacef51a5f34d993935eedbb7bb87059llingfs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual,
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling static const fs_operation_trans_def_t vfs_ops_table[] = {
99653d4ee642c6528e88224f12409a5f23060994eschrock (fs_generic_func_p) fs_sync, /* No errors allowed */
99653d4ee642c6528e88224f12409a5f23060994eschrock VFSNAME_MOUNTROOT, offsetof(vfsops_t, vfs_mountroot),
99653d4ee642c6528e88224f12409a5f23060994eschrock (fs_generic_func_p)fs_freevfs, /* Shouldn't fail */
99653d4ee642c6528e88224f12409a5f23060994eschrock return (fs_build_vector(actual, unused_ops, vfs_ops_table, template));
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarksvfs_setfsops(int fstype, const fs_operation_def_t *template, vfsops_t **actual)
40ff3960ec49d8bb707c8b7f0030f2ac0c014033ck * Verify that fstype refers to a valid fs. Note that
40ff3960ec49d8bb707c8b7f0030f2ac0c014033ck * 0 is valid since it's used to set "stray" ops.
99653d4ee642c6528e88224f12409a5f23060994eschrock /* Set up the operations vector. */
99653d4ee642c6528e88224f12409a5f23060994eschrock error = fs_copyfsops(template, &vfssw[fstype].vsw_vfsops, &unused_ops);
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling cmn_err(CE_WARN, "vfs_setfsops: %s: %d operations supplied "
99653d4ee642c6528e88224f12409a5f23060994eschrock "but not used", vfssw[fstype].vsw_name, unused_ops);
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_makefsops(const fs_operation_def_t *template, vfsops_t **actual)
99653d4ee642c6528e88224f12409a5f23060994eschrock *actual = (vfsops_t *)kmem_alloc(sizeof (vfsops_t), KM_SLEEP);
99653d4ee642c6528e88224f12409a5f23060994eschrock error = fs_copyfsops(template, *actual, &unused_ops);
99653d4ee642c6528e88224f12409a5f23060994eschrock return (0);
99653d4ee642c6528e88224f12409a5f23060994eschrock * Free a vfsops structure created as a result of vfs_makefsops().
99653d4ee642c6528e88224f12409a5f23060994eschrock * NOTE: For a vfsops structure initialized by vfs_setfsops(), use
99653d4ee642c6528e88224f12409a5f23060994eschrock * vfs_freevfsops_by_type().
99653d4ee642c6528e88224f12409a5f23060994eschrock * Since the vfsops structure is part of the vfssw table and wasn't
99653d4ee642c6528e88224f12409a5f23060994eschrock * really allocated, we're not really freeing anything. We keep
99653d4ee642c6528e88224f12409a5f23060994eschrock * the name for consistency with vfs_freevfsops(). We do, however,
99653d4ee642c6528e88224f12409a5f23060994eschrock * need to take care of a little bookkeeping.
99653d4ee642c6528e88224f12409a5f23060994eschrock * NOTE: For a vfsops structure created by vfs_setfsops(), use
99653d4ee642c6528e88224f12409a5f23060994eschrock * vfs_freevfsops_by_type().
b1b8ab34de515a5e83206da22c3d7e563241b021lling /* Verify that fstype refers to a loaded fs (and not fsid 0). */
99653d4ee642c6528e88224f12409a5f23060994eschrock/* Support routines used to reference vfs_op */
99653d4ee642c6528e88224f12409a5f23060994eschrock/* Set the operations vector for a vfs */
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* Retrieve the operations vector for a vfs */
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (vfsp->vfs_femhead == NULL && op == vfsp->vfs_op) {
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Returns non-zero (1) if the vfsops matches that of the vfs.
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Returns zero (0) if not.
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Returns non-zero (1) if the file system has installed a non-default,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * non-error vfs_sync routine. Returns zero (0) otherwise.
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* vfs_sync() routine is not the default/error function */
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Initialize a vfs structure.
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* Other initialization has been moved to vfs_alloc() */
fa9e4066f08beec538e775443c5be79dd423fcabahrens sema_init(&vfsp->vfs_reflock, 1, NULL, SEMA_DEFAULT, NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Allocate and initialize the vfs implementation private data
fa9e4066f08beec538e775443c5be79dd423fcabahrens * structure, vfs_impl_t.
5c7098917783942b65876f681a21342761227dadeschrock vfsp->vfs_implp = kmem_alloc(sizeof (vfs_impl_t), KM_SLEEP);
5c7098917783942b65876f681a21342761227dadeschrock /* Note that these are #define'd in vfs.h */
5c7098917783942b65876f681a21342761227dadeschrock /* Set size of counted array, then zero the array */
5c7098917783942b65876f681a21342761227dadeschrock * Release the vfs_impl_t structure, if it exists. Some unbundled
5c7098917783942b65876f681a21342761227dadeschrock * filesystems may not use the newer version of vfs and thus
fa9e4066f08beec538e775443c5be79dd423fcabahrens * would not contain this implementation private data structure.
99653d4ee642c6528e88224f12409a5f23060994eschrock * VFS system calls: mount, umount, syssync, statfs, fstatfs, statvfs,
99653d4ee642c6528e88224f12409a5f23060994eschrock * fstatvfs, and sysfs moved to common/syscall.
99653d4ee642c6528e88224f12409a5f23060994eschrock * Update every mounted file system. We call the vfs_sync operation of
99653d4ee642c6528e88224f12409a5f23060994eschrock * each file system type, passing it a NULL vfsp to indicate that all
99653d4ee642c6528e88224f12409a5f23060994eschrock * mounted file systems of that type should be updated.
99653d4ee642c6528e88224f12409a5f23060994eschrock for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
069f55e237020c4a4907b235fc38fafc6442ce94Eric Schrock * External routines.
99653d4ee642c6528e88224f12409a5f23060994eschrock * Lock for accessing the vfs linked list. Initialized in vfs_mountroot(),
99653d4ee642c6528e88224f12409a5f23060994eschrock * but otherwise should be accessed only via vfs_list_lock() and
99653d4ee642c6528e88224f12409a5f23060994eschrock * vfs_list_unlock(). Also used to protect the timestamp for mods to the list.
99653d4ee642c6528e88224f12409a5f23060994eschrock * Mount devfs on /devices. This is done right after root is mounted
99653d4ee642c6528e88224f12409a5f23060994eschrock * to provide device access support for the system
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris struct mounta mounta = { /* fake mounta for devfs_mount() */
5aba80db367b061758a29154d304977d00d8e4f4ck * _init devfs module to fill in the vfssw
5aba80db367b061758a29154d304977d00d8e4f4ck * Hold vfs
5aba80db367b061758a29154d304977d00d8e4f4ck * Locate mount point
5aba80db367b061758a29154d304977d00d8e4f4ck if (lookupname("/devices", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp))
5aba80db367b061758a29154d304977d00d8e4f4ck * Perform the mount of /devices
5aba80db367b061758a29154d304977d00d8e4f4ck * Set appropriate members and add to vfs list for mnttab display
5aba80db367b061758a29154d304977d00d8e4f4ck * Hold the root of /devices so it won't go away
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /devices");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock cmn_err(CE_NOTE, "Cannot acquire vfswlock of /devices");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * mount the first instance of /dev to root and remain mounted
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock struct mounta mounta = { /* fake mounta for sdev_mount() */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * _init dev module to fill in the vfssw
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Locate mount point
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock if (lookupname("/dev", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp))
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Perform the mount of /dev
990b4856d0eaada6f8140335733a1b1771ed2746lling * Set appropriate members and add to vfs list for mnttab display
990b4856d0eaada6f8140335733a1b1771ed2746lling * Hold the root of /dev so it won't go away
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /dev");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock cmn_err(CE_NOTE, "Cannot acquire vfswlock of /dev");
990b4856d0eaada6f8140335733a1b1771ed2746lling * Mount required filesystem. This is done right after root is mounted.
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling * vfs_mountroot is called by main() to mount the root filesystem.
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante * Alloc the vfs hash bucket array and locks
b1b8ab34de515a5e83206da22c3d7e563241b021lling rvfs_list = kmem_zalloc(vfshsz * sizeof (rvfs_t), KM_SLEEP);
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Call machine-dependent routine "rootconf" to choose a root
b1b8ab34de515a5e83206da22c3d7e563241b021lling * file system type.
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Get vnode for '/'. Set up rootdir, u.u_rdir and u.u_cdir
b1b8ab34de515a5e83206da22c3d7e563241b021lling * to point to it. These are used by lookuppn() so that it
990b4856d0eaada6f8140335733a1b1771ed2746lling * knows where to start from ('/' or '.').
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Setup the global zone's rootvp, now that it exists.
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante * Notify the module code that it can begin using the
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante * root filesystem instead of the boot program's services.
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Special handling for a ZFS root file system.
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Set up mnttab information for root
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Notify cluster software that the root filesystem is available.
b1b8ab34de515a5e83206da22c3d7e563241b021lling /* Now that we're all done with the root FS, set up its vopstats */
b1b8ab34de515a5e83206da22c3d7e563241b021lling if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) != NULL) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling /* Set flag for statistics collection */
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Mount /devices, /dev instance 1, /system/contract, /etc/mnttab,
b1b8ab34de515a5e83206da22c3d7e563241b021lling * /etc/svc/volatile, /etc/dfs/sharetab, /system/object, and /proc.
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_mountfs("tmpfs", "/etc/svc/volatile", "/etc/svc/volatile");
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_mountfs("sharefs", "sharefs", "/etc/dfs/sharetab");
b1b8ab34de515a5e83206da22c3d7e563241b021lling * This bit of magic can go away when we convert sparc to
b1b8ab34de515a5e83206da22c3d7e563241b021lling * the new boot architecture based on ramdisk.
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Booting off a mirrored root volume:
b1b8ab34de515a5e83206da22c3d7e563241b021lling * At this point, we have booted and mounted root on a
b1b8ab34de515a5e83206da22c3d7e563241b021lling * single component of the mirror. Complete the boot
b1b8ab34de515a5e83206da22c3d7e563241b021lling * by configuring SVM and converting the root to the
990b4856d0eaada6f8140335733a1b1771ed2746lling * dev_t of the mirrored root device. This dev_t conversion
990b4856d0eaada6f8140335733a1b1771ed2746lling * only works because the underlying device doesn't change.
b1b8ab34de515a5e83206da22c3d7e563241b021lling * mnttab should reflect the new root device
990b4856d0eaada6f8140335733a1b1771ed2746lling#endif /* __sparc */
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Look up the root device via devfs so that a dv_node is
b1b8ab34de515a5e83206da22c3d7e563241b021lling * created for it. The vnode is never VN_RELE()ed.
b1b8ab34de515a5e83206da22c3d7e563241b021lling * We allocate more than MAXPATHLEN so that the
b1b8ab34de515a5e83206da22c3d7e563241b021lling * buffer passed to i_ddi_prompath_to_devfspath() is
b1b8ab34de515a5e83206da22c3d7e563241b021lling * exactly MAXPATHLEN (the function expects a buffer
b1b8ab34de515a5e83206da22c3d7e563241b021lling * of that length).
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen)
b1b8ab34de515a5e83206da22c3d7e563241b021lling lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling /* NUL terminate in case "path" has garbage */
b1b8ab34de515a5e83206da22c3d7e563241b021lling cmn_err(CE_WARN, "!Cannot lookup root device: %s", path);
b1b8ab34de515a5e83206da22c3d7e563241b021lling * If remount failed and we're in a zone we need to check for the zone
990b4856d0eaada6f8140335733a1b1771ed2746lling * root path and strip it before the call to vfs_setpath().
b1b8ab34de515a5e83206da22c3d7e563241b021lling * If strpath doesn't begin with the zone_rootpath the original
b1b8ab34de515a5e83206da22c3d7e563241b021lling * strpath is returned unchanged.
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic const char *
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (zonep->zone_rootpath == NULL || strpath == NULL) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling * we check for the end of the string at one past the
b1b8ab34de515a5e83206da22c3d7e563241b021lling * current position because the zone_rootpath always
b1b8ab34de515a5e83206da22c3d7e563241b021lling * ends with "/" but we don't want to strip that off.
b1b8ab34de515a5e83206da22c3d7e563241b021lling return ((char *)strpath);
b1b8ab34de515a5e83206da22c3d7e563241b021lling return (&str2[i]);
990b4856d0eaada6f8140335733a1b1771ed2746lling * Check to see if our "block device" is actually a file. If so,
990b4856d0eaada6f8140335733a1b1771ed2746lling * automatically add a lofi device, and keep track of this fact.
990b4856d0eaada6f8140335733a1b1771ed2746lling return (0);
990b4856d0eaada6f8140335733a1b1771ed2746lling return (0);
990b4856d0eaada6f8140335733a1b1771ed2746lling return (0);
990b4856d0eaada6f8140335733a1b1771ed2746lling if (lookupname(uap->spec, fromspace, FOLLOW, NULL, &vp) != 0)
990b4856d0eaada6f8140335733a1b1771ed2746lling /* OK, this is a lofi mount. */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks vfs_optionisset_nolock(mntopts, MNTOPT_SETUID, NULL) ||
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_optionisset_nolock(mntopts, MNTOPT_DEVICES, NULL)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling (void) strlcpy(li->li_filename, pn.pn_path, MAXPATHLEN + 1);
069f55e237020c4a4907b235fc38fafc6442ce94Eric Schrock * The lofi control node is currently exclusive-open. We'd like
990b4856d0eaada6f8140335733a1b1771ed2746lling * to improve this, but in the meantime, we'll loop waiting for
990b4856d0eaada6f8140335733a1b1771ed2746lling err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL,
990b4856d0eaada6f8140335733a1b1771ed2746lling (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred);
990b4856d0eaada6f8140335733a1b1771ed2746lling err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL,
990b4856d0eaada6f8140335733a1b1771ed2746lling err = ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE_MINOR, (intptr_t)li,
990b4856d0eaada6f8140335733a1b1771ed2746lling (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred);
990b4856d0eaada6f8140335733a1b1771ed2746lling * Common mount code. Called from the system call entry point, from autofs,
990b4856d0eaada6f8140335733a1b1771ed2746lling * nfsv4 trigger mounts, and from pxfs.
990b4856d0eaada6f8140335733a1b1771ed2746lling * Takes the effective file system type, mount arguments, the mount point
990b4856d0eaada6f8140335733a1b1771ed2746lling * vnode, flags specifying whether the mount is a remount and whether it
990b4856d0eaada6f8140335733a1b1771ed2746lling * should be entered into the vfs list, and credentials. Fills in its vfspp
990b4856d0eaada6f8140335733a1b1771ed2746lling * parameter with the mounted file system instance's vfs.
990b4856d0eaada6f8140335733a1b1771ed2746lling * Note that the effective file system type is specified as a string. It may
990b4856d0eaada6f8140335733a1b1771ed2746lling * be null, in which case it's determined from the mount arguments, and may
990b4856d0eaada6f8140335733a1b1771ed2746lling * differ from the type specified in the mount arguments; this is a hook to
990b4856d0eaada6f8140335733a1b1771ed2746lling * allow interposition when instantiating file system instances.
990b4856d0eaada6f8140335733a1b1771ed2746lling * The caller is responsible for releasing its own hold on the mount point
990b4856d0eaada6f8140335733a1b1771ed2746lling * vp (this routine does its own hold when necessary).
990b4856d0eaada6f8140335733a1b1771ed2746lling * Also note that for remounts, the mount point vp should be the vnode for
990b4856d0eaada6f8140335733a1b1771ed2746lling * the root of the file system rather than the vnode that the file system
990b4856d0eaada6f8140335733a1b1771ed2746lling * is mounted on top of.
990b4856d0eaada6f8140335733a1b1771ed2746llingdomount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
a9799022bd90b13722204e80112efaa5bf573099ck * The v_flag value for the mount point vp is permanently set
990b4856d0eaada6f8140335733a1b1771ed2746lling * to VVFSLOCK so that no one bypasses the vn_vfs*locks routine
990b4856d0eaada6f8140335733a1b1771ed2746lling * for mount point locking.
990b4856d0eaada6f8140335733a1b1771ed2746lling * Find the ops vector to use to invoke the file system-specific mount
990b4856d0eaada6f8140335733a1b1771ed2746lling * method. If the fsname argument is non-NULL, use it directly.
a9799022bd90b13722204e80112efaa5bf573099ck * Otherwise, dig the file system type information out of the mount
a9799022bd90b13722204e80112efaa5bf573099ck * arguments.
990b4856d0eaada6f8140335733a1b1771ed2746lling * A side effect is to hold the vfssw entry.
990b4856d0eaada6f8140335733a1b1771ed2746lling * Mount arguments can be specified in several ways, which are
990b4856d0eaada6f8140335733a1b1771ed2746lling * distinguished by flag bit settings. The preferred way is to set
990b4856d0eaada6f8140335733a1b1771ed2746lling * MS_OPTIONSTR, indicating an 8 argument mount with the file system
990b4856d0eaada6f8140335733a1b1771ed2746lling * type supplied as a character string and the last two arguments
990b4856d0eaada6f8140335733a1b1771ed2746lling * being a pointer to a character buffer and the size of the buffer.
990b4856d0eaada6f8140335733a1b1771ed2746lling * On entry, the buffer holds a null terminated list of options; on
990b4856d0eaada6f8140335733a1b1771ed2746lling * return, the string is the list of options the file system
990b4856d0eaada6f8140335733a1b1771ed2746lling * recognized. If MS_DATA is set arguments five and six point to a
990b4856d0eaada6f8140335733a1b1771ed2746lling * block of binary data which the file system interprets.
990b4856d0eaada6f8140335733a1b1771ed2746lling * A further wrinkle is that some callers don't set MS_FSS and MS_DATA
990b4856d0eaada6f8140335733a1b1771ed2746lling * consistently with these conventions. To handle them, we check to
990b4856d0eaada6f8140335733a1b1771ed2746lling * see whether the pointer to the file system name has a numeric value
990b4856d0eaada6f8140335733a1b1771ed2746lling * less than 256. If so, we treat it as an index.
990b4856d0eaada6f8140335733a1b1771ed2746lling } else if (uap->flags & (MS_OPTIONSTR | MS_DATA | MS_FSS)) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens (void) strcpy(fsname, vfssw[fstype].vsw_name);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * Handle either kernel or user address space.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) == NULL)
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens vfs_copyopttbl(&vswp->vsw_optproto, &mnt_mntopts);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * Fetch mount options and parse them for generic vfs options
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * Limit the buffer size
990b4856d0eaada6f8140335733a1b1771ed2746lling * Flag bits override the options string.
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_REMOUNT, NULL, 0, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_RO, NULL, 0, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling * Check if this is a remount; must be set in the option string and
990b4856d0eaada6f8140335733a1b1771ed2746lling * the file system must support a remount option.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * uap->flags and vfs_optionisset() should agree.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (rdonly = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_RO, NULL)) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling nbmand = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NBMAND, NULL);
990b4856d0eaada6f8140335733a1b1771ed2746lling * If we are splicing the fs into the namespace,
990b4856d0eaada6f8140335733a1b1771ed2746lling * perform mount point checks.
990b4856d0eaada6f8140335733a1b1771ed2746lling * We want to resolve the path for the mount point to eliminate
990b4856d0eaada6f8140335733a1b1771ed2746lling * '.' and ".." and symlinks in mount points; we can't do the
990b4856d0eaada6f8140335733a1b1771ed2746lling * same for the resource string, since it would turn
990b4856d0eaada6f8140335733a1b1771ed2746lling * "/dev/dsk/c0t0d0s0" into "/devices/pci@...". We need to do
990b4856d0eaada6f8140335733a1b1771ed2746lling * this before grabbing vn_vfswlock(), because otherwise we
990b4856d0eaada6f8140335733a1b1771ed2746lling * would deadlock with lookuppn().
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * Pick up mount point and device from appropriate space.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (pn_get(uap->spec, fromspace, &pn) == 0) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * Do a lookupname prior to taking the
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * writelock. Mark this as completed if
990b4856d0eaada6f8140335733a1b1771ed2746lling * successful for later cleanup and addition to
990b4856d0eaada6f8140335733a1b1771ed2746lling * the mount in progress table.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if ((error = pn_get(uap->dir, fromspace, &pn)) == 0) {
990b4856d0eaada6f8140335733a1b1771ed2746lling * Kludge to prevent autofs from deadlocking with
990b4856d0eaada6f8140335733a1b1771ed2746lling * itself when it calls domount().
990b4856d0eaada6f8140335733a1b1771ed2746lling * If autofs is calling, it is because it is doing
990b4856d0eaada6f8140335733a1b1771ed2746lling * (autofs) mounts in the process of an NFS mount. A
990b4856d0eaada6f8140335733a1b1771ed2746lling * lookuppn() here would cause us to block waiting for
990b4856d0eaada6f8140335733a1b1771ed2746lling * said NFS mount to complete, which can't since this
990b4856d0eaada6f8140335733a1b1771ed2746lling * is the thread that was supposed to be doing it.
990b4856d0eaada6f8140335733a1b1771ed2746lling * The file disappeared or otherwise
990b4856d0eaada6f8140335733a1b1771ed2746lling * became inaccessible since we opened
990b4856d0eaada6f8140335733a1b1771ed2746lling * it; might as well fail the mount
990b4856d0eaada6f8140335733a1b1771ed2746lling * since the mount point is no longer
990b4856d0eaada6f8140335733a1b1771ed2746lling * accessible.
990b4856d0eaada6f8140335733a1b1771ed2746lling * If the addition of the zone's rootpath
990b4856d0eaada6f8140335733a1b1771ed2746lling * would push us over a total path length
990b4856d0eaada6f8140335733a1b1771ed2746lling * of MAXPATHLEN, we fail the mount with
990b4856d0eaada6f8140335733a1b1771ed2746lling * ENAMETOOLONG, which is what we would have
990b4856d0eaada6f8140335733a1b1771ed2746lling * gotten if we were trying to perform the same
990b4856d0eaada6f8140335733a1b1771ed2746lling * mount in the global zone.
990b4856d0eaada6f8140335733a1b1771ed2746lling * strlen() doesn't count the trailing
990b4856d0eaada6f8140335733a1b1771ed2746lling * '\0', but zone_rootpathlen counts both a
990b4856d0eaada6f8140335733a1b1771ed2746lling * trailing '/' and the terminating '\0'.
990b4856d0eaada6f8140335733a1b1771ed2746lling * Prevent path name resolution from proceeding past
990b4856d0eaada6f8140335733a1b1771ed2746lling * the mount point.
990b4856d0eaada6f8140335733a1b1771ed2746lling * Verify that it's legitimate to establish a mount on
990b4856d0eaada6f8140335733a1b1771ed2746lling * the prospective mount point.
990b4856d0eaada6f8140335733a1b1771ed2746lling * The mount point lock was obtained after some
990b4856d0eaada6f8140335733a1b1771ed2746lling * other thread raced through and established a mount.
goto errout;
if (remount) {
goto errout;
goto errout;
goto errout;
if (!remount) {
if (splice)
goto errout;
if (!remount) {
if (splice)
goto errout;
if (!addmip) {
if (addmip) {
if (error > 0) {
if (splice)
if (remount) {
goto errout;
addmip = 0;
if (splice)
&mnt_mntopts);
if (remount) {
if (error) {
if (remount) {
if (oldmntpt)
if (oldresource)
if (remount) {
if (oldresource)
if (oldmntpt)
} else if (splice) {
if (copyout_error == 0 &&
if (splice)
if (!remount) {
if (addmip)
if (delmip)
if (copyout_error) {
return (error);
char *sp;
int have_list_lock = 0;
goto out;
out:
if (have_list_lock) {
refstr_t *
return (resource);
refstr_t *
return (mntpt);
const char *s = opts;
count = 0;
count++;
char **result;
if (i + extend == 0)
return (NULL);
for (; j <= i + extend; j++)
return (result);
for (i = 0; i < count; i++) {
int c1 = 0;
int c2 = 0;
char **result;
if (c1 == 0)
if (c2 == 0)
return (result);
return (result);
count++;
char **newcanp;
for (i = 0; i < count; i++) {
if (update_mnttab)
int gotlock = 0;
if (gotlock)
* VFS_NOFORCEOPT bit is set in the flags. Also, VFS_DISPLAY/VFS_NODISPLAY flag
char *sp;
for (i = 0; i < count; i++) {
char **cp;
if (update_mnttab)
int gotlock = 0;
if (gotlock)
static mntopt_t *
return (mop);
int found = 0;
int err = 0;
if (!found) {
goto out;
if (err != 0)
goto out;
int len;
goto out;
goto out;
out:
return (err);
int found = 0;
int err = 0;
if (!found) {
goto out;
if (err != 0)
goto out;
goto out;
goto out;
out:
return (err);
if (p == NULL) {
p = s + strlen(s);
nextop = p;
if (p == NULL) {
valp = p;
if (create)
s = nextop;
struct mntopt *
for (i = 0; i < count; i++) {
return (mop);
return (NULL);
for (i = 0; i < count; i++) {
int ret;
return (ret);
char *cp;
uint_t i;
goto err;
if (comma)
int arglen;
goto err;
err:
return (EOVERFLOW);
int ccnt = 0;
char **cp;
ccnt++;
for (i = 0; i < count; i++) {
if (count) {
vfs_mnttabvp_setup(void)
&vfs_mntdummyvnops) != 0) {
if (rw) {
vfs_mnttab_writeop(void)
vfs_mnttab_readop(void)
vfs_mnttab_rwop(0);
int changed;
if (!changed) {
newhrt++;
int error;
return (error);
vfs_unmountall(void)
int error;
dnlc_purge();
if (error)
int newflag;
return (EBUSY);
return (EBUSY);
if (panicstr)
* vfs_rlock_wait/vfs_wlock_wait/vfs_wlock etc,.
int held;
if (panicstr)
return (held);
struct _kthread *
if (panicstr)
return (NULL);
return (owner);
int vhno;
if (insert_at_head) {
int vhno;
goto foundit;
goto foundit;
struct vfs *
return (vfsp);
return (NULL);
int retval = 0;
return (retval);
int found;
found = 0;
return (found);
struct vfs *
int found;
found = 0;
struct vfs *
const char *mntpt;
if (retvfsp)
return (retvfsp);
int found;
found = 0;
return (found);
struct vfssw *
return (NULL);
return (vswp);
return (NULL);
return (vfstype);
struct vfssw *
const char *modname;
RLOCK_VFSSW();
WLOCK_VFSSW();
return (NULL);
RLOCK_VFSSW();
return (vswp);
return (NULL);
RLOCK_VFSSW();
return (vswp);
struct vfssw *
return (NULL);
return (vswp);
return (NULL);
struct vfssw *
RLOCK_VFSSW();
return (vswp);
return (NULL);
* the SYNC phase of the panic code (see comments in panic.c). It should only
vfs_syncall(void)
sync();
while (sync_triesleft > 0) {
if (new_bufcnt)
if (new_pgcnt)
sync_timeleft = 0;
vfs_syncprogress(void)
if (panicstr)
return (stf);
vfsstray(void)
vfs_EIO(void)
return (EIO);
return (EIO);
vfsinit(void)
int error;
extern int vopstats_enabled;
extern void vopstats_startup();
fem_init();
if (error != 0) {
RLOCK_VFSSW();
if (vopstats_enabled) {
xattr_init();
vfs_t *
return (vfsp);
const fs_operation_trans_def_t *p;
num_trans++, p++)
const fs_operation_def_t *p;
num_ops++, p++)
used = 0;
for (i = 0; i < num_trans; i++) {
int j, found;
char *curname;
found = 0;
for (j = 0; j < num_ops; j++) {
used++;
if (found) {
return (EINVAL);
fs_error(void)
fs_default(void)
#ifdef __sparc
#if defined(__x86)
extern int hvmboot_rootconf();
rootconf()
int error;
extern void pm_init();
#if defined(__x86)
return (error);
return (error);
RLOCK_VFSSW();
return (ENXIO);
pm_init();
if (netboot)
(void) strplumb();
if (error)
return (error);
* convention that the NFS V2 filesystem name is "nfs" (see vfs_conf.c)
extern char *strplumb_get_netdev_path(void);
== DDI_SUCCESS) {
== DDI_SUCCESS) {
++netboot;
== DDI_SUCCESS) {
int ret = 0;
return (ret);
return (ret);
int strsize;
int err;
if (err)
return (err);