common/fs/vfs.c

	vfs.c revision 56c0b1dab35897a2c09f93b2a70746ba47df7523
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER START
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The contents of this file are subject to the terms of the
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * Common Development and Distribution License (the "License").
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * You may not use this file except in compliance with the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
fa9e4066f08beec538e775443c5be79dd423fcabahrens * or http://www.opensolaris.org/os/licensing.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * See the License for the specific language governing permissions
fa9e4066f08beec538e775443c5be79dd423fcabahrens * and limitations under the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * When distributing Covered Code, include this CDDL HEADER in each
fa9e4066f08beec538e775443c5be79dd423fcabahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If applicable, add the following below this CDDL HEADER, with the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * fields enclosed by brackets "[]" replaced with your own identifying
fa9e4066f08beec538e775443c5be79dd423fcabahrens * information: Portions Copyright [yyyy] [name of copyright owner]
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER END
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
b2634b9c57bbcfa01bb5dec2e196aec32957925fEric Taylor * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Use is subject to license terms.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*  Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*    All Rights Reserved   */
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * University Copyright- Copyright (c) 1982, 1986, 1988
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The Regents of the University of California
fa9e4066f08beec538e775443c5be79dd423fcabahrens * All Rights Reserved
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * University Acknowledgment- Portions of this document are derived from
fa9e4066f08beec538e775443c5be79dd423fcabahrens * software developed by the University of California, Berkeley, and its
fa9e4066f08beec538e775443c5be79dd423fcabahrens * contributors.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/types.h>
5aba80db367b061758a29154d304977d00d8e4f4ck#include <sys/t_lock.h>
5aba80db367b061758a29154d304977d00d8e4f4ck#include <sys/param.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/errno.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/user.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/fstyp.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/kmem.h>
91ebeef555ce7f899b6270a3c2df47b51f7ad59aahrens#include <sys/systm.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/proc.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/mount.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/vfs.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/vfs_opreg.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/fem.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/mntent.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/stat.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/statvfs.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/statfs.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/cred.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/vnode.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/rwstlock.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/dnlc.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/file.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/time.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/atomic.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/cmn_err.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/buf.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/swap.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/debug.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/vnode.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/modctl.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/ddi.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/pathname.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/bootconf.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/dumphdr.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/dc_ki.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/poll.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/sunddi.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/sysmacros.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/zone.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/policy.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/ctfs.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/objfs.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/console.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/reboot.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/attr.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/spa.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/lofi.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <vm/page.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <fs/fs_subr.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/* Private interfaces to create vopstats-related data structures */
99653d4ee642c6528e88224f12409a5f23060994eschrockextern void     initialize_vopstats(vopstats_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockextern vopstats_t   *get_fstype_vopstats(struct vfs *, struct vfssw *);
99653d4ee642c6528e88224f12409a5f23060994eschrockextern vsk_anchor_t *get_vskstat_anchor(struct vfs *);
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_clearmntopt_nolock(mntopts_t *, const char *, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_setmntopt_nolock(mntopts_t *, const char *,
99653d4ee642c6528e88224f12409a5f23060994eschrock    const char *, int, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic int  vfs_optionisset_nolock(const mntopts_t *, const char *, char **);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_freemnttab(struct vfs *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_freeopt(mntopt_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_swapopttbl(mntopts_t *, mntopts_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_createopttbl_extend(mntopts_t *, const char *,
99653d4ee642c6528e88224f12409a5f23060994eschrock    const mntopts_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic char **vfs_copycancelopt_extend(char **const, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_freecancelopt(char **);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void getrootfs(char **, char **);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic int getmacpath(dev_info_t *, void *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_mnttabvp_setup(void);
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockstruct ipmnt {
99653d4ee642c6528e88224f12409a5f23060994eschrock    struct ipmnt    *mip_next;
99653d4ee642c6528e88224f12409a5f23060994eschrock    dev_t       mip_dev;
99653d4ee642c6528e88224f12409a5f23060994eschrock    struct vfs  *mip_vfsp;
99653d4ee642c6528e88224f12409a5f23060994eschrock};
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic kmutex_t     vfs_miplist_mutex;
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic struct ipmnt *vfs_miplist = NULL;
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic struct ipmnt *vfs_miplist_end = NULL;
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic kmem_cache_t *vfs_cache; /* Pointer to VFS kmem cache */
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * VFS global data.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockvnode_t *rootdir;       /* pointer to root inode vnode. */
99653d4ee642c6528e88224f12409a5f23060994eschrockvnode_t *devicesdir;        /* pointer to inode of devices root */
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahlvnode_t *devdir;        /* pointer to inode of dev root */
99653d4ee642c6528e88224f12409a5f23060994eschrock
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahlchar *server_rootpath;      /* root path for diskless clients */
99653d4ee642c6528e88224f12409a5f23060994eschrockchar *server_hostname;      /* hostname of diskless server */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic struct vfs root;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic struct vfs devices;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic struct vfs dev;
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarksstruct vfs *rootvfs = &root;    /* pointer to root vfs; head of VFS list. */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarksrvfs_t *rvfs_list;      /* array of vfs ptrs for vfs hash list */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarksint vfshsz = 512;       /* # of heads/locks in vfs hash arrays */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks                /* must be power of 2!  */
99653d4ee642c6528e88224f12409a5f23060994eschrocktimespec_t vfs_mnttab_ctime;    /* mnttab created time */
99653d4ee642c6528e88224f12409a5f23060994eschrocktimespec_t vfs_mnttab_mtime;    /* mnttab last modified time */
99653d4ee642c6528e88224f12409a5f23060994eschrockchar *vfs_dummyfstype = "\0";
99653d4ee642c6528e88224f12409a5f23060994eschrockstruct pollhead vfs_pollhd; /* for mnttab pollers */
99653d4ee642c6528e88224f12409a5f23060994eschrockstruct vnode *vfs_mntdummyvp;   /* to fake mnttab read/write for file events */
99653d4ee642c6528e88224f12409a5f23060994eschrockint mntfstype;      /* will be set once mnt fs is mounted */
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Table for generic options recognized in the VFS layer and acted
99653d4ee642c6528e88224f12409a5f23060994eschrock * on at this level before parsing file system specific options.
99653d4ee642c6528e88224f12409a5f23060994eschrock * The nosuid option is stronger than any of the devices and setuid
99653d4ee642c6528e88224f12409a5f23060994eschrock * options, so those are canceled when nosuid is seen.
99653d4ee642c6528e88224f12409a5f23060994eschrock *
3bb79bece53191f2cf27aa61a72ea1784a7ce700eschrock * All options which are added here need to be added to the
3bb79bece53191f2cf27aa61a72ea1784a7ce700eschrock * list of standard options in usr/src/cmd/fs.d/fslib.c as well.
06eeb2ad640ce72d394ac521094bed7681044408ek */
06eeb2ad640ce72d394ac521094bed7681044408ek/*
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahl * VFS Mount options table
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahl */
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahlstatic char *ro_cancel[] = { MNTOPT_RW, NULL };
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahlstatic char *rw_cancel[] = { MNTOPT_RO, NULL };
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahlstatic char *suid_cancel[] = { MNTOPT_NOSUID, NULL };
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahlstatic char *nosuid_cancel[] = { MNTOPT_SUID, MNTOPT_DEVICES, MNTOPT_NODEVICES,
b1b8ab34de515a5e83206da22c3d7e563241b021lling    MNTOPT_NOSETUID, MNTOPT_SETUID, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *devices_cancel[] = { MNTOPT_NODEVICES, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *nodevices_cancel[] = { MNTOPT_DEVICES, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *setuid_cancel[] = { MNTOPT_NOSETUID, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *nosetuid_cancel[] = { MNTOPT_SETUID, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *nbmand_cancel[] = { MNTOPT_NONBMAND, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *nonbmand_cancel[] = { MNTOPT_NBMAND, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *exec_cancel[] = { MNTOPT_NOEXEC, NULL };
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic char *noexec_cancel[] = { MNTOPT_EXEC, NULL };
b7661ccca92e6bf5160f4d5d2601efaeaa1f5161mmusante
b7661ccca92e6bf5160f4d5d2601efaeaa1f5161mmusantestatic const mntopt_t mntopts[] = {
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor/*
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor *  option name     cancel options      default arg flags
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor */
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor    { MNTOPT_REMOUNT,   NULL,           NULL,
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor        MO_NODISPLAY, (void *)0 },
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor    { MNTOPT_RO,        ro_cancel,      NULL,       0,
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor        (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks    { MNTOPT_RW,        rw_cancel,      NULL,       0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks        (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks    { MNTOPT_SUID,      suid_cancel,        NULL,       0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks        (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks    { MNTOPT_NOSUID,    nosuid_cancel,      NULL,       0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks        (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks    { MNTOPT_DEVICES,   devices_cancel,     NULL,       0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks        (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks    { MNTOPT_NODEVICES, nodevices_cancel,   NULL,       0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks        (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks    { MNTOPT_SETUID,    setuid_cancel,      NULL,       0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks        (void *)0 },
2f8aaab38e6371ad39ed90a1211ba8921acbb4d5eschrock    { MNTOPT_NOSETUID,  nosetuid_cancel,    NULL,       0,
2f8aaab38e6371ad39ed90a1211ba8921acbb4d5eschrock        (void *)0 },
fa94a07fd0519b8abfd871ad8fe60e6bebe1e2bbbrendan    { MNTOPT_NBMAND,    nbmand_cancel,      NULL,       0,
fa94a07fd0519b8abfd871ad8fe60e6bebe1e2bbbrendan        (void *)0 },
e7cbe64f7a72dae5cb44f100db60ca88f3313c65gw    { MNTOPT_NONBMAND,  nonbmand_cancel,    NULL,       0,
e7cbe64f7a72dae5cb44f100db60ca88f3313c65gw        (void *)0 },
e7cbe64f7a72dae5cb44f100db60ca88f3313c65gw    { MNTOPT_EXEC,      exec_cancel,        NULL,       0,
15e6edf145a9c2bb0e0272cf8debe823bb97529bgw        (void *)0 },
15e6edf145a9c2bb0e0272cf8debe823bb97529bgw    { MNTOPT_NOEXEC,    noexec_cancel,      NULL,       0,
15e6edf145a9c2bb0e0272cf8debe823bb97529bgw        (void *)0 },
89a89ebfd7c3b4056afe2c03e959e22824df777dlling};
89a89ebfd7c3b4056afe2c03e959e22824df777dlling
89a89ebfd7c3b4056afe2c03e959e22824df777dllingconst mntopts_t vfs_mntopts = {
e6ca193ded880d478cc39e34ef82d4be36e4445dGeorge Wilson    sizeof (mntopts) / sizeof (mntopt_t),
e6ca193ded880d478cc39e34ef82d4be36e4445dGeorge Wilson    (mntopt_t *)&mntopts[0]
e6ca193ded880d478cc39e34ef82d4be36e4445dGeorge Wilson};
842727c2f41f01b380de4f5e787d905702870f23Chris Kirby
842727c2f41f01b380de4f5e787d905702870f23Chris Kirby/*
842727c2f41f01b380de4f5e787d905702870f23Chris Kirby * File system operation dispatch functions.
842727c2f41f01b380de4f5e787d905702870f23Chris Kirby */
842727c2f41f01b380de4f5e787d905702870f23Chris Kirby
ca45db4129beff691dc46576c328149443788af2Chris Kirbyint
ca45db4129beff691dc46576c328149443788af2Chris Kirbyfsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock    return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
c08432ebe688ed6410b302771a9afd8e23e2a7cceschrock
c08432ebe688ed6410b302771a9afd8e23e2a7cceschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_unmount(vfs_t *vfsp, int flag, cred_t *cr)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock    return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
99653d4ee642c6528e88224f12409a5f23060994eschrockint
fa9e4066f08beec538e775443c5be79dd423fcabahrensfsop_root(vfs_t *vfsp, vnode_t **vpp)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens    refstr_t *mntpt;
fa9e4066f08beec538e775443c5be79dd423fcabahrens    int ret = (*(vfsp)->vfs_op->vfs_root)(vfsp, vpp);
fa9e4066f08beec538e775443c5be79dd423fcabahrens    /*
99653d4ee642c6528e88224f12409a5f23060994eschrock     * Make sure this root has a path.  With lofs, it is possible to have
99653d4ee642c6528e88224f12409a5f23060994eschrock     * a NULL mountpoint.
99653d4ee642c6528e88224f12409a5f23060994eschrock     */
99653d4ee642c6528e88224f12409a5f23060994eschrock    if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) {
99653d4ee642c6528e88224f12409a5f23060994eschrock        mntpt = vfs_getmntpoint(vfsp);
99653d4ee642c6528e88224f12409a5f23060994eschrock        vn_setpath_str(*vpp, refstr_value(mntpt),
99653d4ee642c6528e88224f12409a5f23060994eschrock            strlen(refstr_value(mntpt)));
99653d4ee642c6528e88224f12409a5f23060994eschrock        refstr_rele(mntpt);
99653d4ee642c6528e88224f12409a5f23060994eschrock    }
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock    return (ret);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_statfs(vfs_t *vfsp, statvfs64_t *sp)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock    return (*(vfsp)->vfs_op->vfs_statvfs)(vfsp, sp);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_sync(vfs_t *vfsp, short flag, cred_t *cr)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock    return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
b1b8ab34de515a5e83206da22c3d7e563241b021lling{
99653d4ee642c6528e88224f12409a5f23060994eschrock    /*
99653d4ee642c6528e88224f12409a5f23060994eschrock     * In order to handle system attribute fids in a manner
fa9e4066f08beec538e775443c5be79dd423fcabahrens     * transparent to the underlying fs, we embed the fid for
99653d4ee642c6528e88224f12409a5f23060994eschrock     * the sysattr parent object in the sysattr fid and tack on
99653d4ee642c6528e88224f12409a5f23060994eschrock     * some extra bytes that only the sysattr layer knows about.
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling     *
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling     * This guarantees that sysattr fids are larger than other fids
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling     * for this vfs. If the vfs supports sysattrs (implied
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling     * by VFSFT_XVATTR support), we cannot have a size collision
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling     * with XATTR_FIDSZ.
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling     */
99653d4ee642c6528e88224f12409a5f23060994eschrock    if (vfs_has_feature(vfsp, VFSFT_XVATTR) &&
99653d4ee642c6528e88224f12409a5f23060994eschrock        fidp->fid_len == XATTR_FIDSZ)
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling        return (xattr_dir_vget(vfsp, vpp, fidp));
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock    return (*(vfsp)->vfs_op->vfs_vget)(vfsp, vpp, fidp);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_mountroot(vfs_t *vfsp, enum whymountroot reason)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens    return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
/*
 * Invoke the vfs_freevfs operation of vfsp.  Nothing is returned.
 */
void
fsop_freefs(vfs_t *vfsp)
{
    vfsops_t *ops = vfsp->vfs_op;

    (*ops->vfs_freevfs)(vfsp);
}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock    return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate));
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
/*
 * Call the vfs_sync operation registered for file system type fstype,
 * passing a NULL vfs pointer.
 *
 * fstype must index a valid vfssw slot; this is only ASSERTed, not
 * checked at run time.  Returns ENOTSUP when the slot is not allocated
 * or has no operations installed, otherwise the result of vfs_sync.
 */
int
fsop_sync_by_kind(int fstype, short flag, cred_t *cr)
{
    ASSERT((fstype >= 0) && (fstype < nfstype));

    if (!ALLOCATED_VFSSW(&vfssw[fstype]) || !VFS_INSTALLED(&vfssw[fstype]))
        return (ENOTSUP);

    return (*vfssw[fstype].vsw_vfsops.vfs_sync) (NULL, flag, cr);
}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * File system initialization.  vfs_setfsops() must be called from a file
99653d4ee642c6528e88224f12409a5f23060994eschrock * system's init routine.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrock
/*
 * Fill in the vfsops vector 'actual' from 'template' by handing both,
 * together with the translation table below, to fs_build_vector().
 * The count that fs_build_vector() stores through unused_ops and its
 * return value are passed straight back to the caller.
 *
 * Each table row is: operation name, offset of its slot in vfsops_t,
 * and two functions.
 * NOTE(review): the two functions are presumably the default and the
 * error handler -- confirm against fs_operation_trans_def_t.
 * A NULL name terminates the table.
 */
static int
fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual,
    int *unused_ops)
{
    static const fs_operation_trans_def_t vfs_ops_table[] = {
        VFSNAME_MOUNT, offsetof(vfsops_t, vfs_mount),
        fs_nosys, fs_nosys,

        VFSNAME_UNMOUNT, offsetof(vfsops_t, vfs_unmount),
        fs_nosys, fs_nosys,

        VFSNAME_ROOT, offsetof(vfsops_t, vfs_root),
        fs_nosys, fs_nosys,

        VFSNAME_STATVFS, offsetof(vfsops_t, vfs_statvfs),
        fs_nosys, fs_nosys,

        /* sync: no errors allowed */
        VFSNAME_SYNC, offsetof(vfsops_t, vfs_sync),
        (fs_generic_func_p) fs_sync, (fs_generic_func_p) fs_sync,

        VFSNAME_VGET, offsetof(vfsops_t, vfs_vget),
        fs_nosys, fs_nosys,

        VFSNAME_MOUNTROOT, offsetof(vfsops_t, vfs_mountroot),
        fs_nosys, fs_nosys,

        /* freevfs: shouldn't fail */
        VFSNAME_FREEVFS, offsetof(vfsops_t, vfs_freevfs),
        (fs_generic_func_p)fs_freevfs, (fs_generic_func_p)fs_freevfs,

        VFSNAME_VNSTATE, offsetof(vfsops_t, vfs_vnstate),
        (fs_generic_func_p)fs_nosys, (fs_generic_func_p)fs_nosys,

        /* end of table */
        NULL, 0, NULL, NULL
    };
    int error;

    error = fs_build_vector(actual, unused_ops, vfs_ops_table, template);
    return (error);
}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockvoid
99653d4ee642c6528e88224f12409a5f23060994eschrockzfs_boot_init() {
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock    if (strcmp(rootfs.bo_fstype, MNTTYPE_ZFS) == 0)
99653d4ee642c6528e88224f12409a5f23060994eschrock        spa_boot_init();
99653d4ee642c6528e88224f12409a5f23060994eschrock}
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
/*
 * Install the operations described by template into the vfssw entry for
 * fstype and mark that entry VSW_INSTALLED.
 *
 *  fstype   - index into vfssw.  0 is accepted, since it is used to
 *             set the "stray" ops.
 *  template - operation definitions handed to fs_copyfsops().
 *  actual   - if non-NULL, receives a pointer to the entry's vfsops.
 *
 * Returns EINVAL when fstype is out of range or its vfssw entry is not
 * allocated, the fs_copyfsops() error when building the vector fails,
 * and 0 otherwise.
 */
int
vfs_setfsops(int fstype, const fs_operation_def_t *template, vfsops_t **actual)
{
    int unused_ops;
    int error;

    /* fstype must name a valid, allocated file system switch entry. */
    if (fstype < 0 || fstype >= nfstype ||
        !ALLOCATED_VFSSW(&vfssw[fstype]))
        return (EINVAL);

    /* Build the operations vector in place in the switch entry. */
    error = fs_copyfsops(template, &vfssw[fstype].vsw_vfsops, &unused_ops);
    if (error != 0)
        return (error);

    vfssw[fstype].vsw_flag |= VSW_INSTALLED;

    if (actual != NULL)
        *actual = &vfssw[fstype].vsw_vfsops;

#if DEBUG
    /* Tell the developer about template entries that matched nothing. */
    if (unused_ops != 0)
        cmn_err(CE_WARN, "vfs_setfsops: %s: %d operations supplied "
            "but not used", vfssw[fstype].vsw_name, unused_ops);
#endif

    return (0);
}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_makefsops(const fs_operation_def_t *template, vfsops_t **actual)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock    int error;
99653d4ee642c6528e88224f12409a5f23060994eschrock    int unused_ops;
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock    *actual = (vfsops_t *)kmem_alloc(sizeof (vfsops_t), KM_SLEEP);
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock    error = fs_copyfsops(template, *actual, &unused_ops);
99653d4ee642c6528e88224f12409a5f23060994eschrock    if (error != 0) {
99653d4ee642c6528e88224f12409a5f23060994eschrock        kmem_free(*actual, sizeof (vfsops_t));
99653d4ee642c6528e88224f12409a5f23060994eschrock        *actual = NULL;
99653d4ee642c6528e88224f12409a5f23060994eschrock        return (error);
b1b8ab34de515a5e83206da22c3d7e563241b021lling    }
b1b8ab34de515a5e83206da22c3d7e563241b021lling
99653d4ee642c6528e88224f12409a5f23060994eschrock    return (0);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Free a vfsops structure created as a result of vfs_makefsops().
99653d4ee642c6528e88224f12409a5f23060994eschrock * NOTE: For a vfsops structure initialized by vfs_setfsops(), use
99653d4ee642c6528e88224f12409a5f23060994eschrock * vfs_freevfsops_by_type().
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockvoid
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_freevfsops(vfsops_t *vfsops)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
43afaaa8b73f73af765f4fa90f39a0f86cb8a364Eric Schrock    kmem_free(vfsops, sizeof (vfsops_t));
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Since the vfsops structure is part of the vfssw table and wasn't
99653d4ee642c6528e88224f12409a5f23060994eschrock * really allocated, we're not really freeing anything.  We keep
99653d4ee642c6528e88224f12409a5f23060994eschrock * the name for consistency with vfs_freevfsops().  We do, however,
99653d4ee642c6528e88224f12409a5f23060994eschrock * need to take care of a little bookkeeping.
99653d4ee642c6528e88224f12409a5f23060994eschrock * NOTE: For a vfsops structure created by vfs_setfsops(), use
99653d4ee642c6528e88224f12409a5f23060994eschrock * vfs_freevfsops_by_type().
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_freevfsops_by_type(int fstype)
b1b8ab34de515a5e83206da22c3d7e563241b021lling{
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    /* Verify that fstype refers to a loaded fs (and not fsid 0). */
b1b8ab34de515a5e83206da22c3d7e563241b021lling    if ((fstype <= 0) || (fstype >= nfstype))
b1b8ab34de515a5e83206da22c3d7e563241b021lling        return (EINVAL);
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    WLOCK_VFSSW();
b1b8ab34de515a5e83206da22c3d7e563241b021lling    if ((vfssw[fstype].vsw_flag & VSW_INSTALLED) == 0) {
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks        WUNLOCK_VFSSW();
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks        return (EINVAL);
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks    }
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
54d692b75b7a6f90ce7787309da5451f7458e66aGeorge Wilson    vfssw[fstype].vsw_flag &= ~VSW_INSTALLED;
54d692b75b7a6f90ce7787309da5451f7458e66aGeorge Wilson    WUNLOCK_VFSSW();
54d692b75b7a6f90ce7787309da5451f7458e66aGeorge Wilson
54d692b75b7a6f90ce7787309da5451f7458e66aGeorge Wilson    return (0);
54d692b75b7a6f90ce7787309da5451f7458e66aGeorge Wilson}
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
99653d4ee642c6528e88224f12409a5f23060994eschrock/* Support routines used to reference vfs_op */
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/* Set the operations vector for a vfs */
99653d4ee642c6528e88224f12409a5f23060994eschrockvoid
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_setops(vfs_t *vfsp, vfsops_t *vfsops)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock    vfsops_t    *op;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens    ASSERT(vfsp != NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens    ASSERT(vfsops != NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens    op = vfsp->vfs_op;
99653d4ee642c6528e88224f12409a5f23060994eschrock    membar_consumer();
99653d4ee642c6528e88224f12409a5f23060994eschrock    if (vfsp->vfs_femhead == NULL &&
fa9e4066f08beec538e775443c5be79dd423fcabahrens        casptr(&vfsp->vfs_op, op, vfsops) == op) {
99653d4ee642c6528e88224f12409a5f23060994eschrock        return;
fa9e4066f08beec538e775443c5be79dd423fcabahrens    }
fa9e4066f08beec538e775443c5be79dd423fcabahrens    fsem_setvfsops(vfsp, vfsops);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* Retrieve the operations vector for a vfs */
fa9e4066f08beec538e775443c5be79dd423fcabahrensvfsops_t *
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_getops(vfs_t *vfsp)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens    vfsops_t    *op;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens    ASSERT(vfsp != NULL);
99653d4ee642c6528e88224f12409a5f23060994eschrock
fa9e4066f08beec538e775443c5be79dd423fcabahrens    op = vfsp->vfs_op;
fa9e4066f08beec538e775443c5be79dd423fcabahrens    membar_consumer();
fa9e4066f08beec538e775443c5be79dd423fcabahrens    if (vfsp->vfs_femhead == NULL && op == vfsp->vfs_op) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens        return (op);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    } else {
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        return (fsem_getvfsops(vfsp));
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    }
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock}
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock/*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Returns non-zero (1) if the vfsops matches that of the vfs.
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Returns zero (0) if not.
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrockint
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrockvfs_matchops(vfs_t *vfsp, vfsops_t *vfsops)
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock{
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    return (vfs_getops(vfsp) == vfsops);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock}
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock/*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Returns non-zero (1) if the file system has installed a non-default,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * non-error vfs_sync routine.  Returns zero (0) otherwise.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrensint
fa9e4066f08beec538e775443c5be79dd423fcabahrensvfs_can_sync(vfs_t *vfsp)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
fa9e4066f08beec538e775443c5be79dd423fcabahrens    /* vfs_sync() routine is not the default/error function */
fa9e4066f08beec538e775443c5be79dd423fcabahrens    return (vfs_getops(vfsp)->vfs_sync != fs_sync);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Initialize a vfs structure.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrensvoid
fa9e4066f08beec538e775443c5be79dd423fcabahrensvfs_init(vfs_t *vfsp, vfsops_t *op, void *data)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens    /* Other initialization has been moved to vfs_alloc() */
fa9e4066f08beec538e775443c5be79dd423fcabahrens    vfsp->vfs_count = 0;
fa9e4066f08beec538e775443c5be79dd423fcabahrens    vfsp->vfs_next = vfsp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens    vfsp->vfs_prev = vfsp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens    vfsp->vfs_zone_next = vfsp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens    vfsp->vfs_zone_prev = vfsp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens    vfsp->vfs_lofi_minor = 0;
fa9e4066f08beec538e775443c5be79dd423fcabahrens    sema_init(&vfsp->vfs_reflock, 1, NULL, SEMA_DEFAULT, NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens    vfsimpl_setup(vfsp);
fa9e4066f08beec538e775443c5be79dd423fcabahrens    vfsp->vfs_data = (data);
5c7098917783942b65876f681a21342761227dadeschrock    vfs_setops((vfsp), (op));
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Allocate and initialize the vfs implementation private data
fa9e4066f08beec538e775443c5be79dd423fcabahrens * structure, vfs_impl_t.
5c7098917783942b65876f681a21342761227dadeschrock */
fa9e4066f08beec538e775443c5be79dd423fcabahrensvoid
5c7098917783942b65876f681a21342761227dadeschrockvfsimpl_setup(vfs_t *vfsp)
5c7098917783942b65876f681a21342761227dadeschrock{
5c7098917783942b65876f681a21342761227dadeschrock    int i;
5c7098917783942b65876f681a21342761227dadeschrock
5c7098917783942b65876f681a21342761227dadeschrock    if (vfsp->vfs_implp != NULL) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens        return;
5c7098917783942b65876f681a21342761227dadeschrock    }
5c7098917783942b65876f681a21342761227dadeschrock
5c7098917783942b65876f681a21342761227dadeschrock    vfsp->vfs_implp = kmem_alloc(sizeof (vfs_impl_t), KM_SLEEP);
5c7098917783942b65876f681a21342761227dadeschrock    /* Note that these are #define'd in vfs.h */
5c7098917783942b65876f681a21342761227dadeschrock    vfsp->vfs_vskap = NULL;
5c7098917783942b65876f681a21342761227dadeschrock    vfsp->vfs_fstypevsp = NULL;
5c7098917783942b65876f681a21342761227dadeschrock
5c7098917783942b65876f681a21342761227dadeschrock    /* Set size of counted array, then zero the array */
5c7098917783942b65876f681a21342761227dadeschrock    vfsp->vfs_featureset[0] = VFS_FEATURE_MAXSZ - 1;
5c7098917783942b65876f681a21342761227dadeschrock    for (i = 1; i <  VFS_FEATURE_MAXSZ; i++) {
5c7098917783942b65876f681a21342761227dadeschrock        vfsp->vfs_featureset[i] = 0;
5c7098917783942b65876f681a21342761227dadeschrock    }
3d7072f8bd27709dba14f6fe336f149d25d9e207eschrock}
3d7072f8bd27709dba14f6fe336f149d25d9e207eschrock
5c7098917783942b65876f681a21342761227dadeschrock/*
5c7098917783942b65876f681a21342761227dadeschrock * Release the vfs_impl_t structure, if it exists. Some unbundled
5c7098917783942b65876f681a21342761227dadeschrock * filesystems may not use the newer version of vfs and thus
fa9e4066f08beec538e775443c5be79dd423fcabahrens * would not contain this implementation private data structure.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockvoid
99653d4ee642c6528e88224f12409a5f23060994eschrockvfsimpl_teardown(vfs_t *vfsp)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock    vfs_impl_t  *vip = vfsp->vfs_implp;
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock    if (vip == NULL)
99653d4ee642c6528e88224f12409a5f23060994eschrock        return;
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock    kmem_free(vfsp->vfs_implp, sizeof (vfs_impl_t));
99653d4ee642c6528e88224f12409a5f23060994eschrock    vfsp->vfs_implp = NULL;
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * VFS system calls: mount, umount, syssync, statfs, fstatfs, statvfs,
99653d4ee642c6528e88224f12409a5f23060994eschrock * fstatvfs, and sysfs moved to common/syscall.
c08432ebe688ed6410b302771a9afd8e23e2a7cceschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Update every mounted file system.  We call the vfs_sync operation of
99653d4ee642c6528e88224f12409a5f23060994eschrock * each file system type, passing it a NULL vfsp to indicate that all
99653d4ee642c6528e88224f12409a5f23060994eschrock * mounted file systems of that type should be updated.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockvoid
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_sync(int flag)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock    struct vfssw *vswp;
99653d4ee642c6528e88224f12409a5f23060994eschrock    RLOCK_VFSSW();
99653d4ee642c6528e88224f12409a5f23060994eschrock    for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
91ebeef555ce7f899b6270a3c2df47b51f7ad59aahrens        if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling            vfs_refvfssw(vswp);
b2634b9c57bbcfa01bb5dec2e196aec32957925fEric Taylor            RUNLOCK_VFSSW();
91ebeef555ce7f899b6270a3c2df47b51f7ad59aahrens            (void) (*vswp->vsw_vfsops.vfs_sync)(NULL, flag,
99653d4ee642c6528e88224f12409a5f23060994eschrock                CRED());
99653d4ee642c6528e88224f12409a5f23060994eschrock            vfs_unrefvfssw(vswp);
99653d4ee642c6528e88224f12409a5f23060994eschrock            RLOCK_VFSSW();
99653d4ee642c6528e88224f12409a5f23060994eschrock        }
99653d4ee642c6528e88224f12409a5f23060994eschrock    }
99653d4ee642c6528e88224f12409a5f23060994eschrock    RUNLOCK_VFSSW();
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
/*
 * Convenience wrapper: update every mounted file system by calling
 * vfs_sync() with a flag value of zero.
 */
void
sync(void)
{
    vfs_sync(0);
}
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
29ab75c9a733dad2978c4860efd954b5625e3467rm/*
069f55e237020c4a4907b235fc38fafc6442ce94Eric Schrock * External routines.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
b2634b9c57bbcfa01bb5dec2e196aec32957925fEric Taylor
99653d4ee642c6528e88224f12409a5f23060994eschrockkrwlock_t vfssw_lock;   /* lock accesses to vfssw */
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Lock for accessing the vfs linked list.  Initialized in vfs_mountroot(),
99653d4ee642c6528e88224f12409a5f23060994eschrock * but otherwise should be accessed only via vfs_list_lock() and
99653d4ee642c6528e88224f12409a5f23060994eschrock * vfs_list_unlock().  Also used to protect the timestamp for mods to the list.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic krwlock_t vfslist;
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Mount devfs on /devices. This is done right after root is mounted
99653d4ee642c6528e88224f12409a5f23060994eschrock * to provide device access support for the system
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrockvfs_mountdevices(void)
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris{
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris    struct vfssw *vsw;
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris    struct vnode *mvp;
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris    struct mounta mounta = {    /* fake mounta for devfs_mount() */
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris        NULL,
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris        NULL,
5aba80db367b061758a29154d304977d00d8e4f4ck        MS_SYSSPACE,
5aba80db367b061758a29154d304977d00d8e4f4ck        NULL,
5aba80db367b061758a29154d304977d00d8e4f4ck        NULL,
5aba80db367b061758a29154d304977d00d8e4f4ck        0,
5aba80db367b061758a29154d304977d00d8e4f4ck        NULL,
5aba80db367b061758a29154d304977d00d8e4f4ck        0
5aba80db367b061758a29154d304977d00d8e4f4ck    };
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck    /*
5aba80db367b061758a29154d304977d00d8e4f4ck     * _init devfs module to fill in the vfssw
5aba80db367b061758a29154d304977d00d8e4f4ck     */
5aba80db367b061758a29154d304977d00d8e4f4ck    if (modload("fs", "devfs") == -1)
5aba80db367b061758a29154d304977d00d8e4f4ck        panic("Cannot _init devfs module");
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck    /*
5aba80db367b061758a29154d304977d00d8e4f4ck     * Hold vfs
5aba80db367b061758a29154d304977d00d8e4f4ck     */
5aba80db367b061758a29154d304977d00d8e4f4ck    RLOCK_VFSSW();
5aba80db367b061758a29154d304977d00d8e4f4ck    vsw = vfs_getvfsswbyname("devfs");
5aba80db367b061758a29154d304977d00d8e4f4ck    VFS_INIT(&devices, &vsw->vsw_vfsops, NULL);
5aba80db367b061758a29154d304977d00d8e4f4ck    VFS_HOLD(&devices);
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck    /*
5aba80db367b061758a29154d304977d00d8e4f4ck     * Locate mount point
5aba80db367b061758a29154d304977d00d8e4f4ck     */
5aba80db367b061758a29154d304977d00d8e4f4ck    if (lookupname("/devices", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp))
5aba80db367b061758a29154d304977d00d8e4f4ck        panic("Cannot find /devices");
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck    /*
5aba80db367b061758a29154d304977d00d8e4f4ck     * Perform the mount of /devices
5aba80db367b061758a29154d304977d00d8e4f4ck     */
5aba80db367b061758a29154d304977d00d8e4f4ck    if (VFS_MOUNT(&devices, mvp, &mounta, CRED()))
5aba80db367b061758a29154d304977d00d8e4f4ck        panic("Cannot mount /devices");
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck    RUNLOCK_VFSSW();
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck    /*
5aba80db367b061758a29154d304977d00d8e4f4ck     * Set appropriate members and add to vfs list for mnttab display
5aba80db367b061758a29154d304977d00d8e4f4ck     */
5aba80db367b061758a29154d304977d00d8e4f4ck    vfs_setresource(&devices, "/devices");
5aba80db367b061758a29154d304977d00d8e4f4ck    vfs_setmntpoint(&devices, "/devices");
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck    /*
5aba80db367b061758a29154d304977d00d8e4f4ck     * Hold the root of /devices so it won't go away
5aba80db367b061758a29154d304977d00d8e4f4ck     */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    if (VFS_ROOT(&devices, &devicesdir))
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        panic("vfs_mountdevices: not devices root");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    if (vfs_lock(&devices) != 0) {
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        VN_RELE(devicesdir);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /devices");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        return;
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    }
9966ca11f4a1481acce85f690fa59e4084050627Matthew Ahrens
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    if (vn_vfswlock(mvp) != 0) {
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        vfs_unlock(&devices);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        VN_RELE(devicesdir);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        cmn_err(CE_NOTE, "Cannot acquire vfswlock of /devices");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        return;
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    }
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    vfs_add(mvp, &devices, 0);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    vn_vfsunlock(mvp);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    vfs_unlock(&devices);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    VN_RELE(devicesdir);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock}
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock/*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * mount the first instance of /dev  to root and remain mounted
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrockstatic void
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrockvfs_mountdev1(void)
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock{
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    struct vfssw *vsw;
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    struct vnode *mvp;
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    struct mounta mounta = {    /* fake mounta for sdev_mount() */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        NULL,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        NULL,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        MS_SYSSPACE | MS_OVERLAY,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        NULL,
a2eea2e101e6a163a537dcc6d4e3c4da2a0ea5b2ahrens        NULL,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        0,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        NULL,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        0
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    };
990b4856d0eaada6f8140335733a1b1771ed2746lling
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    /*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock     * _init dev module to fill in the vfssw
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock     */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    if (modload("fs", "dev") == -1)
990b4856d0eaada6f8140335733a1b1771ed2746lling        cmn_err(CE_PANIC, "Cannot _init dev module\n");
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    /*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock     * Hold vfs
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock     */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    RLOCK_VFSSW();
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    vsw = vfs_getvfsswbyname("dev");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    VFS_INIT(&dev, &vsw->vsw_vfsops, NULL);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    VFS_HOLD(&dev);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    /*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock     * Locate mount point
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock     */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    if (lookupname("/dev", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp))
990b4856d0eaada6f8140335733a1b1771ed2746lling        cmn_err(CE_PANIC, "Cannot find /dev\n");
990b4856d0eaada6f8140335733a1b1771ed2746lling
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    /*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock     * Perform the mount of /dev
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock     */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    if (VFS_MOUNT(&dev, mvp, &mounta, CRED()))
990b4856d0eaada6f8140335733a1b1771ed2746lling        cmn_err(CE_PANIC, "Cannot mount /dev 1\n");
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    RUNLOCK_VFSSW();
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    /*
990b4856d0eaada6f8140335733a1b1771ed2746lling     * Set appropriate members and add to vfs list for mnttab display
990b4856d0eaada6f8140335733a1b1771ed2746lling     */
990b4856d0eaada6f8140335733a1b1771ed2746lling    vfs_setresource(&dev, "/dev");
990b4856d0eaada6f8140335733a1b1771ed2746lling    vfs_setmntpoint(&dev, "/dev");
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    /*
990b4856d0eaada6f8140335733a1b1771ed2746lling     * Hold the root of /dev so it won't go away
990b4856d0eaada6f8140335733a1b1771ed2746lling     */
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (VFS_ROOT(&dev, &devdir))
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        cmn_err(CE_PANIC, "vfs_mountdev1: not dev root");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    if (vfs_lock(&dev) != 0) {
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        VN_RELE(devdir);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /dev");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        return;
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    }
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock    if (vn_vfswlock(mvp) != 0) {
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        vfs_unlock(&dev);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        VN_RELE(devdir);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock        cmn_err(CE_NOTE, "Cannot acquire vfswlock of /dev");
b1b8ab34de515a5e83206da22c3d7e563241b021lling        return;
990b4856d0eaada6f8140335733a1b1771ed2746lling    }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    vfs_add(mvp, &dev, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling    vn_vfsunlock(mvp);
990b4856d0eaada6f8140335733a1b1771ed2746lling    vfs_unlock(&dev);
990b4856d0eaada6f8140335733a1b1771ed2746lling    VN_RELE(devdir);
990b4856d0eaada6f8140335733a1b1771ed2746lling}
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling/*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Mount required filesystem. This is done right after root is mounted.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746llingstatic void
990b4856d0eaada6f8140335733a1b1771ed2746llingvfs_mountfs(char *module, char *spec, char *path)
990b4856d0eaada6f8140335733a1b1771ed2746lling{
990b4856d0eaada6f8140335733a1b1771ed2746lling    struct vnode *mvp;
990b4856d0eaada6f8140335733a1b1771ed2746lling    struct mounta mounta;
990b4856d0eaada6f8140335733a1b1771ed2746lling    vfs_t *vfsp;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    mounta.flags = MS_SYSSPACE | MS_DATA;
990b4856d0eaada6f8140335733a1b1771ed2746lling    mounta.fstype = module;
990b4856d0eaada6f8140335733a1b1771ed2746lling    mounta.spec = spec;
990b4856d0eaada6f8140335733a1b1771ed2746lling    mounta.dir = path;
b1b8ab34de515a5e83206da22c3d7e563241b021lling    if (lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling        cmn_err(CE_WARN, "Cannot find %s", path);
b1b8ab34de515a5e83206da22c3d7e563241b021lling        return;
990b4856d0eaada6f8140335733a1b1771ed2746lling    }
b1b8ab34de515a5e83206da22c3d7e563241b021lling    if (domount(NULL, &mounta, mvp, CRED(), &vfsp))
b1b8ab34de515a5e83206da22c3d7e563241b021lling        cmn_err(CE_WARN, "Cannot mount %s", path);
b1b8ab34de515a5e83206da22c3d7e563241b021lling    else
b1b8ab34de515a5e83206da22c3d7e563241b021lling        VFS_RELE(vfsp);
b1b8ab34de515a5e83206da22c3d7e563241b021lling    VN_RELE(mvp);
b1b8ab34de515a5e83206da22c3d7e563241b021lling}
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling/*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * vfs_mountroot is called by main() to mount the root filesystem.
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021llingvoid
b1b8ab34de515a5e83206da22c3d7e563241b021llingvfs_mountroot(void)
b1b8ab34de515a5e83206da22c3d7e563241b021lling{
b1b8ab34de515a5e83206da22c3d7e563241b021lling    struct vnode    *rvp = NULL;
b1b8ab34de515a5e83206da22c3d7e563241b021lling    char        *path;
b1b8ab34de515a5e83206da22c3d7e563241b021lling    size_t      plen;
b1b8ab34de515a5e83206da22c3d7e563241b021lling    struct vfssw    *vswp;
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    rw_init(&vfssw_lock, NULL, RW_DEFAULT, NULL);
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante    rw_init(&vfslist, NULL, RW_DEFAULT, NULL);
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante    /*
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante     * Alloc the vfs hash bucket array and locks
b1b8ab34de515a5e83206da22c3d7e563241b021lling     */
b1b8ab34de515a5e83206da22c3d7e563241b021lling    rvfs_list = kmem_zalloc(vfshsz * sizeof (rvfs_t), KM_SLEEP);
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * Call machine-dependent routine "rootconf" to choose a root
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * file system type.
b1b8ab34de515a5e83206da22c3d7e563241b021lling     */
b1b8ab34de515a5e83206da22c3d7e563241b021lling    if (rootconf())
b1b8ab34de515a5e83206da22c3d7e563241b021lling        panic("vfs_mountroot: cannot mount root");
b1b8ab34de515a5e83206da22c3d7e563241b021lling    /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * Get vnode for '/'.  Set up rootdir, u.u_rdir and u.u_cdir
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * to point to it.  These are used by lookuppn() so that it
990b4856d0eaada6f8140335733a1b1771ed2746lling     * knows where to start from ('/' or '.').
990b4856d0eaada6f8140335733a1b1771ed2746lling     */
990b4856d0eaada6f8140335733a1b1771ed2746lling    vfs_setmntpoint(rootvfs, "/");
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (VFS_ROOT(rootvfs, &rootdir))
990b4856d0eaada6f8140335733a1b1771ed2746lling        panic("vfs_mountroot: no root vnode");
990b4856d0eaada6f8140335733a1b1771ed2746lling    PTOU(curproc)->u_cdir = rootdir;
b1b8ab34de515a5e83206da22c3d7e563241b021lling    VN_HOLD(PTOU(curproc)->u_cdir);
b1b8ab34de515a5e83206da22c3d7e563241b021lling    PTOU(curproc)->u_rdir = NULL;
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * Setup the global zone's rootvp, now that it exists.
b1b8ab34de515a5e83206da22c3d7e563241b021lling     */
b1b8ab34de515a5e83206da22c3d7e563241b021lling    global_zone->zone_rootvp = rootdir;
b1b8ab34de515a5e83206da22c3d7e563241b021lling    VN_HOLD(global_zone->zone_rootvp);
b1b8ab34de515a5e83206da22c3d7e563241b021lling
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante    /*
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante     * Notify the module code that it can begin using the
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante     * root filesystem instead of the boot program's services.
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante     */
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante    modrootloaded = 1;
b1b8ab34de515a5e83206da22c3d7e563241b021lling
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante    /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * Special handling for a ZFS root file system.
b1b8ab34de515a5e83206da22c3d7e563241b021lling     */
b1b8ab34de515a5e83206da22c3d7e563241b021lling    zfs_boot_init();
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * Set up mnttab information for root
990b4856d0eaada6f8140335733a1b1771ed2746lling     */
990b4856d0eaada6f8140335733a1b1771ed2746lling    vfs_setresource(rootvfs, rootfs.bo_name);
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * Notify cluster software that the root filesystem is available.
b1b8ab34de515a5e83206da22c3d7e563241b021lling     */
b1b8ab34de515a5e83206da22c3d7e563241b021lling    clboot_mountroot();
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    /* Now that we're all done with the root FS, set up its vopstats */
b1b8ab34de515a5e83206da22c3d7e563241b021lling    if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) != NULL) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling        /* Set flag for statistics collection */
b1b8ab34de515a5e83206da22c3d7e563241b021lling        if (vswp->vsw_flag & VSW_STATS) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling            initialize_vopstats(&rootvfs->vfs_vopstats);
b1b8ab34de515a5e83206da22c3d7e563241b021lling            rootvfs->vfs_flag |= VFS_STATS;
b1b8ab34de515a5e83206da22c3d7e563241b021lling            rootvfs->vfs_fstypevsp =
b1b8ab34de515a5e83206da22c3d7e563241b021lling                get_fstype_vopstats(rootvfs, vswp);
b1b8ab34de515a5e83206da22c3d7e563241b021lling            rootvfs->vfs_vskap = get_vskstat_anchor(rootvfs);
b1b8ab34de515a5e83206da22c3d7e563241b021lling        }
b1b8ab34de515a5e83206da22c3d7e563241b021lling        vfs_unrefvfssw(vswp);
b1b8ab34de515a5e83206da22c3d7e563241b021lling    }
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * Mount /devices, /dev instance 1, /system/contract, /etc/mnttab,
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * /etc/svc/volatile, /etc/dfs/sharetab, /system/object, and /proc.
b1b8ab34de515a5e83206da22c3d7e563241b021lling     */
b1b8ab34de515a5e83206da22c3d7e563241b021lling    vfs_mountdevices();
b1b8ab34de515a5e83206da22c3d7e563241b021lling    vfs_mountdev1();
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    vfs_mountfs("ctfs", "ctfs", CTFS_ROOT);
b1b8ab34de515a5e83206da22c3d7e563241b021lling    vfs_mountfs("proc", "/proc", "/proc");
b1b8ab34de515a5e83206da22c3d7e563241b021lling    vfs_mountfs("mntfs", "/etc/mnttab", "/etc/mnttab");
b1b8ab34de515a5e83206da22c3d7e563241b021lling    vfs_mountfs("tmpfs", "/etc/svc/volatile", "/etc/svc/volatile");
b1b8ab34de515a5e83206da22c3d7e563241b021lling    vfs_mountfs("objfs", "objfs", OBJFS_ROOT);
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    if (getzoneid() == GLOBAL_ZONEID) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling        vfs_mountfs("sharefs", "sharefs", "/etc/dfs/sharetab");
b1b8ab34de515a5e83206da22c3d7e563241b021lling    }
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling#ifdef __sparc
b1b8ab34de515a5e83206da22c3d7e563241b021lling    /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * This bit of magic can go away when we convert sparc to
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * the new boot architecture based on ramdisk.
b1b8ab34de515a5e83206da22c3d7e563241b021lling     *
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * Booting off a mirrored root volume:
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * At this point, we have booted and mounted root on a
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * single component of the mirror.  Complete the boot
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * by configuring SVM and converting the root to the
990b4856d0eaada6f8140335733a1b1771ed2746lling     * dev_t of the mirrored root device.  This dev_t conversion
990b4856d0eaada6f8140335733a1b1771ed2746lling     * only works because the underlying device doesn't change.
b1b8ab34de515a5e83206da22c3d7e563241b021lling     */
b1b8ab34de515a5e83206da22c3d7e563241b021lling    if (root_is_svm) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling        if (svm_rootconf()) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling            panic("vfs_mountroot: cannot remount root");
b1b8ab34de515a5e83206da22c3d7e563241b021lling        }
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling        /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling         * mnttab should reflect the new root device
b1b8ab34de515a5e83206da22c3d7e563241b021lling         */
b1b8ab34de515a5e83206da22c3d7e563241b021lling        vfs_lock_wait(rootvfs);
b1b8ab34de515a5e83206da22c3d7e563241b021lling        vfs_setresource(rootvfs, rootfs.bo_name);
b1b8ab34de515a5e83206da22c3d7e563241b021lling        vfs_unlock(rootvfs);
b1b8ab34de515a5e83206da22c3d7e563241b021lling    }
990b4856d0eaada6f8140335733a1b1771ed2746lling#endif /* __sparc */
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * Look up the root device via devfs so that a dv_node is
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * created for it. The vnode is never VN_RELE()ed.
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * We allocate more than MAXPATHLEN so that the
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * buffer passed to i_ddi_prompath_to_devfspath() is
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * exactly MAXPATHLEN (the function expects a buffer
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * of that length).
b1b8ab34de515a5e83206da22c3d7e563241b021lling     */
b1b8ab34de515a5e83206da22c3d7e563241b021lling    plen = strlen("/devices");
b1b8ab34de515a5e83206da22c3d7e563241b021lling    path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP);
b1b8ab34de515a5e83206da22c3d7e563241b021lling    (void) strcpy(path, "/devices");
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen)
b1b8ab34de515a5e83206da22c3d7e563241b021lling        != DDI_SUCCESS ||
b1b8ab34de515a5e83206da22c3d7e563241b021lling        lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling
990b4856d0eaada6f8140335733a1b1771ed2746lling        /* NUL terminate in case "path" has garbage */
b1b8ab34de515a5e83206da22c3d7e563241b021lling        path[plen + MAXPATHLEN - 1] = '\0';
b1b8ab34de515a5e83206da22c3d7e563241b021lling#ifdef  DEBUG
b1b8ab34de515a5e83206da22c3d7e563241b021lling        cmn_err(CE_WARN, "!Cannot lookup root device: %s", path);
990b4856d0eaada6f8140335733a1b1771ed2746lling#endif
b1b8ab34de515a5e83206da22c3d7e563241b021lling    }
b1b8ab34de515a5e83206da22c3d7e563241b021lling    kmem_free(path, plen + MAXPATHLEN);
b1b8ab34de515a5e83206da22c3d7e563241b021lling    vfs_mnttabvp_setup();
990b4856d0eaada6f8140335733a1b1771ed2746lling}
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling/*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * If remount failed and we're in a zone we need to check for the zone
990b4856d0eaada6f8140335733a1b1771ed2746lling * root path and strip it before the call to vfs_setpath().
b1b8ab34de515a5e83206da22c3d7e563241b021lling *
b1b8ab34de515a5e83206da22c3d7e563241b021lling * If strpath doesn't begin with the zone_rootpath the original
b1b8ab34de515a5e83206da22c3d7e563241b021lling * strpath is returned unchanged.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic const char *
b1b8ab34de515a5e83206da22c3d7e563241b021llingstripzonepath(const char *strpath)
b1b8ab34de515a5e83206da22c3d7e563241b021lling{
b1b8ab34de515a5e83206da22c3d7e563241b021lling    char *str1, *str2;
b1b8ab34de515a5e83206da22c3d7e563241b021lling    int i;
b1b8ab34de515a5e83206da22c3d7e563241b021lling    zone_t *zonep = curproc->p_zone;
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    if (zonep->zone_rootpath == NULL || strpath == NULL) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling        return (NULL);
b1b8ab34de515a5e83206da22c3d7e563241b021lling    }
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling    /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * we check for the end of the string at one past the
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * current position because the zone_rootpath always
b1b8ab34de515a5e83206da22c3d7e563241b021lling     * ends with "/" but we don't want to strip that off.
b1b8ab34de515a5e83206da22c3d7e563241b021lling     */
b1b8ab34de515a5e83206da22c3d7e563241b021lling    str1 = zonep->zone_rootpath;
b1b8ab34de515a5e83206da22c3d7e563241b021lling    str2 = (char *)strpath;
b1b8ab34de515a5e83206da22c3d7e563241b021lling    ASSERT(str1[0] != '\0');
b1b8ab34de515a5e83206da22c3d7e563241b021lling    for (i = 0; str1[i + 1] != '\0'; i++) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling        if (str1[i] != str2[i])
b1b8ab34de515a5e83206da22c3d7e563241b021lling            return ((char *)strpath);
b1b8ab34de515a5e83206da22c3d7e563241b021lling    }
b1b8ab34de515a5e83206da22c3d7e563241b021lling    return (&str2[i]);
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks}
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling/*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Check to see if our "block device" is actually a file.  If so,
990b4856d0eaada6f8140335733a1b1771ed2746lling * automatically add a lofi device, and keep track of this fact.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746llingstatic int
990b4856d0eaada6f8140335733a1b1771ed2746llinglofi_add(const char *fsname, struct vfs *vfsp,
990b4856d0eaada6f8140335733a1b1771ed2746lling    mntopts_t *mntopts, struct mounta *uap)
990b4856d0eaada6f8140335733a1b1771ed2746lling{
990b4856d0eaada6f8140335733a1b1771ed2746lling    int fromspace = (uap->flags & MS_SYSSPACE) ?
990b4856d0eaada6f8140335733a1b1771ed2746lling        UIO_SYSSPACE : UIO_USERSPACE;
990b4856d0eaada6f8140335733a1b1771ed2746lling    struct lofi_ioctl *li = NULL;
990b4856d0eaada6f8140335733a1b1771ed2746lling    struct vnode *vp = NULL;
990b4856d0eaada6f8140335733a1b1771ed2746lling    struct pathname pn = { NULL };
990b4856d0eaada6f8140335733a1b1771ed2746lling    ldi_ident_t ldi_id;
990b4856d0eaada6f8140335733a1b1771ed2746lling    ldi_handle_t ldi_hdl;
990b4856d0eaada6f8140335733a1b1771ed2746lling    vfssw_t *vfssw;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int minor;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int err = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (fsname == NULL ||
990b4856d0eaada6f8140335733a1b1771ed2746lling        (vfssw = vfs_getvfssw(fsname)) == NULL)
990b4856d0eaada6f8140335733a1b1771ed2746lling        return (0);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (!(vfssw->vsw_flag & VSW_CANLOFI)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling        vfs_unrefvfssw(vfssw);
990b4856d0eaada6f8140335733a1b1771ed2746lling        return (0);
990b4856d0eaada6f8140335733a1b1771ed2746lling    }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    vfs_unrefvfssw(vfssw);
990b4856d0eaada6f8140335733a1b1771ed2746lling    vfssw = NULL;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (pn_get(uap->spec, fromspace, &pn) != 0)
990b4856d0eaada6f8140335733a1b1771ed2746lling        return (0);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (lookupname(uap->spec, fromspace, FOLLOW, NULL, &vp) != 0)
990b4856d0eaada6f8140335733a1b1771ed2746lling        goto out;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (vp->v_type != VREG)
990b4856d0eaada6f8140335733a1b1771ed2746lling        goto out;
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
990b4856d0eaada6f8140335733a1b1771ed2746lling    /* OK, this is a lofi mount. */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
990b4856d0eaada6f8140335733a1b1771ed2746lling    if ((uap->flags & (MS_REMOUNT|MS_GLOBAL)) ||
990b4856d0eaada6f8140335733a1b1771ed2746lling        vfs_optionisset_nolock(mntopts, MNTOPT_SUID, NULL) ||
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks        vfs_optionisset_nolock(mntopts, MNTOPT_SETUID, NULL) ||
990b4856d0eaada6f8140335733a1b1771ed2746lling        vfs_optionisset_nolock(mntopts, MNTOPT_DEVICES, NULL)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling        err = EINVAL;
990b4856d0eaada6f8140335733a1b1771ed2746lling        goto out;
990b4856d0eaada6f8140335733a1b1771ed2746lling    }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    ldi_id = ldi_ident_from_anon();
990b4856d0eaada6f8140335733a1b1771ed2746lling    li = kmem_zalloc(sizeof (*li), KM_SLEEP);
990b4856d0eaada6f8140335733a1b1771ed2746lling    (void) strlcpy(li->li_filename, pn.pn_path, MAXPATHLEN + 1);
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks    /*
069f55e237020c4a4907b235fc38fafc6442ce94Eric Schrock     * The lofi control node is currently exclusive-open.  We'd like
990b4856d0eaada6f8140335733a1b1771ed2746lling     * to improve this, but in the meantime, we'll loop waiting for
f67f35c39aa272d43489ee49625b4965cc83add2Eric Schrock     * access.
990b4856d0eaada6f8140335733a1b1771ed2746lling     */
990b4856d0eaada6f8140335733a1b1771ed2746lling    for (;;) {
990b4856d0eaada6f8140335733a1b1771ed2746lling        err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL,
990b4856d0eaada6f8140335733a1b1771ed2746lling            kcred, &ldi_hdl, ldi_id);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling        if (err != EBUSY)
990b4856d0eaada6f8140335733a1b1771ed2746lling            break;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling        if ((err = delay_sig(hz / 8)) == EINTR)
990b4856d0eaada6f8140335733a1b1771ed2746lling            break;
990b4856d0eaada6f8140335733a1b1771ed2746lling    }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (err)
990b4856d0eaada6f8140335733a1b1771ed2746lling        goto out2;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
990b4856d0eaada6f8140335733a1b1771ed2746lling        FREAD | FWRITE | FEXCL | FKIOCTL, kcred, &minor);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (!err)
990b4856d0eaada6f8140335733a1b1771ed2746lling        vfsp->vfs_lofi_minor = minor;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746llingout2:
990b4856d0eaada6f8140335733a1b1771ed2746lling    ldi_ident_release(ldi_id);
990b4856d0eaada6f8140335733a1b1771ed2746llingout:
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (li != NULL)
990b4856d0eaada6f8140335733a1b1771ed2746lling        kmem_free(li, sizeof (*li));
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (vp != NULL)
990b4856d0eaada6f8140335733a1b1771ed2746lling        VN_RELE(vp);
990b4856d0eaada6f8140335733a1b1771ed2746lling    pn_free(&pn);
990b4856d0eaada6f8140335733a1b1771ed2746lling    return (err);
990b4856d0eaada6f8140335733a1b1771ed2746lling}
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746llingstatic void
990b4856d0eaada6f8140335733a1b1771ed2746llinglofi_remove(struct vfs *vfsp)
990b4856d0eaada6f8140335733a1b1771ed2746lling{
990b4856d0eaada6f8140335733a1b1771ed2746lling    struct lofi_ioctl *li = NULL;
990b4856d0eaada6f8140335733a1b1771ed2746lling    ldi_ident_t ldi_id;
990b4856d0eaada6f8140335733a1b1771ed2746lling    ldi_handle_t ldi_hdl;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int err;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (vfsp->vfs_lofi_minor == 0)
990b4856d0eaada6f8140335733a1b1771ed2746lling        return;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    ldi_id = ldi_ident_from_anon();
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    li = kmem_zalloc(sizeof (*li), KM_SLEEP);
990b4856d0eaada6f8140335733a1b1771ed2746lling    li->li_minor = vfsp->vfs_lofi_minor;
990b4856d0eaada6f8140335733a1b1771ed2746lling    li->li_cleanup = B_TRUE;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    do {
990b4856d0eaada6f8140335733a1b1771ed2746lling        err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL,
990b4856d0eaada6f8140335733a1b1771ed2746lling            kcred, &ldi_hdl, ldi_id);
990b4856d0eaada6f8140335733a1b1771ed2746lling    } while (err == EBUSY);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (err)
990b4856d0eaada6f8140335733a1b1771ed2746lling        goto out;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    err = ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE_MINOR, (intptr_t)li,
990b4856d0eaada6f8140335733a1b1771ed2746lling        FREAD | FWRITE | FEXCL | FKIOCTL, kcred, NULL);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (!err)
990b4856d0eaada6f8140335733a1b1771ed2746lling        vfsp->vfs_lofi_minor = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746llingout:
990b4856d0eaada6f8140335733a1b1771ed2746lling    ldi_ident_release(ldi_id);
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (li != NULL)
990b4856d0eaada6f8140335733a1b1771ed2746lling        kmem_free(li, sizeof (*li));
990b4856d0eaada6f8140335733a1b1771ed2746lling}
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling/*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Common mount code.  Called from the system call entry point, from autofs,
990b4856d0eaada6f8140335733a1b1771ed2746lling * nfsv4 trigger mounts, and from pxfs.
990b4856d0eaada6f8140335733a1b1771ed2746lling *
990b4856d0eaada6f8140335733a1b1771ed2746lling * Takes the effective file system type, mount arguments, the mount point
990b4856d0eaada6f8140335733a1b1771ed2746lling * vnode, flags specifying whether the mount is a remount and whether it
990b4856d0eaada6f8140335733a1b1771ed2746lling * should be entered into the vfs list, and credentials.  Fills in its vfspp
990b4856d0eaada6f8140335733a1b1771ed2746lling * parameter with the mounted file system instance's vfs.
990b4856d0eaada6f8140335733a1b1771ed2746lling *
990b4856d0eaada6f8140335733a1b1771ed2746lling * Note that the effective file system type is specified as a string.  It may
990b4856d0eaada6f8140335733a1b1771ed2746lling * be null, in which case it's determined from the mount arguments, and may
990b4856d0eaada6f8140335733a1b1771ed2746lling * differ from the type specified in the mount arguments; this is a hook to
990b4856d0eaada6f8140335733a1b1771ed2746lling * allow interposition when instantiating file system instances.
990b4856d0eaada6f8140335733a1b1771ed2746lling *
990b4856d0eaada6f8140335733a1b1771ed2746lling * The caller is responsible for releasing its own hold on the mount point
990b4856d0eaada6f8140335733a1b1771ed2746lling * vp (this routine does its own hold when necessary).
990b4856d0eaada6f8140335733a1b1771ed2746lling * Also note that for remounts, the mount point vp should be the vnode for
990b4856d0eaada6f8140335733a1b1771ed2746lling * the root of the file system rather than the vnode that the file system
990b4856d0eaada6f8140335733a1b1771ed2746lling * is mounted on top of.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746llingint
990b4856d0eaada6f8140335733a1b1771ed2746llingdomount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
990b4856d0eaada6f8140335733a1b1771ed2746lling    struct vfs **vfspp)
990b4856d0eaada6f8140335733a1b1771ed2746lling{
990b4856d0eaada6f8140335733a1b1771ed2746lling    struct vfssw    *vswp;
990b4856d0eaada6f8140335733a1b1771ed2746lling    vfsops_t    *vfsops;
990b4856d0eaada6f8140335733a1b1771ed2746lling    struct vfs  *vfsp;
990b4856d0eaada6f8140335733a1b1771ed2746lling    struct vnode    *bvp;
990b4856d0eaada6f8140335733a1b1771ed2746lling    dev_t       bdev = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling    mntopts_t   mnt_mntopts;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int     error = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int     copyout_error = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int     ovflags;
990b4856d0eaada6f8140335733a1b1771ed2746lling    char        *opts = uap->optptr;
990b4856d0eaada6f8140335733a1b1771ed2746lling    char        *inargs = opts;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int     optlen = uap->optlen;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int     remount;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int     rdonly;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int     nbmand = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int     delmip = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int     addmip = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling    int     splice = ((uap->flags & MS_NOSPLICE) == 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling    int     fromspace = (uap->flags & MS_SYSSPACE) ?
990b4856d0eaada6f8140335733a1b1771ed2746lling        UIO_SYSSPACE : UIO_USERSPACE;
990b4856d0eaada6f8140335733a1b1771ed2746lling    char        *resource = NULL, *mountpt = NULL;
990b4856d0eaada6f8140335733a1b1771ed2746lling    refstr_t    *oldresource, *oldmntpt;
990b4856d0eaada6f8140335733a1b1771ed2746lling    struct pathname pn, rpn;
990b4856d0eaada6f8140335733a1b1771ed2746lling    vsk_anchor_t    *vskap;
a9799022bd90b13722204e80112efaa5bf573099ck    char fstname[FSTYPSZ];
a9799022bd90b13722204e80112efaa5bf573099ck
990b4856d0eaada6f8140335733a1b1771ed2746lling    /*
a9799022bd90b13722204e80112efaa5bf573099ck     * The v_flag value for the mount point vp is permanently set
990b4856d0eaada6f8140335733a1b1771ed2746lling     * to VVFSLOCK so that no one bypasses the vn_vfs*locks routine
990b4856d0eaada6f8140335733a1b1771ed2746lling     * for mount point locking.
990b4856d0eaada6f8140335733a1b1771ed2746lling     */
990b4856d0eaada6f8140335733a1b1771ed2746lling    mutex_enter(&vp->v_lock);
990b4856d0eaada6f8140335733a1b1771ed2746lling    vp->v_flag |= VVFSLOCK;
a9799022bd90b13722204e80112efaa5bf573099ck    mutex_exit(&vp->v_lock);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    mnt_mntopts.mo_count = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling    /*
990b4856d0eaada6f8140335733a1b1771ed2746lling     * Find the ops vector to use to invoke the file system-specific mount
990b4856d0eaada6f8140335733a1b1771ed2746lling     * method.  If the fsname argument is non-NULL, use it directly.
a9799022bd90b13722204e80112efaa5bf573099ck     * Otherwise, dig the file system type information out of the mount
a9799022bd90b13722204e80112efaa5bf573099ck     * arguments.
990b4856d0eaada6f8140335733a1b1771ed2746lling     *
990b4856d0eaada6f8140335733a1b1771ed2746lling     * A side effect is to hold the vfssw entry.
990b4856d0eaada6f8140335733a1b1771ed2746lling     *
990b4856d0eaada6f8140335733a1b1771ed2746lling     * Mount arguments can be specified in several ways, which are
990b4856d0eaada6f8140335733a1b1771ed2746lling     * distinguished by flag bit settings.  The preferred way is to set
990b4856d0eaada6f8140335733a1b1771ed2746lling     * MS_OPTIONSTR, indicating an 8 argument mount with the file system
990b4856d0eaada6f8140335733a1b1771ed2746lling     * type supplied as a character string and the last two arguments
990b4856d0eaada6f8140335733a1b1771ed2746lling     * being a pointer to a character buffer and the size of the buffer.
990b4856d0eaada6f8140335733a1b1771ed2746lling     * On entry, the buffer holds a null terminated list of options; on
990b4856d0eaada6f8140335733a1b1771ed2746lling     * return, the string is the list of options the file system
990b4856d0eaada6f8140335733a1b1771ed2746lling     * recognized. If MS_DATA is set arguments five and six point to a
990b4856d0eaada6f8140335733a1b1771ed2746lling     * block of binary data which the file system interprets.
990b4856d0eaada6f8140335733a1b1771ed2746lling     * A further wrinkle is that some callers don't set MS_FSS and MS_DATA
990b4856d0eaada6f8140335733a1b1771ed2746lling     * consistently with these conventions.  To handle them, we check to
990b4856d0eaada6f8140335733a1b1771ed2746lling     * see whether the pointer to the file system name has a numeric value
990b4856d0eaada6f8140335733a1b1771ed2746lling     * less than 256.  If so, we treat it as an index.
990b4856d0eaada6f8140335733a1b1771ed2746lling     */
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (fsname != NULL) {
990b4856d0eaada6f8140335733a1b1771ed2746lling        if ((vswp = vfs_getvfssw(fsname)) == NULL) {
990b4856d0eaada6f8140335733a1b1771ed2746lling            return (EINVAL);
990b4856d0eaada6f8140335733a1b1771ed2746lling        }
990b4856d0eaada6f8140335733a1b1771ed2746lling    } else if (uap->flags & (MS_OPTIONSTR | MS_DATA | MS_FSS)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling        size_t n;
990b4856d0eaada6f8140335733a1b1771ed2746lling        uint_t fstype;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling        fsname = fstname;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling        if ((fstype = (uintptr_t)uap->fstype) < 256) {
990b4856d0eaada6f8140335733a1b1771ed2746lling            RLOCK_VFSSW();
990b4856d0eaada6f8140335733a1b1771ed2746lling            if (fstype == 0 || fstype >= nfstype ||
990b4856d0eaada6f8140335733a1b1771ed2746lling                !ALLOCATED_VFSSW(&vfssw[fstype])) {
990b4856d0eaada6f8140335733a1b1771ed2746lling                RUNLOCK_VFSSW();
990b4856d0eaada6f8140335733a1b1771ed2746lling                return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            (void) strcpy(fsname, vfssw[fstype].vsw_name);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            RUNLOCK_VFSSW();
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            if ((vswp = vfs_getvfssw(fsname)) == NULL)
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens                return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        } else {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens             * Handle either kernel or user address space.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens             */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            if (uap->flags & MS_SYSSPACE) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens                error = copystr(uap->fstype, fsname,
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens                    FSTYPSZ, &n);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            } else {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens                error = copyinstr(uap->fstype, fsname,
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens                    FSTYPSZ, &n);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            if (error) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens                if (error == ENAMETOOLONG)
148434217c040ea38dc844384f6ba68d9b325906Matthew Ahrens                    return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens                return (error);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            if ((vswp = vfs_getvfssw(fsname)) == NULL)
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens                return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    } else {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) == NULL)
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    if (!VFS_INSTALLED(vswp))
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    vfsops = &vswp->vsw_vfsops;
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    vfs_copyopttbl(&vswp->vsw_optproto, &mnt_mntopts);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens     * Fetch mount options and parse them for generic vfs options
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens     */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    if (uap->flags & MS_OPTIONSTR) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens         * Limit the buffer size
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens         */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        if (optlen < 0 || optlen > MAX_MNTOPT_STR) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            error = EINVAL;
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            goto errout;
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        if ((uap->flags & MS_SYSSPACE) == 0) {
990b4856d0eaada6f8140335733a1b1771ed2746lling            inargs = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP);
990b4856d0eaada6f8140335733a1b1771ed2746lling            inargs[0] = '\0';
990b4856d0eaada6f8140335733a1b1771ed2746lling            if (optlen) {
990b4856d0eaada6f8140335733a1b1771ed2746lling                error = copyinstr(opts, inargs, (size_t)optlen,
990b4856d0eaada6f8140335733a1b1771ed2746lling                    NULL);
990b4856d0eaada6f8140335733a1b1771ed2746lling                if (error) {
990b4856d0eaada6f8140335733a1b1771ed2746lling                    goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling                }
990b4856d0eaada6f8140335733a1b1771ed2746lling            }
990b4856d0eaada6f8140335733a1b1771ed2746lling        }
990b4856d0eaada6f8140335733a1b1771ed2746lling        vfs_parsemntopts(&mnt_mntopts, inargs, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling    }
990b4856d0eaada6f8140335733a1b1771ed2746lling    /*
990b4856d0eaada6f8140335733a1b1771ed2746lling     * Flag bits override the options string.
990b4856d0eaada6f8140335733a1b1771ed2746lling     */
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (uap->flags & MS_REMOUNT)
990b4856d0eaada6f8140335733a1b1771ed2746lling        vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_REMOUNT, NULL, 0, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (uap->flags & MS_RDONLY)
990b4856d0eaada6f8140335733a1b1771ed2746lling        vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_RO, NULL, 0, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (uap->flags & MS_NOSUID)
990b4856d0eaada6f8140335733a1b1771ed2746lling        vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling    /*
990b4856d0eaada6f8140335733a1b1771ed2746lling     * Check if this is a remount; must be set in the option string and
990b4856d0eaada6f8140335733a1b1771ed2746lling     * the file system must support a remount option.
990b4856d0eaada6f8140335733a1b1771ed2746lling     */
990b4856d0eaada6f8140335733a1b1771ed2746lling    if (remount = vfs_optionisset_nolock(&mnt_mntopts,
990b4856d0eaada6f8140335733a1b1771ed2746lling        MNTOPT_REMOUNT, NULL)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling        if (!(vswp->vsw_flag & VSW_CANREMOUNT)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling            error = ENOTSUP;
990b4856d0eaada6f8140335733a1b1771ed2746lling            goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling        }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        uap->flags |= MS_REMOUNT;
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens     * uap->flags and vfs_optionisset() should agree.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens     */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    if (rdonly = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_RO, NULL)) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        uap->flags |= MS_RDONLY;
990b4856d0eaada6f8140335733a1b1771ed2746lling    }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    if (vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling        uap->flags |= MS_NOSUID;
990b4856d0eaada6f8140335733a1b1771ed2746lling    }
990b4856d0eaada6f8140335733a1b1771ed2746lling    nbmand = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NBMAND, NULL);
990b4856d0eaada6f8140335733a1b1771ed2746lling    ASSERT(splice || !remount);
990b4856d0eaada6f8140335733a1b1771ed2746lling    /*
990b4856d0eaada6f8140335733a1b1771ed2746lling     * If we are splicing the fs into the namespace,
990b4856d0eaada6f8140335733a1b1771ed2746lling     * perform mount point checks.
990b4856d0eaada6f8140335733a1b1771ed2746lling     *
990b4856d0eaada6f8140335733a1b1771ed2746lling     * We want to resolve the path for the mount point to eliminate
990b4856d0eaada6f8140335733a1b1771ed2746lling     * '.' and ".." and symlinks in mount points; we can't do the
990b4856d0eaada6f8140335733a1b1771ed2746lling     * same for the resource string, since it would turn
990b4856d0eaada6f8140335733a1b1771ed2746lling     * "/dev/dsk/c0t0d0s0" into "/devices/pci@...".  We need to do
990b4856d0eaada6f8140335733a1b1771ed2746lling     * this before grabbing vn_vfswlock(), because otherwise we
990b4856d0eaada6f8140335733a1b1771ed2746lling     * would deadlock with lookuppn().
990b4856d0eaada6f8140335733a1b1771ed2746lling     */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens    if (splice) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        ASSERT(vp->v_count > 0);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens         * Pick up mount point and device from appropriate space.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens         */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        if (pn_get(uap->spec, fromspace, &pn) == 0) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            resource = kmem_alloc(pn.pn_pathlen + 1,
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens                KM_SLEEP);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            (void) strcpy(resource, pn.pn_path);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            pn_free(&pn);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens         * Do a lookupname prior to taking the
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens         * writelock. Mark this as completed if
990b4856d0eaada6f8140335733a1b1771ed2746lling         * successful for later cleanup and addition to
990b4856d0eaada6f8140335733a1b1771ed2746lling         * the mount in progress table.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens         */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        if ((uap->flags & MS_GLOBAL) == 0 &&
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens            lookupname(uap->spec, fromspace,
990b4856d0eaada6f8140335733a1b1771ed2746lling            FOLLOW, NULL, &bvp) == 0) {
990b4856d0eaada6f8140335733a1b1771ed2746lling            addmip = 1;
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        }
990b4856d0eaada6f8140335733a1b1771ed2746lling
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens        if ((error = pn_get(uap->dir, fromspace, &pn)) == 0) {
990b4856d0eaada6f8140335733a1b1771ed2746lling            pathname_t *pnp;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling            if (*pn.pn_path != '/') {
990b4856d0eaada6f8140335733a1b1771ed2746lling                error = EINVAL;
990b4856d0eaada6f8140335733a1b1771ed2746lling                pn_free(&pn);
990b4856d0eaada6f8140335733a1b1771ed2746lling                goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling            }
990b4856d0eaada6f8140335733a1b1771ed2746lling            pn_alloc(&rpn);
990b4856d0eaada6f8140335733a1b1771ed2746lling            /*
990b4856d0eaada6f8140335733a1b1771ed2746lling             * Kludge to prevent autofs from deadlocking with
990b4856d0eaada6f8140335733a1b1771ed2746lling             * itself when it calls domount().
990b4856d0eaada6f8140335733a1b1771ed2746lling             *
990b4856d0eaada6f8140335733a1b1771ed2746lling             * If autofs is calling, it is because it is doing
990b4856d0eaada6f8140335733a1b1771ed2746lling             * (autofs) mounts in the process of an NFS mount.  A
990b4856d0eaada6f8140335733a1b1771ed2746lling             * lookuppn() here would cause us to block waiting for
990b4856d0eaada6f8140335733a1b1771ed2746lling             * said NFS mount to complete, which can't since this
990b4856d0eaada6f8140335733a1b1771ed2746lling             * is the thread that was supposed to doing it.
990b4856d0eaada6f8140335733a1b1771ed2746lling             */
990b4856d0eaada6f8140335733a1b1771ed2746lling            if (fromspace == UIO_USERSPACE) {
990b4856d0eaada6f8140335733a1b1771ed2746lling                if ((error = lookuppn(&pn, &rpn, FOLLOW, NULL,
990b4856d0eaada6f8140335733a1b1771ed2746lling                    NULL)) == 0) {
990b4856d0eaada6f8140335733a1b1771ed2746lling                    pnp = &rpn;
990b4856d0eaada6f8140335733a1b1771ed2746lling                } else {
990b4856d0eaada6f8140335733a1b1771ed2746lling                    /*
990b4856d0eaada6f8140335733a1b1771ed2746lling                     * The file disappeared or otherwise
990b4856d0eaada6f8140335733a1b1771ed2746lling                     * became inaccessible since we opened
990b4856d0eaada6f8140335733a1b1771ed2746lling                     * it; might as well fail the mount
990b4856d0eaada6f8140335733a1b1771ed2746lling                     * since the mount point is no longer
990b4856d0eaada6f8140335733a1b1771ed2746lling                     * accessible.
990b4856d0eaada6f8140335733a1b1771ed2746lling                     */
990b4856d0eaada6f8140335733a1b1771ed2746lling                    pn_free(&rpn);
990b4856d0eaada6f8140335733a1b1771ed2746lling                    pn_free(&pn);
990b4856d0eaada6f8140335733a1b1771ed2746lling                    goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling                }
990b4856d0eaada6f8140335733a1b1771ed2746lling            } else {
990b4856d0eaada6f8140335733a1b1771ed2746lling                pnp = &pn;
990b4856d0eaada6f8140335733a1b1771ed2746lling            }
990b4856d0eaada6f8140335733a1b1771ed2746lling            mountpt = kmem_alloc(pnp->pn_pathlen + 1, KM_SLEEP);
990b4856d0eaada6f8140335733a1b1771ed2746lling            (void) strcpy(mountpt, pnp->pn_path);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling            /*
990b4856d0eaada6f8140335733a1b1771ed2746lling             * If the addition of the zone's rootpath
990b4856d0eaada6f8140335733a1b1771ed2746lling             * would push us over a total path length
990b4856d0eaada6f8140335733a1b1771ed2746lling             * of MAXPATHLEN, we fail the mount with
990b4856d0eaada6f8140335733a1b1771ed2746lling             * ENAMETOOLONG, which is what we would have
990b4856d0eaada6f8140335733a1b1771ed2746lling             * gotten if we were trying to perform the same
990b4856d0eaada6f8140335733a1b1771ed2746lling             * mount in the global zone.
990b4856d0eaada6f8140335733a1b1771ed2746lling             *
990b4856d0eaada6f8140335733a1b1771ed2746lling             * strlen() doesn't count the trailing
990b4856d0eaada6f8140335733a1b1771ed2746lling             * '\0', but zone_rootpathlen counts both a
990b4856d0eaada6f8140335733a1b1771ed2746lling             * trailing '/' and the terminating '\0'.
990b4856d0eaada6f8140335733a1b1771ed2746lling             */
990b4856d0eaada6f8140335733a1b1771ed2746lling            if ((curproc->p_zone->zone_rootpathlen - 1 +
990b4856d0eaada6f8140335733a1b1771ed2746lling                strlen(mountpt)) > MAXPATHLEN ||
990b4856d0eaada6f8140335733a1b1771ed2746lling                (resource != NULL &&
990b4856d0eaada6f8140335733a1b1771ed2746lling                (curproc->p_zone->zone_rootpathlen - 1 +
990b4856d0eaada6f8140335733a1b1771ed2746lling                strlen(resource)) > MAXPATHLEN)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling                error = ENAMETOOLONG;
990b4856d0eaada6f8140335733a1b1771ed2746lling            }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling            pn_free(&rpn);
990b4856d0eaada6f8140335733a1b1771ed2746lling            pn_free(&pn);
990b4856d0eaada6f8140335733a1b1771ed2746lling        }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling        if (error)
990b4856d0eaada6f8140335733a1b1771ed2746lling            goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling        /*
990b4856d0eaada6f8140335733a1b1771ed2746lling         * Prevent path name resolution from proceeding past
990b4856d0eaada6f8140335733a1b1771ed2746lling         * the mount point.
990b4856d0eaada6f8140335733a1b1771ed2746lling         */
990b4856d0eaada6f8140335733a1b1771ed2746lling        if (vn_vfswlock(vp) != 0) {
990b4856d0eaada6f8140335733a1b1771ed2746lling            error = EBUSY;
990b4856d0eaada6f8140335733a1b1771ed2746lling            goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling        }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling        /*
990b4856d0eaada6f8140335733a1b1771ed2746lling         * Verify that it's legitimate to establish a mount on
990b4856d0eaada6f8140335733a1b1771ed2746lling         * the prospective mount point.
990b4856d0eaada6f8140335733a1b1771ed2746lling         */
990b4856d0eaada6f8140335733a1b1771ed2746lling        if (vn_mountedvfs(vp) != NULL) {
990b4856d0eaada6f8140335733a1b1771ed2746lling            /*
990b4856d0eaada6f8140335733a1b1771ed2746lling             * The mount point lock was obtained after some
990b4856d0eaada6f8140335733a1b1771ed2746lling             * other thread raced through and established a mount.
990b4856d0eaada6f8140335733a1b1771ed2746lling             */
990b4856d0eaada6f8140335733a1b1771ed2746lling            vn_vfsunlock(vp);
990b4856d0eaada6f8140335733a1b1771ed2746lling            error = EBUSY;
990b4856d0eaada6f8140335733a1b1771ed2746lling            goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling        }
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks        if (vp->v_flag & VNOMOUNT) {
            vn_vfsunlock(vp);
            error = EINVAL;
            goto errout;
        }
    }
    if ((uap->flags & (MS_DATA | MS_OPTIONSTR)) == 0) {
        uap->dataptr = NULL;
        uap->datalen = 0;
    }

    /*
     * If this is a remount, we don't want to create a new VFS.
     * Instead, we pass the existing one with a remount flag.
     */
    if (remount) {
        /*
         * Confirm that the mount point is the root vnode of the
         * file system that is being remounted.
         * This can happen if the user specifies a different
         * mount point directory pathname in the (re)mount command.
         *
         * Code below can only be reached if splice is true, so it's
         * safe to do vn_vfsunlock() here.
         */
        if ((vp->v_flag & VROOT) == 0) {
            vn_vfsunlock(vp);
            error = ENOENT;
            goto errout;
        }
        /*
         * Disallow making file systems read-only unless file system
         * explicitly allows it in its vfssw.  Ignore other flags.
         */
        if (rdonly && vn_is_readonly(vp) == 0 &&
            (vswp->vsw_flag & VSW_CANRWRO) == 0) {
            vn_vfsunlock(vp);
            error = EINVAL;
            goto errout;
        }
        /*
         * Disallow changing the NBMAND disposition of the file
         * system on remounts.
         */
        if ((nbmand && ((vp->v_vfsp->vfs_flag & VFS_NBMAND) == 0)) ||
            (!nbmand && (vp->v_vfsp->vfs_flag & VFS_NBMAND))) {
            vn_vfsunlock(vp);
            error = EINVAL;
            goto errout;
        }
        vfsp = vp->v_vfsp;
        ovflags = vfsp->vfs_flag;
        vfsp->vfs_flag |= VFS_REMOUNT;
        vfsp->vfs_flag &= ~VFS_RDONLY;
    } else {
        vfsp = vfs_alloc(KM_SLEEP);
        VFS_INIT(vfsp, vfsops, NULL);
    }

    VFS_HOLD(vfsp);

    if ((error = lofi_add(fsname, vfsp, &mnt_mntopts, uap)) != 0) {
        if (!remount) {
            if (splice)
                vn_vfsunlock(vp);
            vfs_free(vfsp);
        } else {
            vn_vfsunlock(vp);
            VFS_RELE(vfsp);
        }
        goto errout;
    }

    /*
     * PRIV_SYS_MOUNT doesn't mean you can become root.
     */
    if (vfsp->vfs_lofi_minor != 0) {
        uap->flags |= MS_NOSUID;
        vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0);
    }

    /*
     * The vfs_reflock is not used anymore; the code below explicitly
     * holds it, preventing others from accessing it directly.
     */
    if ((sema_tryp(&vfsp->vfs_reflock) == 0) &&
        !(vfsp->vfs_flag & VFS_REMOUNT))
        cmn_err(CE_WARN,
            "mount type %s couldn't get vfs_reflock", vswp->vsw_name);

    /*
     * Lock the vfs. If this is a remount we want to avoid spurious umount
     * failures that happen as a side-effect of fsflush() and other mount
     * and unmount operations that might be going on simultaneously and
     * may have locked the vfs currently. To not return EBUSY immediately
     * here we use vfs_lock_wait() instead of vfs_lock() for the remount
     * case.
     */
    if (!remount) {
        if (error = vfs_lock(vfsp)) {
            vfsp->vfs_flag = ovflags;

            lofi_remove(vfsp);

            if (splice)
                vn_vfsunlock(vp);
            vfs_free(vfsp);
            goto errout;
        }
    } else {
        vfs_lock_wait(vfsp);
    }

    /*
     * Add device to mount in progress table, global mounts require special
     * handling. It is possible that we have already done the lookupname
     * on a spliced, non-global fs. If so, we don't want to do it again
     * since we cannot do a lookupname after taking the
     * wlock above. This case is for a non-spliced, non-global filesystem.
     */
    if (!addmip) {
        if ((uap->flags & MS_GLOBAL) == 0 &&
            lookupname(uap->spec, fromspace, FOLLOW, NULL, &bvp) == 0) {
            addmip = 1;
        }
    }

    if (addmip) {
        vnode_t *lvp = NULL;

        error = vfs_get_lofi(vfsp, &lvp);
        if (error > 0) {
            lofi_remove(vfsp);

            if (splice)
                vn_vfsunlock(vp);
            vfs_unlock(vfsp);

            if (remount) {
                VFS_RELE(vfsp);
            } else {
                vfs_free(vfsp);
            }

            goto errout;
        } else if (error == -1) {
            bdev = bvp->v_rdev;
            VN_RELE(bvp);
        } else {
            bdev = lvp->v_rdev;
            VN_RELE(lvp);
            VN_RELE(bvp);
        }

        vfs_addmip(bdev, vfsp);
        addmip = 0;
        delmip = 1;
    }
    /*
     * Invalidate cached entry for the mount point.
     */
    if (splice)
        dnlc_purge_vp(vp);

    /*
     * If we have an option string but the filesystem doesn't supply a
     * prototype options table, create a table with the global
     * options and sufficient room to accept all the options in the
     * string.  Then parse the passed in option string
     * accepting all the options in the string.  This gives us an
     * option table with all the proper cancel properties for the
     * global options.
     *
     * Filesystems that supply a prototype options table are handled
     * earlier in this function.
     */
    if (uap->flags & MS_OPTIONSTR) {
        if (!(vswp->vsw_flag & VSW_HASPROTO)) {
            mntopts_t tmp_mntopts;

            tmp_mntopts.mo_count = 0;
            vfs_createopttbl_extend(&tmp_mntopts, inargs,
                &mnt_mntopts);
            vfs_parsemntopts(&tmp_mntopts, inargs, 1);
            vfs_swapopttbl_nolock(&mnt_mntopts, &tmp_mntopts);
            vfs_freeopttbl(&tmp_mntopts);
        }
    }

    /*
     * Serialize with zone creations.
     */
    mount_in_progress();
    /*
     * Instantiate (or reinstantiate) the file system.  If appropriate,
     * splice it into the file system name space.
     *
     * We want VFS_MOUNT() to be able to override the vfs_resource
     * string if necessary (ie, mntfs), and also for a remount to
     * change the same (necessary when remounting '/' during boot).
     * So we set up vfs_mntpt and vfs_resource to what we think they
     * should be, then hand off control to VFS_MOUNT() which can
     * override this.
     *
     * For safety's sake, when changing vfs_resource or vfs_mntpt of
     * a vfs which is on the vfs list (i.e. during a remount), we must
     * never set those fields to NULL. Several bits of code make
     * assumptions that the fields are always valid.
     */
    vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts);
    if (remount) {
        if ((oldresource = vfsp->vfs_resource) != NULL)
            refstr_hold(oldresource);
        if ((oldmntpt = vfsp->vfs_mntpt) != NULL)
            refstr_hold(oldmntpt);
    }
    vfs_setresource(vfsp, resource);
    vfs_setmntpoint(vfsp, mountpt);

    /*
     * going to mount on this vnode, so notify.
     */
    vnevent_mountedover(vp, NULL);
    error = VFS_MOUNT(vfsp, vp, uap, credp);

    if (uap->flags & MS_RDONLY)
        vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
    if (uap->flags & MS_NOSUID)
        vfs_setmntopt(vfsp, MNTOPT_NOSUID, NULL, 0);
    if (uap->flags & MS_GLOBAL)
        vfs_setmntopt(vfsp, MNTOPT_GLOBAL, NULL, 0);

    if (error) {
        lofi_remove(vfsp);

        if (remount) {
            /* put back pre-remount options */
            vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts);
            vfs_setmntpoint(vfsp, (stripzonepath(
                refstr_value(oldmntpt))));
            if (oldmntpt)
                refstr_rele(oldmntpt);
            vfs_setresource(vfsp, (stripzonepath(
                refstr_value(oldresource))));
            if (oldresource)
                refstr_rele(oldresource);
            vfsp->vfs_flag = ovflags;
            vfs_unlock(vfsp);
            VFS_RELE(vfsp);
        } else {
            vfs_unlock(vfsp);
            vfs_freemnttab(vfsp);
            vfs_free(vfsp);
        }
    } else {
        /*
         * Set the mount time to now
         */
        vfsp->vfs_mtime = ddi_get_time();
        if (remount) {
            vfsp->vfs_flag &= ~VFS_REMOUNT;
            if (oldresource)
                refstr_rele(oldresource);
            if (oldmntpt)
                refstr_rele(oldmntpt);
        } else if (splice) {
            /*
             * Link vfsp into the name space at the mount
             * point. Vfs_add() is responsible for
             * holding the mount point which will be
             * released when vfs_remove() is called.
             */
            vfs_add(vp, vfsp, uap->flags);
        } else {
            /*
             * Hold the reference to file system which is
             * not linked into the name space.
             */
            vfsp->vfs_zone = NULL;
            VFS_HOLD(vfsp);
            vfsp->vfs_vnodecovered = NULL;
        }
        /*
         * Set flags for global options encountered
         */
        if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
            vfsp->vfs_flag |= VFS_RDONLY;
        else
            vfsp->vfs_flag &= ~VFS_RDONLY;
        if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
            vfsp->vfs_flag |= (VFS_NOSETUID|VFS_NODEVICES);
        } else {
            if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
                vfsp->vfs_flag |= VFS_NODEVICES;
            else
                vfsp->vfs_flag &= ~VFS_NODEVICES;
            if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
                vfsp->vfs_flag |= VFS_NOSETUID;
            else
                vfsp->vfs_flag &= ~VFS_NOSETUID;
        }
        if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
            vfsp->vfs_flag |= VFS_NBMAND;
        else
            vfsp->vfs_flag &= ~VFS_NBMAND;

        if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
            vfsp->vfs_flag |= VFS_XATTR;
        else
            vfsp->vfs_flag &= ~VFS_XATTR;

        if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
            vfsp->vfs_flag |= VFS_NOEXEC;
        else
            vfsp->vfs_flag &= ~VFS_NOEXEC;

        /*
         * Now construct the output option string of options
         * we recognized.
         */
        if (uap->flags & MS_OPTIONSTR) {
            vfs_list_read_lock();
            copyout_error = vfs_buildoptionstr(
                &vfsp->vfs_mntopts, inargs, optlen);
            vfs_list_unlock();
            if (copyout_error == 0 &&
                (uap->flags & MS_SYSSPACE) == 0) {
                copyout_error = copyoutstr(inargs, opts,
                    optlen, NULL);
            }
        }

        /*
         * If this isn't a remount, set up the vopstats before
         * anyone can touch this. We only allow spliced file
         * systems (file systems which are in the namespace) to
         * have the VFS_STATS flag set.
         * NOTE: PxFS mounts the underlying file system with
         * MS_NOSPLICE set and copies those vfs_flags to its private
         * vfs structure. As a result, PxFS should never have
         * the VFS_STATS flag or else we might access the vfs
         * statistics-related fields prior to them being
         * properly initialized.
         */
        if (!remount && (vswp->vsw_flag & VSW_STATS) && splice) {
            initialize_vopstats(&vfsp->vfs_vopstats);
            /*
             * We need to set vfs_vskap to NULL because there's
             * a chance it won't be set below.  This is checked
             * in teardown_vopstats() so we can't have garbage.
             */
            vfsp->vfs_vskap = NULL;
            vfsp->vfs_flag |= VFS_STATS;
            vfsp->vfs_fstypevsp = get_fstype_vopstats(vfsp, vswp);
        }

        if (vswp->vsw_flag & VSW_XID)
            vfsp->vfs_flag |= VFS_XID;

        vfs_unlock(vfsp);
    }
    mount_completed();
    if (splice)
        vn_vfsunlock(vp);

    if ((error == 0) && (copyout_error == 0)) {
        if (!remount) {
            /*
             * Don't call get_vskstat_anchor() while holding
             * locks since it allocates memory and calls
             * VFS_STATVFS().  For NFS, the latter can generate
             * an over-the-wire call.
             */
            vskap = get_vskstat_anchor(vfsp);
            /* Only take the lock if we have something to do */
            if (vskap != NULL) {
                vfs_lock_wait(vfsp);
                if (vfsp->vfs_flag & VFS_STATS) {
                    vfsp->vfs_vskap = vskap;
                }
                vfs_unlock(vfsp);
            }
        }
        /* Return vfsp to caller. */
        *vfspp = vfsp;
    }
errout:
    vfs_freeopttbl(&mnt_mntopts);
    if (resource != NULL)
        kmem_free(resource, strlen(resource) + 1);
    if (mountpt != NULL)
        kmem_free(mountpt, strlen(mountpt) + 1);
    /*
     * It is possible we errored prior to adding to mount in progress
     * table. Must free vnode we acquired with successful lookupname.
     */
    if (addmip)
        VN_RELE(bvp);
    if (delmip)
        vfs_delmip(vfsp);
    ASSERT(vswp != NULL);
    vfs_unrefvfssw(vswp);
    if (inargs != opts)
        kmem_free(inargs, MAX_MNTOPT_STR);
    if (copyout_error) {
        lofi_remove(vfsp);
        VFS_RELE(vfsp);
        error = copyout_error;
    }
    return (error);
}

/*
 * Install a new path string in *refp (one of vfsp's refstr fields),
 * dropping the reference on whatever string was there before.
 *
 * When the calling process is not in the global zone and newpath is
 * absolute, the stored string is the zone's root path (without its
 * trailing '/') followed by newpath; otherwise newpath is stored as is.
 *
 * If vfsp is on the vfs list the caller must hold the vfs lock; in that
 * case the update is made under the vfs list lock and the mnttab
 * modification time is updated.
 */
static void
vfs_setpath(struct vfs *vfsp, refstr_t **refp, const char *newpath)
{
    zone_t *zone = curproc->p_zone;
    refstr_t *newref;
    int listed = 0;

    ASSERT(!VFS_ON_LIST(vfsp) || vfs_lock_held(vfsp));

    /*
     * mntfs only displays up to MAXPATHLEN bytes, so the new path has
     * to be shorter than that.  domount() gets its paths from pn_get()
     * and the other callers likewise keep them under MAXPATHLEN bytes,
     * which is why an assertion is sufficient here.
     */
    ASSERT(strlen(newpath) < MAXPATHLEN);

    /* mntfs needs a consistent view while the vfs list lock is held. */
    if (VFS_ON_LIST(vfsp)) {
        listed = 1;
        vfs_list_lock();
    }

    /* Drop the reference on the string being replaced, if any. */
    if (*refp != NULL)
        refstr_rele(*refp);

    if (zone != global_zone && *newpath == '/') {
        /*
         * Build "<zone root without trailing '/'><newpath>".
         *
         * zone_rootpathlen counts both the trailing '/' and the
         * terminating NUL, so the scratch buffer needs
         *
         *    (strlen(newpath) + 1) + zone_rootpathlen - 1 - 1
         *
         * bytes: newpath and its NUL, plus the root path, less the
         * root path's own NUL and its trailing '/'.
         */
        size_t buflen = strlen(newpath) + zone->zone_rootpathlen - 1;
        char *buf = kmem_alloc(buflen, KM_SLEEP);

        /*
         * Copy the whole root path, then overwrite its trailing
         * slash with a NUL before appending newpath.
         */
        (void) strcpy(buf, zone->zone_rootpath);
        buf[zone->zone_rootpathlen - 2] = '\0';
        (void) strcat(buf, newpath);

        newref = refstr_alloc(buf);
        kmem_free(buf, buflen);
    } else {
        /* No zone prefix required; store the path unchanged. */
        newref = refstr_alloc(newpath);
    }

    *refp = newref;

    if (listed) {
        vfs_mnttab_modtimeupd();
        vfs_list_unlock();
    }
}

/*
 * Store the name of the mounted resource in vfsp->vfs_resource.
 * A NULL or empty name is recorded as VFS_NORESOURCE.
 * If vfsp is already mounted, the caller must hold the vfs lock.
 */
void
vfs_setresource(struct vfs *vfsp, const char *resource)
{
    const char *name = resource;

    if (name == NULL || *name == '\0')
        name = VFS_NORESOURCE;

    vfs_setpath(vfsp, &vfsp->vfs_resource, name);
}

/*
 * Store the mount point name in vfsp->vfs_mntpt.
 * A NULL or empty name is recorded as VFS_NOMNTPT.
 * If vfsp is already mounted, the caller must hold the vfs lock.
 */
void
vfs_setmntpoint(struct vfs *vfsp, const char *mntpt)
{
    const char *name = mntpt;

    if (name == NULL || *name == '\0')
        name = VFS_NOMNTPT;

    vfs_setpath(vfsp, &vfsp->vfs_mntpt, name);
}

/*
 * Return vfsp's vfs_resource string with a hold placed on it.  The hold
 * is taken while the vfs list is read-locked.  The caller must call
 * refstr_rele() on the returned string when finished with it.
 */
refstr_t *
vfs_getresource(const struct vfs *vfsp)
{
    refstr_t *held;

    vfs_list_read_lock();
    held = vfsp->vfs_resource;
    refstr_hold(held);
    vfs_list_unlock();

    return (held);
}

/*
 * Return vfsp's vfs_mntpt string with a hold placed on it.  The hold
 * is taken while the vfs list is read-locked.  The caller must call
 * refstr_rele() on the returned string when finished with it.
 */
refstr_t *
vfs_getmntpoint(const struct vfs *vfsp)
{
    refstr_t *held;

    vfs_list_read_lock();
    held = vfsp->vfs_mntpt;
    refstr_hold(held);
    vfs_list_unlock();

    return (held);
}

/*
 * Build an options table in mops that has one empty slot for every
 * option named in the comma-separated string opts.  If mtmpl is not
 * NULL, its entries are copied in ahead of the empty slots.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 *       to protect mops.
 */
static void
vfs_createopttbl_extend(mntopts_t *mops, const char *opts,
    const mntopts_t *mtmpl)
{
    uint_t nopts = 0;

    if (opts != NULL && *opts != '\0') {
        const char *comma;

        /*
         * A non-empty string holds one option, plus one more
         * for each comma found in it.
         */
        nopts = 1;
        comma = strchr(opts, ',');
        while (comma != NULL) {
            nopts++;
            comma = strchr(comma + 1, ',');
        }
    }

    vfs_copyopttbl_extend(mtmpl, mops, nopts);
}

/*
 * Create an empty options table with enough empty slots to hold all
 * the options in the options string passed as an argument.
 *
 * This is vfs_createopttbl_extend() with no template table to prepend.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 *       to protect mops.
 */
void
vfs_createopttbl(mntopts_t *mops, const char *opts)
{
    vfs_createopttbl_extend(mops, opts, NULL);
}


/*
 * Exchange the contents (mo_count and mo_list) of two mount options
 * tables.  No locks are taken here.
 */
static void
vfs_swapopttbl_nolock(mntopts_t *optbl1, mntopts_t *optbl2)
{
    uint_t saved_count = optbl1->mo_count;
    mntopt_t *saved_list = optbl1->mo_list;

    optbl1->mo_count = optbl2->mo_count;
    optbl1->mo_list = optbl2->mo_list;

    optbl2->mo_count = saved_count;
    optbl2->mo_list = saved_list;
}

/*
 * Swap two mount options tables while holding the vfs list lock, and
 * update the mnttab modification time before dropping the lock.
 */
static void
vfs_swapopttbl(mntopts_t *optbl1, mntopts_t *optbl2)
{
    vfs_list_lock();
    vfs_swapopttbl_nolock(optbl1, optbl2);
    vfs_mnttab_modtimeupd();
    vfs_list_unlock();
}

/*
 * Duplicate a NULL-terminated array of cancel-option strings, leaving
 * room for "extend" additional entries.
 *
 * moc may be NULL, which is treated as an empty array.  Returns NULL
 * when there is nothing to copy and no extra room was requested.
 * Otherwise the result holds private copies of the strings in moc,
 * followed by NULL pointers filling the extra slots and the terminator.
 */
static char **
vfs_copycancelopt_extend(char **const moc, int extend)
{
    int nopts = 0;
    int total;
    int k;
    char **copy;

    /* Count the entries already present. */
    if (moc != NULL) {
        while (moc[nopts] != NULL)
            nopts++;
    }

    total = nopts + extend;
    if (total == 0)
        return (NULL);

    /* One slot beyond "total" for the terminating NULL. */
    copy = kmem_alloc((total + 1) * sizeof (char *), KM_SLEEP);

    for (k = 0; k < nopts; k++) {
        copy[k] = kmem_alloc(strlen(moc[k]) + 1, KM_SLEEP);
        (void) strcpy(copy[k], moc[k]);
    }

    /* NULL out the spare slots and the terminator. */
    while (k <= total) {
        copy[k] = NULL;
        k++;
    }

    return (copy);
}

/*
 * Copy the mount option *s into *d.  mo_flags and mo_data are copied
 * by value; the name, the cancel list and the argument string are
 * duplicated into newly allocated memory.
 */
static void
vfs_copyopt(const mntopt_t *s, mntopt_t *d)
{
    char *src;

    d->mo_flags = s->mo_flags;
    d->mo_data = s->mo_data;

    /* Option name; a NULL name should never happen. */
    src = s->mo_name;
    if (src == NULL) {
        d->mo_name = NULL;
    } else {
        char *name = kmem_alloc(strlen(src) + 1, KM_SLEEP);

        (void) strcpy(name, src);
        d->mo_name = name;
    }

    d->mo_cancel = vfs_copycancelopt_extend(s->mo_cancel, 0);

    /* Option argument, if there is one. */
    src = s->mo_arg;
    if (src == NULL) {
        d->mo_arg = NULL;
    } else {
        char *arg = kmem_alloc(strlen(src) + 1, KM_SLEEP);

        (void) strcpy(arg, src);
        d->mo_arg = arg;
    }
}

/*
 * Copy the mount options table smo into dmo, appending "extra" spare
 * slots marked MO_EMPTY.  smo may be NULL, in which case only the spare
 * slots are created.  Whatever dmo held before is freed first; if the
 * resulting table would have no entries, dmo is left as
 * vfs_freeopttbl() left it.
 */
static void
vfs_copyopttbl_extend(const mntopts_t *smo, mntopts_t *dmo, int extra)
{
    uint_t ncopy, nslots, idx;
    mntopt_t *tbl;

    /* Discard anything already in the destination table. */
    vfs_freeopttbl(dmo);

    ncopy = (smo != NULL) ? smo->mo_count : 0;
    nslots = ncopy + extra;
    if (nslots == 0)
        return;    /* nothing to do */

    dmo->mo_count = nslots;
    tbl = kmem_zalloc(nslots * sizeof (mntopt_t), KM_SLEEP);
    dmo->mo_list = tbl;

    /* Copied entries come first, spare slots after them. */
    for (idx = 0; idx < nslots; idx++) {
        if (idx < ncopy)
            vfs_copyopt(&smo->mo_list[idx], &tbl[idx]);
        else
            tbl[idx].mo_flags = MO_EMPTY;
    }
}

/*
 * Copy a mount options table.
 *
 * This is vfs_copyopttbl_extend() with no spare slots requested, so
 * any previous contents of dmo are freed before the copy is made.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 *       to protect smo and dmo.
 */
void
vfs_copyopttbl(const mntopts_t *smo, mntopts_t *dmo)
{
    vfs_copyopttbl_extend(smo, dmo, 0);
}

/*
 * Build a newly allocated, NULL-terminated cancel list holding the union
 * of the cancel options of mop1 and mop2.  The entries of mop1 come first,
 * followed by those entries of mop2 that do not already appear in mop1.
 * If mop1 has no cancel options the result is a plain copy of mop2's list;
 * if mop2 has none it is a plain copy of mop1's.
 */
static char **
vfs_mergecancelopts(const mntopt_t *mop1, const mntopt_t *mop2)
{
    char **list1 = mop1->mo_cancel;
    char **list2 = mop2->mo_cancel;
    char **merged;
    int n1 = 0;
    int n2 = 0;
    int used, i, j;

    /* Count the first list; without entries, the second list wins. */
    if (list1 != NULL) {
        while (list1[n1] != NULL)
            n1++;
    }
    if (n1 == 0)
        return (vfs_copycancelopt_extend(list2, 0));

    /* Count the second list and reserve room for all of its entries. */
    if (list2 != NULL) {
        while (list2[n2] != NULL)
            n2++;
    }
    merged = vfs_copycancelopt_extend(list1, n2);
    if (n2 == 0)
        return (merged);

    /*
     * merged[] now has n1 + n2 + 1 slots, the first n1 of which hold
     * copies of list1.  Append every list2 entry that list1 lacks.
     * The search is quadratic, but the lists are normally tiny.
     */
    used = n1;
    for (j = 0; j < n2; j++) {
        for (i = 0; i < n1; i++) {
            if (strcmp(list1[i], list2[j]) == 0)
                break;
        }
        if (i < n1)
            continue;   /* duplicate of a list1 entry */

        merged[used] = kmem_alloc(strlen(list2[j]) + 1, KM_SLEEP);
        (void) strcpy(merged[used], list2[j]);
        used++;
    }

    /*
     * If duplicates were skipped the array is larger than needed;
     * move the live entries (plus terminator) to a right-sized array.
     */
    if (used != n1 + n2) {
        size_t bytes = (used + 1) * sizeof (char *);
        char **shrunk = kmem_alloc(bytes, KM_SLEEP);

        bcopy(merged, shrunk, bytes);
        kmem_free(merged, (n1 + n2 + 1) * sizeof (char *));
        merged = shrunk;
    }
    return (merged);
}

/*
 * Merge two mount option tables (outer and inner) into one.  This is very
 * similar to "merging" global variables and automatic variables in C.
 *
 * The result, stored in dmo, consists of one slot per entry of omo (in the
 * same order), followed by the non-empty options of imo whose names do not
 * appear in omo.  Where a name appears in both, the slot is replaced by a
 * copy of the inner option.  dmo's fields are overwritten without freeing
 * whatever they referenced before.
 *
 * This isn't (and doesn't have to be) fast.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 *       to protect omo, imo & dmo.
 */
void
vfs_mergeopttbl(const mntopts_t *omo, const mntopts_t *imo, mntopts_t *dmo)
{
    uint_t i, count;
    mntopt_t *mop, *motbl;
    uint_t freeidx;

    /*
     * First determine how much space we need to allocate.
     */
    count = omo->mo_count;
    for (i = 0; i < imo->mo_count; i++) {
        if (imo->mo_list[i].mo_flags & MO_EMPTY)
            continue;
        if (vfs_hasopt(omo, imo->mo_list[i].mo_name) == NULL)
            count++;
    }
    ASSERT(count >= omo->mo_count &&
        count <= omo->mo_count + imo->mo_count);
    /* Not zeroed: every slot is fully initialized by vfs_copyopt(). */
    motbl = kmem_alloc(count * sizeof (mntopt_t), KM_SLEEP);
    /* Start out with a copy of every outer option, in order. */
    for (i = 0; i < omo->mo_count; i++)
        vfs_copyopt(&omo->mo_list[i], &motbl[i]);
    /* New inner-only options are appended after the outer ones. */
    freeidx = omo->mo_count;
    for (i = 0; i < imo->mo_count; i++) {
        if (imo->mo_list[i].mo_flags & MO_EMPTY)
            continue;
        if ((mop = vfs_hasopt(omo, imo->mo_list[i].mo_name)) != NULL) {
            char **newcanp;
            /* Slot in motbl that corresponds to the outer option. */
            uint_t index = mop - omo->mo_list;

            /*
             * NOTE(review): both arguments describe the outer
             * option (mop and the copy made from it above), so
             * the inner option's cancel list does not take part
             * in this merge -- confirm whether
             * &imo->mo_list[i] was intended here.
             */
            newcanp = vfs_mergecancelopts(mop, &motbl[index]);

            /* Replace the slot with a copy of the inner option. */
            vfs_freeopt(&motbl[index]);
            vfs_copyopt(&imo->mo_list[i], &motbl[index]);

            /* Swap the copied cancel list for the merged one. */
            vfs_freecancelopt(motbl[index].mo_cancel);
            motbl[index].mo_cancel = newcanp;
        } else {
            /*
             * If it's a new option, just copy it over to the first
             * free location.
             */
            vfs_copyopt(&imo->mo_list[i], &motbl[freeidx++]);
        }
    }
    dmo->mo_count = count;
    dmo->mo_list = motbl;
}

/*
 * Functions to set and clear mount options in a mount options table.
 */

/*
 * Turn a mount option off, if the table contains it: MO_SET is cleared and
 * any argument string is released.  Only the first entry named "opt" is
 * touched; a table without such an entry is left alone.
 *
 * update_mnttab says whether mops belongs to a vfs that is on the vfs
 * list; if so the list lock must be write-held and the mnttab modification
 * time is bumped.
 */
static void
vfs_clearmntopt_nolock(mntopts_t *mops, const char *opt, int update_mnttab)
{
    const uint_t nopts = mops->mo_count;
    uint_t idx;

    ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist));

    for (idx = 0; idx < nopts; idx++) {
        struct mntopt *cur = &mops->mo_list[idx];

        /* Skip unused slots and options with a different name. */
        if ((cur->mo_flags & MO_EMPTY) != 0 ||
            strcmp(opt, cur->mo_name) != 0)
            continue;

        cur->mo_flags &= ~MO_SET;
        if (cur->mo_arg != NULL) {
            kmem_free(cur->mo_arg, strlen(cur->mo_arg) + 1);
            cur->mo_arg = NULL;
        }
        if (update_mnttab)
            vfs_mnttab_modtimeupd();
        return;
    }
}

/*
 * Clear the mount option "opt" in vfsp's option table.  When the vfs is on
 * the vfs list, the list lock is taken around the update and the mnttab
 * modification time is refreshed as part of it.
 */
void
vfs_clearmntopt(struct vfs *vfsp, const char *opt)
{
    const int onlist = VFS_ON_LIST(vfsp) ? 1 : 0;

    if (onlist)
        vfs_list_lock();
    vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, opt, onlist);
    if (onlist)
        vfs_list_unlock();
}


/*
 * Set a mount option on.  If it's not found in the table, it's silently
 * ignored.  If the option has MO_IGNORE set, it is still set unless the
 * VFS_NOFORCEOPT bit is set in the flags.  Also, VFS_DISPLAY/VFS_NODISPLAY flag
 * bits can be used to toggle the MO_NODISPLAY bit for the option.
 * If the VFS_CREATEOPT flag bit is set then the first option slot with
 * MO_EMPTY set is created as the option passed in.
 *
 * Setting an option replaces its argument string (the new one is stored
 * only if arg is non-NULL and the option has MO_HASVALUE) and clears every
 * option named in its cancel list.
 *
 * The update_mnttab arg indicates whether mops is part of a vfs that is on
 * the vfs list.
 */
static void
vfs_setmntopt_nolock(mntopts_t *mops, const char *opt,
    const char *arg, int flags, int update_mnttab)
{
    mntopt_t *mop;
    uint_t i, count;
    char *sp;

    ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist));

    /* Never create a second entry for an option that already exists. */
    if (flags & VFS_CREATEOPT) {
        if (vfs_hasopt(mops, opt) != NULL) {
            flags &= ~VFS_CREATEOPT;
        }
    }
    count = mops->mo_count;
    for (i = 0; i < count; i++) {
        mop = &mops->mo_list[i];

        if (mop->mo_flags & MO_EMPTY) {
            if ((flags & VFS_CREATEOPT) == 0)
                continue;
            /*
             * Claim this empty slot for the new option, then fall
             * through to the common "set" code below.
             */
            sp = kmem_alloc(strlen(opt) + 1, KM_SLEEP);
            (void) strcpy(sp, opt);
            mop->mo_name = sp;
            if (arg != NULL)
                mop->mo_flags = MO_HASVALUE;
            else
                mop->mo_flags = 0;
        } else if (strcmp(opt, mop->mo_name)) {
            continue;
        }
        /* Ignored options are only set when the caller forces it. */
        if ((mop->mo_flags & MO_IGNORE) && (flags & VFS_NOFORCEOPT))
            break;
        /* Copy the new argument before releasing the old one. */
        if (arg != NULL && (mop->mo_flags & MO_HASVALUE) != 0) {
            sp = kmem_alloc(strlen(arg) + 1, KM_SLEEP);
            (void) strcpy(sp, arg);
        } else {
            sp = NULL;
        }
        if (mop->mo_arg != NULL)
            kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1);
        mop->mo_arg = sp;
        if (flags & VFS_DISPLAY)
            mop->mo_flags &= ~MO_NODISPLAY;
        if (flags & VFS_NODISPLAY)
            mop->mo_flags |= MO_NODISPLAY;
        mop->mo_flags |= MO_SET;
        if (mop->mo_cancel != NULL) {
            char **cp;

            /*
             * Turn off everything this option cancels.  The mnttab
             * time is not updated per option; it is updated once
             * below if requested.
             */
            for (cp = mop->mo_cancel; *cp != NULL; cp++)
                vfs_clearmntopt_nolock(mops, *cp, 0);
        }
        if (update_mnttab)
            vfs_mnttab_modtimeupd();
        break;
    }
}

/*
 * Set the mount option "opt" (with optional argument "arg") in vfsp's
 * option table; "flags" is handed through to vfs_setmntopt_nolock().
 * When the vfs is on the vfs list, the list lock is taken around the
 * update and the mnttab modification time is refreshed as part of it.
 */
void
vfs_setmntopt(struct vfs *vfsp, const char *opt, const char *arg, int flags)
{
    const int onlist = VFS_ON_LIST(vfsp) ? 1 : 0;

    if (onlist)
        vfs_list_lock();
    vfs_setmntopt_nolock(&vfsp->vfs_mntopts, opt, arg, flags, onlist);
    if (onlist)
        vfs_list_unlock();
}


/*
 * Append a "tag" option to an options table.  The table is regrown by one
 * slot: the existing entries are moved (shallow copy) into a new array,
 * the old array is freed, and the new last slot becomes an MO_TAG option
 * named "tag".  Returns a pointer to that new slot.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 *       to protect mops.
 */
static mntopt_t *
vfs_addtag(mntopts_t *mops, const char *tag)
{
    const uint_t oldcount = mops->mo_count;
    const uint_t newcount = oldcount + 1;
    mntopt_t *newtbl, *slot;

    newtbl = kmem_zalloc(newcount * sizeof (mntopt_t), KM_SLEEP);
    if (oldcount != 0) {
        size_t oldbytes = oldcount * sizeof (mntopt_t);

        /* The entries keep owning their strings; only the array moves. */
        bcopy(mops->mo_list, newtbl, oldbytes);
        kmem_free(mops->mo_list, oldbytes);
    }
    mops->mo_list = newtbl;
    mops->mo_count = newcount;

    /* Everything else in the new slot stays zeroed. */
    slot = &newtbl[oldcount];
    slot->mo_flags = MO_TAG;
    slot->mo_name = kmem_alloc(strlen(tag) + 1, KM_SLEEP);
    (void) strcpy(slot->mo_name, tag);
    return (slot);
}

/*
 * Allow users to set arbitrary "tags" in a vfs's mount options.
 * Broader use within the kernel is discouraged.
 *
 * The file system is identified by device (major, minor) and mount point.
 * Returns 0 on success, EINVAL if no such mounted file system exists or
 * "tag" names an existing option that is not a tag, ENAMETOOLONG if adding
 * the tag would make the option string exceed MAX_MNTOPT_STR, or the error
 * from secpolicy_fs_config().
 */
int
vfs_settag(uint_t major, uint_t minor, const char *mntpt, const char *tag,
    cred_t *cr)
{
    const dev_t dev = makedevice(major, minor);
    char *optstr = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP);
    vfs_t *cur, *target = NULL;
    mntopts_t *mops;
    mntopt_t *mop;
    int err;

    /*
     * Walk the circular vfs list looking for the requested mount.
     */
    vfs_list_lock();
    cur = rootvfs;
    do {
        if (cur->vfs_dev == dev &&
            strcmp(mntpt, refstr_value(cur->vfs_mntpt)) == 0) {
            target = cur;
            break;
        }
        cur = cur->vfs_next;
    } while (cur != rootvfs);

    if (target == NULL) {
        err = EINVAL;
        goto out;
    }
    if ((err = secpolicy_fs_config(cr, target)) != 0)
        goto out;

    mops = &target->vfs_mntopts;
    mop = vfs_hasopt(mops, tag);
    if (mop == NULL) {
        /*
         * The tag is new: make sure the option string still fits
         * once the tag has been added, then create the tag.
         */
        (void) vfs_buildoptionstr(mops, optstr, MAX_MNTOPT_STR);
        if (strlen(optstr) + strlen(tag) + 2 > MAX_MNTOPT_STR) {
            err = ENAMETOOLONG;
            goto out;
        }
        mop = vfs_addtag(mops, tag);
    }
    /* Regular (non-tag) options may not be set through this interface. */
    if ((mop->mo_flags & MO_TAG) == 0) {
        err = EINVAL;
        goto out;
    }
    vfs_setmntopt_nolock(mops, tag, NULL, 0, 1);
out:
    vfs_list_unlock();
    kmem_free(optstr, MAX_MNTOPT_STR);
    return (err);
}

/*
 * Allow users to remove arbitrary "tags" in a vfs's mount options.
 * Broader use within the kernel is discouraged.
 *
 * The file system is identified by device (major, minor) and mount point.
 * Returns 0 on success, EINVAL if no such mounted file system exists or
 * "tag" is not an existing tag option, or the error from
 * secpolicy_fs_config().
 */
int
vfs_clrtag(uint_t major, uint_t minor, const char *mntpt, const char *tag,
    cred_t *cr)
{
    const dev_t dev = makedevice(major, minor);
    vfs_t *cur, *target = NULL;
    mntopt_t *mop;
    int err;

    /*
     * Walk the circular vfs list looking for the requested mount.
     */
    vfs_list_lock();
    cur = rootvfs;
    do {
        if (cur->vfs_dev == dev &&
            strcmp(mntpt, refstr_value(cur->vfs_mntpt)) == 0) {
            target = cur;
            break;
        }
        cur = cur->vfs_next;
    } while (cur != rootvfs);

    if (target == NULL) {
        err = EINVAL;
        goto out;
    }
    if ((err = secpolicy_fs_config(cr, target)) != 0)
        goto out;

    /* Only options that exist and carry MO_TAG may be cleared here. */
    mop = vfs_hasopt(&target->vfs_mntopts, tag);
    if (mop == NULL || (mop->mo_flags & MO_TAG) == 0) {
        err = EINVAL;
        goto out;
    }
    vfs_clearmntopt_nolock(&target->vfs_mntopts, tag, 1);
out:
    vfs_list_unlock();
    return (err);
}

/*
 * Parse a comma separated option string ("name" or "name=value" items)
 * and set each option in the mount options table.  Options are set with
 * VFS_NOFORCEOPT; unknown options are silently ignored unless "create" is
 * non-zero, in which case they are added on the fly -- which requires a
 * slot marked MO_EMPTY in the table.  The input string is modified while
 * parsing (separators are temporarily replaced with nulls) but each
 * separator is put back once its option has been handled.  A NULL string
 * is a no-op.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 *       to protect mops.
 */
void
vfs_parsemntopts(mntopts_t *mops, char *osp, int create)
{
    char *opt, *next, *comma, *equals, *value;
    int setflg = VFS_NOFORCEOPT;

    if (osp == NULL)
        return;
    if (create)
        setflg |= VFS_CREATEOPT;

    for (opt = osp; *opt != '\0'; opt = next) {
        /* Isolate the current option and locate the following one. */
        comma = strchr(opt, ',');
        if (comma != NULL) {
            *comma = '\0';
            next = comma + 1;
        } else {
            next = opt + strlen(opt);
        }

        /* Split off the value, if one was supplied. */
        equals = strchr(opt, '=');
        if (equals != NULL) {
            *equals = '\0';
            value = equals + 1;
        } else {
            value = NULL;
        }

        vfs_setmntopt_nolock(mops, opt, value, setflg, 0);

        /* Put back the separators that were overwritten above. */
        if (comma != NULL)
            *comma = ',';
        if (equals != NULL)
            *equals = '=';
    }
}

/*
 * Look an option up by name in a mount options table.  Returns a pointer
 * to the first non-empty entry called "opt" (whether or not it is
 * currently set), or NULL if the table has no such entry.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 *       to protect mops.
 */
struct mntopt *
vfs_hasopt(const mntopts_t *mops, const char *opt)
{
    const uint_t nopts = mops->mo_count;
    uint_t idx;

    for (idx = 0; idx < nopts; idx++) {
        struct mntopt *cur = &mops->mo_list[idx];

        /* Unused slots are skipped without looking at their name. */
        if ((cur->mo_flags & MO_EMPTY) == 0 &&
            strcmp(opt, cur->mo_name) == 0)
            return (cur);
    }
    return (NULL);
}

/*
 * Report whether an option is set in a mount options table.  Returns 1 if
 * the first entry named "opt" has MO_SET, otherwise 0 (also when there is
 * no such entry).  When the option is set, argp is non-NULL and the option
 * has MO_HASVALUE, *argp receives the entry's argument pointer (which may
 * be NULL); in every other case *argp is left untouched.
 */
static int
vfs_optionisset_nolock(const mntopts_t *mops, const char *opt, char **argp)
{
    const uint_t nopts = mops->mo_count;
    uint_t idx;

    for (idx = 0; idx < nopts; idx++) {
        struct mntopt *cur = &mops->mo_list[idx];

        if ((cur->mo_flags & MO_EMPTY) != 0 ||
            strcmp(opt, cur->mo_name) != 0)
            continue;

        /* Only the first entry with a matching name is consulted. */
        if ((cur->mo_flags & MO_SET) == 0)
            return (0);
        if (argp != NULL && (cur->mo_flags & MO_HASVALUE) != 0)
            *argp = cur->mo_arg;
        return (1);
    }
    return (0);
}


/*
 * Locked front end to vfs_optionisset_nolock(): takes the vfs list lock as
 * a reader while vfsp's mount options are examined.  Returns non-zero if
 * "opt" is set; *argp may be filled in as described for the _nolock
 * variant.
 *
 * NOTE(review): the pointer stored in *argp refers to the option's own
 * argument string and is handed back after the list lock is dropped --
 * confirm callers do not rely on it staying valid across option changes.
 */
int
vfs_optionisset(const struct vfs *vfsp, const char *opt, char **argp)
{
    int ret;

    vfs_list_read_lock();
    ret = vfs_optionisset_nolock(&vfsp->vfs_mntopts, opt, argp);
    vfs_list_unlock();
    return (ret);
}


/*
 * Construct a comma separated string of the options set in the given
 * mount table, return the string in the given buffer.  Return non-zero if
 * the buffer would overflow.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 *       to protect mp.
 */
int
vfs_buildoptionstr(const mntopts_t *mp, char *buf, int len)
{
    char *cp;
    uint_t i;

    buf[0] = '\0';
    cp = buf;
    for (i = 0; i < mp->mo_count; i++) {
        struct mntopt *mop;

        mop = &mp->mo_list[i];
        if (mop->mo_flags & MO_SET) {
            int optlen, comma = 0;

            if (buf[0] != '\0')
                comma = 1;
            optlen = strlen(mop->mo_name);
            if (strlen(buf) + comma + optlen + 1 > len)
                goto err;
            if (comma)
                *cp++ = ',';
            (void) strcpy(cp, mop->mo_name);
            cp += optlen;
            /*
             * Append option value if there is one
             */
            if (mop->mo_arg != NULL) {
                int arglen;

                arglen = strlen(mop->mo_arg);
                if (strlen(buf) + arglen + 2 > len)
                    goto err;
                *cp++ = '=';
                (void) strcpy(cp, mop->mo_arg);
                cp += arglen;
            }
        }
    }
    return (0);
err:
    return (EOVERFLOW);
}

/*
 * Free a NULL-terminated cancel option list: every string in it and then
 * the pointer array itself (including its terminator slot).  A NULL list
 * is accepted and ignored.
 */
static void
vfs_freecancelopt(char **moc)
{
    int nent;

    if (moc == NULL)
        return;

    for (nent = 0; moc[nent] != NULL; nent++)
        kmem_free(moc[nent], strlen(moc[nent]) + 1);

    /* One extra slot for the terminating NULL pointer. */
    kmem_free(moc, (nent + 1) * sizeof (char *));
}

/*
 * Release the memory owned by a single mount option: its name, its cancel
 * list and its argument string.  The mntopt_t itself is not freed and its
 * pointers are not reset.
 */
static void
vfs_freeopt(mntopt_t *mop)
{
    char *name = mop->mo_name;
    char *arg = mop->mo_arg;

    if (name != NULL)
        kmem_free(name, strlen(name) + 1);

    vfs_freecancelopt(mop->mo_cancel);

    if (arg != NULL)
        kmem_free(arg, strlen(arg) + 1);
}

/*
 * Free a mount options table: every option in it and the option array.
 * Afterwards the table is empty (mo_count 0, mo_list NULL).  A table whose
 * count is already 0 is left untouched.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 *       to protect mp.
 */
void
vfs_freeopttbl(mntopts_t *mp)
{
    const uint_t nopts = mp->mo_count;
    uint_t idx;

    if (nopts == 0)
        return;

    for (idx = 0; idx < nopts; idx++)
        vfs_freeopt(&mp->mo_list[idx]);

    kmem_free(mp->mo_list, nopts * sizeof (mntopt_t));
    mp->mo_list = NULL;
    mp->mo_count = 0;
}


/*
 * Read operation of the mnttab dummy vnode: transfers nothing and always
 * reports success.  All arguments are ignored.
 */
/* ARGSUSED */
static int
vfs_mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
    caller_context_t *ct)
{
    return (0);
}

/*
 * Write operation of the mnttab dummy vnode: transfers nothing and always
 * reports success.  All arguments are ignored.
 */
/* ARGSUSED */
static int
vfs_mntdummywrite(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
    caller_context_t *ct)
{
    return (0);
}

/*
 * The dummy vnode is currently used only by file events notification
 * module which is just interested in the timestamps.
 *
 * Getattr operation of the mnttab dummy vnode: reports a regular file
 * with one link whose ctime/mtime are the recorded mnttab times; all
 * other attributes are zero.  Always returns 0.
 */
/* ARGSUSED */
static int
vfs_mntdummygetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
    /* Everything not explicitly set below is reported as zero. */
    bzero(vap, sizeof (vattr_t));
    vap->va_type = VREG;
    vap->va_nlink = 1;
    vap->va_ctime = vfs_mnttab_ctime;
    /*
     * it is ok to just copy mtime as the time will be monotonically
     * increasing.
     */
    vap->va_mtime = vfs_mnttab_mtime;
    /* The access time is reported as equal to the modification time. */
    vap->va_atime = vap->va_mtime;
    return (0);
}

/*
 * Create the global dummy vnode (vfs_mntdummyvp) that stands in for the
 * mnttab file, together with its vnode operations vector.  If the
 * operations vector cannot be built, a warning is logged and the dummy
 * vnode pointer is left unset.
 */
static void
vfs_mnttabvp_setup(void)
{
    vnode_t *tvp;
    vnodeops_t *vfs_mntdummyvnops;
    /* Operations implemented for the dummy vnode. */
    const fs_operation_def_t mnt_dummyvnodeops_template[] = {
        VOPNAME_READ,       { .vop_read = vfs_mntdummyread },
        VOPNAME_WRITE,      { .vop_write = vfs_mntdummywrite },
        VOPNAME_GETATTR,    { .vop_getattr = vfs_mntdummygetattr },
        VOPNAME_VNEVENT,    { .vop_vnevent = fs_vnevent_support },
        NULL,           NULL
    };

    if (vn_make_ops("mnttab", mnt_dummyvnodeops_template,
        &vfs_mntdummyvnops) != 0) {
        cmn_err(CE_WARN, "vfs_mnttabvp_setup: vn_make_ops failed");
        /* Shouldn't happen, but not bad enough to panic */
        return;
    }

    /*
     * A global dummy vnode is allocated to represent mntfs files.
     * The mntfs file (/etc/mnttab) can be monitored for file events
     * and receive an event when mnttab changes. Dummy VOP calls
     * will be made on this vnode. The file events notification module
     * intercepts this vnode and delivers relevant events.
     */
    tvp = vn_alloc(KM_SLEEP);
    tvp->v_flag = VNOMOUNT|VNOMAP|VNOSWAP|VNOCACHE;
    vn_setops(tvp, vfs_mntdummyvnops);
    tvp->v_type = VREG;
    /*
     * The mnt dummy ops do not reference v_data.
     * No other module intercepting this vnode should either.
     * Just set it to point to itself.
     */
    tvp->v_data = (caddr_t)tvp;
    tvp->v_vfsp = rootvfs;
    /* Publish the vnode; vfs_mnttab_rwop() is a no-op until this is set. */
    vfs_mntdummyvp = tvp;
}

/*
 * Issue a fake, zero-length I/O on the mnttab dummy vnode: a write if rw
 * is non-zero, otherwise a read.  The result of the VOP call is ignored.
 * Does nothing if the dummy vnode has not been set up.
 */
static void
vfs_mnttab_rwop(int rw)
{
    struct uio uio;
    struct iovec iov;
    char dummy[1];

    if (vfs_mntdummyvp == NULL)
        return;

    /* One empty I/O vector ... */
    bzero(&iov, sizeof (iov));
    iov.iov_base = dummy;
    iov.iov_len = 0;

    /* ... wrapped in a kernel-space request for zero bytes at offset 0. */
    bzero(&uio, sizeof (uio));
    uio.uio_iov = &iov;
    uio.uio_iovcnt = 1;
    uio.uio_loffset = 0;
    uio.uio_segflg = UIO_SYSSPACE;
    uio.uio_resid = 0;

    if (rw == 0) {
        (void) VOP_READ(vfs_mntdummyvp, &uio, 0, kcred, NULL);
    } else {
        (void) VOP_WRITE(vfs_mntdummyvp, &uio, 0, kcred, NULL);
    }
}

/*
 * Generate a write operation: issues a fake zero-length write on the
 * mnttab dummy vnode via vfs_mnttab_rwop().
 */
void
vfs_mnttab_writeop(void)
{
    vfs_mnttab_rwop(1);
}

/*
 * Generate a read operation: issues a fake zero-length read on the
 * mnttab dummy vnode via vfs_mnttab_rwop().
 */
void
vfs_mnttab_readop(void)
{
    vfs_mnttab_rwop(0);
}

/*
 * Free any mnttab information recorded in the vfs struct.
 * The vfs must not be on the vfs list.
 *
 * Drops the references held on the mount point and resource strings (and
 * clears the pointers) and frees the mount options table.
 */
static void
vfs_freemnttab(struct vfs *vfsp)
{
    ASSERT(!VFS_ON_LIST(vfsp));

    /*
     * Free device and mount point information
     */
    if (vfsp->vfs_mntpt != NULL) {
        refstr_rele(vfsp->vfs_mntpt);
        vfsp->vfs_mntpt = NULL;
    }
    if (vfsp->vfs_resource != NULL) {
        refstr_rele(vfsp->vfs_resource);
        vfsp->vfs_resource = NULL;
    }
    /*
     * Now free mount options information
     */
    vfs_freeopttbl(&vfsp->vfs_mntopts);
}

/*
 * Return the last mnttab modification time
 *
 * The time is copied into *ts.  The caller must hold the vfs list lock
 * (as reader or writer).
 */
void
vfs_mnttab_modtime(timespec_t *ts)
{
    ASSERT(RW_LOCK_HELD(&vfslist));
    *ts = vfs_mnttab_mtime;
}

/*
 * See if mnttab has changed since the time recorded in *old.  If it has,
 * *phpp is set to NULL; if it has not, *phpp is set to the mnttab pollhead
 * so that the caller can wait for the next change.
 */
void
vfs_mnttab_poll(timespec_t *old, struct pollhead **phpp)
{
    *phpp = (struct pollhead *)NULL;

    /*
     * Note: vfs_mnttab_mtime is deliberately read without the vfs list
     * lock; taking it here can deadlock against
     * vfs_mnttab_modtimeupd().  Going without is safe because tv_sec is
     * monotonically increasing.
     */
    if (old->tv_nsec == vfs_mnttab_mtime.tv_nsec &&
        old->tv_sec == vfs_mnttab_mtime.tv_sec) {
        /* Nothing changed: hand back the pollhead to wait on. */
        *phpp = &vfs_pollhd;
    }
}

/*
 * Update the mnttab modification time and wake up any waiters for
 * mnttab changes
 *
 * The caller must hold the vfs list lock as a writer.  The very first
 * update (previous mtime of zero) also records the mnttab creation time.
 * Pollers are woken and a fake write is issued on the mnttab dummy vnode
 * so that file event monitors are notified.
 */
void
vfs_mnttab_modtimeupd(void)
{
    hrtime_t oldhrt, newhrt;

    ASSERT(RW_WRITE_HELD(&vfslist));
    oldhrt = ts2hrt(&vfs_mnttab_mtime);
    gethrestime(&vfs_mnttab_mtime);
    newhrt = ts2hrt(&vfs_mnttab_mtime);
    /* First update ever: this is also the creation time. */
    if (oldhrt == (hrtime_t)0)
        vfs_mnttab_ctime = vfs_mnttab_mtime;
    /*
     * Attempt to provide unique mtime (like uniqtime but not).
     */
    if (newhrt == oldhrt) {
        newhrt++;
        hrt2ts(newhrt, &vfs_mnttab_mtime);
    }
    pollwakeup(&vfs_pollhd, (short)POLLRDBAND);
    vfs_mnttab_writeop();
}

/*
 * Unmount the file system vfsp.
 *
 * If the vfs covers a vnode, the caller must hold that vnode's vfs write
 * lock (see the ASSERT below); it is released here on both the success and
 * the failure path.  "flag" is passed to VFS_UNMOUNT(); with MS_FORCE the
 * preliminary VFS_SYNC() is skipped.  Returns 0 on success or the error
 * from VFS_UNMOUNT(), in which case the vfs stays mounted.
 */
int
dounmount(struct vfs *vfsp, int flag, cred_t *cr)
{
    vnode_t *coveredvp;
    int error;
    extern void teardown_vopstats(vfs_t *);

    /*
     * Get covered vnode. This will be NULL if the vfs is not linked
     * into the file system name space (i.e., domount() with MNT_NOSPICE).
     */
    coveredvp = vfsp->vfs_vnodecovered;
    ASSERT(coveredvp == NULL || vn_vfswlock_held(coveredvp));

    /*
     * Purge all dnlc entries for this vfs.
     */
    (void) dnlc_purge_vfsp(vfsp, 0);

    /* For forcible umount, skip VFS_SYNC() since it may hang */
    if ((flag & MS_FORCE) == 0)
        (void) VFS_SYNC(vfsp, 0, cr);

    /*
     * Lock the vfs to maintain fs status quo during unmount.  This
     * has to be done after the sync because ufs_update tries to acquire
     * the vfs_reflock.
     */
    vfs_lock_wait(vfsp);

    if (error = VFS_UNMOUNT(vfsp, flag, cr)) {
        /* The file system refused: undo the locking and report it. */
        vfs_unlock(vfsp);
        if (coveredvp != NULL)
            vn_vfsunlock(coveredvp);
    } else if (coveredvp != NULL) {
        teardown_vopstats(vfsp);
        /*
         * vfs_remove() will do a VN_RELE(vfsp->vfs_vnodecovered)
         * when it frees vfsp so we do a VN_HOLD() so we can
         * continue to use coveredvp afterwards.
         */
        VN_HOLD(coveredvp);
        vfs_remove(vfsp);
        vn_vfsunlock(coveredvp);
        VN_RELE(coveredvp);
    } else {
        teardown_vopstats(vfsp);
        /*
         * Release the reference to vfs that is not linked
         * into the name space.
         */
        vfs_unlock(vfsp);
        VFS_RELE(vfsp);
    }
    return (error);
}


/*
 * Vfs_unmountall() is called by uadmin() to unmount all
 * mounted file systems (except the root file system) during shutdown.
 * It follows the existing locking protocol when traversing the vfs list
 * to sync and unmount vfses. Even though there should be no
 * other thread running while the system is shutting down, it is prudent
 * to still follow the locking protocol.
 *
 * The list is walked backwards from the entry preceding rootvfs.  File
 * systems whose vfs lock or covered-vnode lock cannot be obtained are
 * skipped, and unmount errors are ignored.
 */
void
vfs_unmountall(void)
{
    struct vfs *vfsp;
    struct vfs *prev_vfsp = NULL;
    int error;

    /*
     * Toss all dnlc entries now so that the per-vfs sync
     * and unmount operations don't have to slog through
     * a bunch of uninteresting vnodes over and over again.
     */
    dnlc_purge();

    vfs_list_lock();
    for (vfsp = rootvfs->vfs_prev; vfsp != rootvfs; vfsp = prev_vfsp) {
        /* Remember where to continue before vfsp may go away. */
        prev_vfsp = vfsp->vfs_prev;

        if (vfs_lock(vfsp) != 0)
            continue;
        /* dounmount() expects the covered vnode to be write-locked. */
        error = vn_vfswlock(vfsp->vfs_vnodecovered);
        vfs_unlock(vfsp);
        if (error)
            continue;

        vfs_list_unlock();

        (void) VFS_SYNC(vfsp, SYNC_CLOSE, CRED());
        (void) dounmount(vfsp, 0, CRED());

        /*
         * Since we dropped the vfslist lock above we must
         * verify that prev_vfsp still exists, else start over.
         */
        vfs_list_lock();
        for (vfsp = rootvfs->vfs_prev;
            vfsp != rootvfs; vfsp = vfsp->vfs_prev)
            if (vfsp == prev_vfsp)
                break;
        /* prev_vfsp is gone from the list: restart from the tail. */
        if (vfsp == rootvfs && prev_vfsp != rootvfs)
            prev_vfsp = rootvfs->vfs_prev;
    }
    vfs_list_unlock();
}

/*
 * Record a mount in progress: append an entry for (dev, vfsp) to the end
 * of the vfs mount in progress list.
 */
void
vfs_addmip(dev_t dev, struct vfs *vfsp)
{
    struct ipmnt *entry;

    entry = kmem_alloc(sizeof (*entry), KM_SLEEP);
    entry->mip_dev = dev;
    entry->mip_vfsp = vfsp;
    entry->mip_next = NULL;

    mutex_enter(&vfs_miplist_mutex);
    if (vfs_miplist_end == NULL)
        vfs_miplist = entry;        /* list was empty */
    else
        vfs_miplist_end->mip_next = entry;
    vfs_miplist_end = entry;
    mutex_exit(&vfs_miplist_mutex);
}

/*
 * Called to remove an entry from the mount in progress list
 * Either because the mount completed or it failed.
 *
 * The first entry recorded for vfsp is unlinked and freed.  If the list
 * holds no entry for vfsp (which shouldn't happen) nothing is changed.
 */
void
vfs_delmip(struct vfs *vfsp)
{
    struct ipmnt *mipp, *mipprev;

    mutex_enter(&vfs_miplist_mutex);
    mipprev = NULL;
    for (mipp = vfs_miplist;
        mipp != NULL && mipp->mip_vfsp != vfsp; mipp = mipp->mip_next) {
        mipprev = mipp;
    }
    if (mipp == NULL) {
        /*
         * Shouldn't happen, but the list mutex must be dropped on
         * this path too or every later list operation would block.
         */
        mutex_exit(&vfs_miplist_mutex);
        return;
    }
    /* Unlink the entry, keeping head and tail pointers consistent. */
    if (mipp == vfs_miplist_end)
        vfs_miplist_end = mipprev;
    if (mipprev == NULL)
        vfs_miplist = mipp->mip_next;
    else
        mipprev->mip_next = mipp->mip_next;
    mutex_exit(&vfs_miplist_mutex);
    kmem_free(mipp, sizeof (struct ipmnt));
}

/*
 * vfs_add is called by a specific filesystem's mount routine to add
 * the new vfs into the vfs list/hash and to cover the mounted-on vnode.
 * The vfs should already have been locked by the caller.
 *
 * A hold is placed on the vfs, and on the covered vnode if there is one.
 * The VFS_RDONLY, VFS_NOSETUID/VFS_NODEVICES and VFS_NOMNTTAB bits of
 * vfs_flag are set or cleared to mirror MS_RDONLY, MS_NOSUID and
 * MS_NOMNTTAB in mflag.
 *
 * coveredvp is NULL if this is the root.
 */
void
vfs_add(vnode_t *coveredvp, struct vfs *vfsp, int mflag)
{
    int flags;

    ASSERT(vfs_lock_held(vfsp));
    VFS_HOLD(vfsp);

    /* Start with the mount-controlled bits off, then turn on as asked. */
    flags = vfsp->vfs_flag &
        ~(VFS_RDONLY | VFS_NOSETUID | VFS_NODEVICES | VFS_NOMNTTAB);
    if (mflag & MS_RDONLY)
        flags |= VFS_RDONLY;
    if (mflag & MS_NOSUID)
        flags |= (VFS_NOSETUID | VFS_NODEVICES);
    if (mflag & MS_NOMNTTAB)
        flags |= VFS_NOMNTTAB;

    /* Hook the vfs onto the vnode it covers, if any. */
    if (coveredvp != NULL) {
        ASSERT(vn_vfswlock_held(coveredvp));
        coveredvp->v_vfsmountedhere = vfsp;
        VN_HOLD(coveredvp);
    }
    vfsp->vfs_vnodecovered = coveredvp;
    vfsp->vfs_flag = flags;

    vfs_list_add(vfsp);
}

/*
 * Remove a vfs from the vfs list, null out the pointer from the
 * covered vnode to the vfs (v_vfsmountedhere), and null out the pointer
 * from the vfs to the covered vnode (vfs_vnodecovered). Release the
 * reference to the vfs and to the covered vnode.
 *
 * The caller must hold the vfs lock, which is released here, as well as
 * the covered vnode's vfs write lock, which is not.  Panics if asked to
 * remove the root vfs.
 *
 * Called from dounmount after it's confirmed with the file system
 * that the unmount is legal.
 */
void
vfs_remove(struct vfs *vfsp)
{
    vnode_t *vp;

    ASSERT(vfs_lock_held(vfsp));

    /*
     * Can't unmount root.  Should never happen because fs will
     * be busy.
     */
    if (vfsp == rootvfs)
        panic("vfs_remove: unmounting root");

    vfs_list_remove(vfsp);

    /*
     * Unhook from the file system name space.
     */
    vp = vfsp->vfs_vnodecovered;
    ASSERT(vn_vfswlock_held(vp));
    vp->v_vfsmountedhere = NULL;
    vfsp->vfs_vnodecovered = NULL;
    VN_RELE(vp);

    /*
     * Release lock and wakeup anybody waiting.
     */
    vfs_unlock(vfsp);
    VFS_RELE(vfsp);
}

/*
 * Lock a filesystem to prevent access to it while mounting,
 * unmounting and syncing.  The lock is requested as a writer without
 * waiting: returns 0 if it was obtained and EBUSY if it was not.
 * On success the reference taken on the lock entry is kept; it is dropped
 * by vfs_unlock().
 */
int
vfs_lock(vfs_t *vfsp)
{
    vn_vfslocks_entry_t *entry = vn_vfslocks_getlock(vfsp);

    if (!rwst_tryenter(&entry->ve_lock, RW_WRITER)) {
        /* Not acquired: give back the entry reference. */
        vn_vfslocks_rele(entry);
        return (EBUSY);
    }
    return (0);
}

/*
 * Reader counterpart of vfs_lock(): try to lock a filesystem as a reader
 * without waiting.  Returns 0 if the lock was obtained and EBUSY if it was
 * not.  On success the reference taken on the lock entry is kept; it is
 * dropped by vfs_unlock().
 */
int
vfs_rlock(vfs_t *vfsp)
{
    vn_vfslocks_entry_t *entry = vn_vfslocks_getlock(vfsp);

    if (!rwst_tryenter(&entry->ve_lock, RW_READER)) {
        /* Not acquired: give back the entry reference. */
        vn_vfslocks_rele(entry);
        return (EBUSY);
    }
    return (0);
}

/*
 * Lock a filesystem as a writer.  Unlike vfs_lock() there is no failure
 * return; presumably rwst_enter() waits until the lock is available.
 * The reference taken on the lock entry is kept until vfs_unlock().
 */
void
vfs_lock_wait(vfs_t *vfsp)
{
    vn_vfslocks_entry_t *vpvfsentry;

    vpvfsentry = vn_vfslocks_getlock(vfsp);
    rwst_enter(&vpvfsentry->ve_lock, RW_WRITER);
}

/*
 * Lock a filesystem as a reader.  Unlike vfs_rlock() there is no failure
 * return; presumably rwst_enter() waits until the lock is available.
 * The reference taken on the lock entry is kept until vfs_unlock().
 */
void
vfs_rlock_wait(vfs_t *vfsp)
{
    vn_vfslocks_entry_t *vpvfsentry;

    vpvfsentry = vn_vfslocks_getlock(vfsp);
    rwst_enter(&vpvfsentry->ve_lock, RW_READER);
}

/*
 * Unlock a locked filesystem.
 *
 * Releases the lock taken by vfs_lock(), vfs_rlock(), vfs_lock_wait() or
 * vfs_rlock_wait().  Does nothing once the system is panicking.
 */
void
vfs_unlock(vfs_t *vfsp)
{
    vn_vfslocks_entry_t *vpvfsentry;

    /*
     * vfs_unlock will mimic sema_v behaviour to fix 4748018.
     * And these changes should remain for the patch changes as it is.
     */
    if (panicstr)
        return;

    /*
     * ve_refcount needs to be dropped twice here.
     * 1. To release the reference taken by the call to
     *    vn_vfslocks_getlock() below.
     * 2. To release the reference kept by the locking routines
     *    (vfs_lock/vfs_rlock/vfs_lock_wait/vfs_rlock_wait).
     */

    vpvfsentry = vn_vfslocks_getlock(vfsp);
    vn_vfslocks_rele(vpvfsentry);

    rwst_exit(&vpvfsentry->ve_lock);
    vn_vfslocks_rele(vpvfsentry);
}

/*
 * Utility routine that allows a filesystem to construct its
 * fsid in "the usual way" - by munging some underlying dev_t and
 * the filesystem type number into the 64-bit fsid.  Note that
 * this implicitly relies on dev_t persistence to make filesystem
 * id's persistent.
 *
 * There's nothing to prevent an individual fs from constructing its
 * fsid in a different way, and indeed they should.
 *
 * Since we want fsids to be 32-bit quantities (so that they can be
 * exported identically by either 32-bit or 64-bit APIs, as well as
 * the fact that fsid's are "known" to NFS), we compress the device
 * number given down to 32-bits, and panic if that isn't possible.
 *
 * On return fsi->val[0] holds the compressed device number and
 * fsi->val[1] holds val.
 */
void
vfs_make_fsid(fsid_t *fsi, dev_t dev, int val)
{
    /* cmpldev() returning zero is treated as "dev does not fit". */
    if (!cmpldev((dev32_t *)&fsi->val[0], dev))
        panic("device number too big for fsid!");
    fsi->val[1] = val;
}

/*
 * Report whether the filesystem lock of vfsp is held as a writer
 * (non-zero if so).  Always reports "held" once the system is panicking.
 */
int
vfs_lock_held(vfs_t *vfsp)
{
    int held;
    vn_vfslocks_entry_t *vpvfsentry;

    /*
     * vfs_lock_held will mimic sema_held behaviour
     * if panicstr is set. And these changes should remain
     * for the patch changes as it is.
     */
    if (panicstr)
        return (1);

    vpvfsentry = vn_vfslocks_getlock(vfsp);
    held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);

    /* Only a peek: drop the entry reference taken just above. */
    vn_vfslocks_rele(vpvfsentry);
    return (held);
}

/*
 * Return the owner of the lock entry for vfsp as reported by
 * rwst_owner(), or NULL once panicstr is set.
 */
struct _kthread *
vfs_lock_owner(vfs_t *vfsp)
{
    vn_vfslocks_entry_t *entry;
    struct _kthread *holder;

    /*
     * With panicstr set, vfs_lock_owner does not consult the lock and
     * reports no owner.  These changes should remain for the patch
     * changes as they are.
     */
    if (panicstr)
        return (NULL);

    /* The lookup takes a reference on the entry; drop it when done. */
    entry = vn_vfslocks_getlock(vfsp);
    holder = rwst_owner(&entry->ve_lock);
    vn_vfslocks_rele(entry);

    return (holder);
}

/*
 * vfs list locking.
 *
 * Rather than manipulate the vfslist lock directly, we abstract into lock
 * and unlock routines to allow the locking implementation to be changed for
 * clustering.
 *
 * Whenever the vfs list is modified through its hash links, the overall list
 * lock must be obtained before locking the relevant hash bucket.  But to see
 * whether a given vfs is on the list, it suffices to obtain the lock for the
 * hash bucket without getting the overall list lock.  (See getvfs() below.)
 */

/* Acquire the vfs list lock (vfslist) as a writer. */
void
vfs_list_lock()
{
    rw_enter(&vfslist, RW_WRITER);
}

/* Acquire the vfs list lock (vfslist) as a reader. */
void
vfs_list_read_lock()
{
    rw_enter(&vfslist, RW_READER);
}

/*
 * Release the vfs list lock, whether it was taken by vfs_list_lock()
 * or by vfs_list_read_lock().
 */
void
vfs_list_unlock()
{
    rw_exit(&vfslist);
}

/*
 * Low level worker routines for adding entries to and removing entries from
 * the vfs list.
 */

/*
 * Link vfsp into the rvfs_list hash bucket selected by the device number
 * stored in vfsp->vfs_fsid.val[0].  A non-zero insert_at_head makes vfsp
 * the first entry of the bucket; otherwise it is appended at the end.
 * The caller must hold vfslist as a writer (asserted).
 */
static void
vfs_hash_add(struct vfs *vfsp, int insert_at_head)
{
    struct vfs **linkp;
    dev_t dev;
    int bucket;

    ASSERT(RW_WRITE_HELD(&vfslist));

    dev = expldev(vfsp->vfs_fsid.val[0]);
    bucket = VFSHASH(getmajor(dev), getminor(dev));

    mutex_enter(&rvfs_list[bucket].rvfs_lock);

    /*
     * linkp addresses the pointer that must be rewritten to splice vfsp
     * in: the bucket head for a head insert, or the terminating NULL
     * link for an append.  Appending means that LOFS with the same fsid
     * as UFS (or other) file systems will not hide the UFS.
     */
    linkp = &rvfs_list[bucket].rvfs_head;
    if (!insert_at_head) {
        while (*linkp != NULL)
            linkp = &(*linkp)->vfs_hash;
    }

    vfsp->vfs_hash = *linkp;
    *linkp = vfsp;

    rvfs_list[bucket].rvfs_len++;
    mutex_exit(&rvfs_list[bucket].rvfs_lock);
}


/*
 * Unlink vfsp from the rvfs_list hash bucket selected by the device
 * number stored in vfsp->vfs_fsid.val[0].  A warning is logged if vfsp
 * is not on that bucket's chain.  The caller must hold vfslist as a
 * writer (asserted).
 */
static void
vfs_hash_remove(struct vfs *vfsp)
{
    struct vfs **linkp;
    dev_t dev;
    int bucket;
    int unlinked = 0;

    ASSERT(RW_WRITE_HELD(&vfslist));

    dev = expldev(vfsp->vfs_fsid.val[0]);
    bucket = VFSHASH(getmajor(dev), getminor(dev));

    mutex_enter(&rvfs_list[bucket].rvfs_lock);

    /*
     * Walk the chain by link address so the head and interior cases
     * are handled by the same code.
     */
    for (linkp = &rvfs_list[bucket].rvfs_head; *linkp != NULL;
        linkp = &(*linkp)->vfs_hash) {
        if (*linkp == vfsp) {
            *linkp = vfsp->vfs_hash;
            rvfs_list[bucket].rvfs_len--;
            unlinked = 1;
            break;
        }
    }

    if (!unlinked)
        cmn_err(CE_WARN, "vfs_list_remove: vfs not found in hash");

    mutex_exit(&rvfs_list[bucket].rvfs_lock);
}


/*
 * Add vfsp to the global vfs list, to the vfs list of the zone its mount
 * point lies in, and to the fsid hash table, then update the mnttab
 * modification time.  Also records (and holds) the mounting process's
 * zone in vfsp->vfs_zone.
 */
void
vfs_list_add(struct vfs *vfsp)
{
    zone_t *zone;

    /*
     * The zone that owns the mount is the one that performed the mount.
     * Note that this isn't necessarily the same as the zone mounted into.
     * The corresponding zone_rele() will be done when the vfs_t is
     * being free'd.
     */
    vfsp->vfs_zone = curproc->p_zone;
    zone_hold(vfsp->vfs_zone);

    /*
     * Find the zone mounted into, and put this mount on its vfs list.
     * The reference returned here is dropped by the zone_rele() at the
     * end of this function.
     */
    zone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
    ASSERT(zone != NULL);
    /*
     * Special casing for the root vfs.  This structure is allocated
     * statically and hooked onto rootvfs at link time.  During the
     * vfs_mountroot call at system startup time, the root file system's
     * VFS_MOUNTROOT routine will call vfs_add with this root vfs struct
     * as argument.  The code below must detect and handle this special
     * case.  The only apparent justification for this special casing is
     * to ensure that the root file system appears at the head of the
     * list.
     *
     * XXX: I'm assuming that it's ok to do normal list locking when
     *  adding the entry for the root file system (this used to be
     *  done with no locks held).
     */
    vfs_list_lock();
    /*
     * Link into the vfs list proper.
     */
    if (vfsp == &root) {
        /*
         * Assert: This vfs is already on the list as its first entry.
         * Thus, there's nothing to do.
         */
        ASSERT(rootvfs == vfsp);
        /*
         * Add it to the head of the global zone's vfslist.
         */
        ASSERT(zone == global_zone);
        ASSERT(zone->zone_vfslist == NULL);
        zone->zone_vfslist = vfsp;
    } else {
        /*
         * Link to end of list using vfs_prev (as rootvfs is now a
         * doubly linked circular list) so list is in mount order for
         * mnttab use.
         */
        rootvfs->vfs_prev->vfs_next = vfsp;
        vfsp->vfs_prev = rootvfs->vfs_prev;
        rootvfs->vfs_prev = vfsp;
        vfsp->vfs_next = rootvfs;

        /*
         * Do it again for the zone-private list (which may be NULL).
         * When the list is empty vfsp simply becomes its head; its
         * vfs_zone_next/vfs_zone_prev links are not written here.
         */
        if (zone->zone_vfslist == NULL) {
            ASSERT(zone != global_zone);
            zone->zone_vfslist = vfsp;
        } else {
            zone->zone_vfslist->vfs_zone_prev->vfs_zone_next = vfsp;
            vfsp->vfs_zone_prev = zone->zone_vfslist->vfs_zone_prev;
            zone->zone_vfslist->vfs_zone_prev = vfsp;
            vfsp->vfs_zone_next = zone->zone_vfslist;
        }
    }

    /*
     * Link into the hash table, inserting it at the end, so that LOFS
     * with the same fsid as UFS (or other) file systems will not hide
     * the UFS.
     */
    vfs_hash_add(vfsp, 0);

    /*
     * update the mnttab modification time
     */
    vfs_mnttab_modtimeupd();
    vfs_list_unlock();
    zone_rele(zone);
}

/*
 * Remove vfsp from the fsid hash table, from the global vfs list and from
 * the vfs list of the zone its mount point lies in, then update the
 * mnttab modification time.  Must not be called for rootvfs (asserted).
 */
void
vfs_list_remove(struct vfs *vfsp)
{
    zone_t *zone;

    /* The reference returned here is dropped by zone_rele() below. */
    zone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
    ASSERT(zone != NULL);
    /*
     * Callers are responsible for preventing attempts to unmount the
     * root.
     */
    ASSERT(vfsp != rootvfs);

    vfs_list_lock();

    /*
     * Remove from hash.
     */
    vfs_hash_remove(vfsp);

    /*
     * Remove from vfs list.
     */
    vfsp->vfs_prev->vfs_next = vfsp->vfs_next;
    vfsp->vfs_next->vfs_prev = vfsp->vfs_prev;
    vfsp->vfs_next = vfsp->vfs_prev = NULL;

    /*
     * Remove from zone-specific vfs list.  If vfsp is the list head,
     * advance the head first; if vfsp is the only entry (it links to
     * itself) the head ends up NULL.
     */
    if (zone->zone_vfslist == vfsp)
        zone->zone_vfslist = vfsp->vfs_zone_next;

    if (vfsp->vfs_zone_next == vfsp) {
        ASSERT(vfsp->vfs_zone_prev == vfsp);
        ASSERT(zone->zone_vfslist == vfsp);
        zone->zone_vfslist = NULL;
    }

    vfsp->vfs_zone_prev->vfs_zone_next = vfsp->vfs_zone_next;
    vfsp->vfs_zone_next->vfs_zone_prev = vfsp->vfs_zone_prev;
    vfsp->vfs_zone_next = vfsp->vfs_zone_prev = NULL;

    /*
     * update the mnttab modification time
     */
    vfs_mnttab_modtimeupd();
    vfs_list_unlock();
    zone_rele(zone);
}

/*
 * Look up a mounted vfs by fsid.  Only the lock of the relevant hash
 * bucket is taken.  On a match the vfs is returned with a hold
 * (VFS_HOLD) placed on it; otherwise NULL is returned.
 */
struct vfs *
getvfs(fsid_t *fsid)
{
    int val0 = fsid->val[0];
    int val1 = fsid->val[1];
    dev_t dev = expldev(val0);
    int bucket = VFSHASH(getmajor(dev), getminor(dev));
    kmutex_t *bucket_lock = &rvfs_list[bucket].rvfs_lock;
    struct vfs *cur;

    mutex_enter(bucket_lock);
    for (cur = rvfs_list[bucket].rvfs_head; cur != NULL;
        cur = cur->vfs_hash) {
        if (cur->vfs_fsid.val[0] == val0 &&
            cur->vfs_fsid.val[1] == val1)
            break;
    }

    /* Take the hold while the bucket lock is still owned. */
    if (cur != NULL)
        VFS_HOLD(cur);
    mutex_exit(bucket_lock);

    return (cur);
}

/*
 * Search the vfs mount in progress list for a specified device/vfs entry.
 * Returns 0 if the first entry in the list that the device matches has the
 * given vfs pointer as well.  If the device matches but a different vfs
 * pointer is encountered in the list before the given vfs pointer then
 * a 1 is returned.
 */

int
vfs_devmounting(dev_t dev, struct vfs *vfsp)
{
    struct ipmnt *entry;
    int mismatch = 0;

    mutex_enter(&vfs_miplist_mutex);
    for (entry = vfs_miplist; entry != NULL; entry = entry->mip_next) {
        if (entry->mip_dev != dev)
            continue;

        /* Only the first entry for this device decides the answer. */
        mismatch = (entry->mip_vfsp != vfsp);
        break;
    }
    mutex_exit(&vfs_miplist_mutex);

    return (mismatch);
}

/*
 * Search the vfs list for a specified device.  Returns 1 if an entry is
 * found, or 0 if no suitable entry is found.
 */

int
vfs_devismounted(dev_t dev)
{
    struct vfs *cur;
    int found;

    vfs_list_read_lock();

    /* One full lap of the circular list, stopping early on a match. */
    cur = rootvfs;
    do {
        found = (cur->vfs_dev == dev);
        cur = cur->vfs_next;
    } while (!found && cur != rootvfs);

    vfs_list_unlock();
    return (found);
}

/*
 * Search the vfs list for a specified device.  Returns a pointer to it
 * or NULL if no suitable entry is found. The caller of this routine
 * is responsible for releasing the returned vfs pointer.
 */
struct vfs *
vfs_dev2vfsp(dev_t dev)
{
    struct vfs *cur;
    struct vfs *match = NULL;

    vfs_list_read_lock();
    cur = rootvfs;
    do {
        /*
         * Walking vfs_zone_next instead would be more efficient
         * when the caller is in a zone.  The only callers,
         * however, ustat(2) and umount2(2), don't seem to justify
         * the added complexity at present.
         */
        if (cur->vfs_dev == dev &&
            ZONE_PATH_VISIBLE(refstr_value(cur->vfs_mntpt),
            curproc->p_zone)) {
            /* Hold the vfs before the list lock is dropped. */
            VFS_HOLD(cur);
            match = cur;
            break;
        }
        cur = cur->vfs_next;
    } while (cur != rootvfs);
    vfs_list_unlock();

    return (match);
}

/*
 * Search the vfs list for a specified mntpoint.  Returns a pointer to it
 * or NULL if no suitable entry is found. The caller of this routine
 * is responsible for releasing the returned vfs pointer.
 *
 * Note that if multiple mntpoints match, the last one matching is
 * returned in an attempt to return the "top" mount when overlay
 * mounts are covering the same mount point.  This is accomplished by starting
 * at the end of the list and working our way backwards, stopping at the first
 * matching mount.
 */
struct vfs *
vfs_mntpoint2vfsp(const char *mp)
{
    zone_t *zone = curproc->p_zone;
    struct vfs *match = NULL;
    struct vfs *head;
    struct vfs *tail;
    struct vfs *cur;

    vfs_list_read_lock();
    if (getzoneid() == GLOBAL_ZONEID) {
        /*
         * The global zone may see filesystems in any zone, so walk
         * the global list backwards from its last entry.
         */
        tail = rootvfs->vfs_prev;
        cur = tail;
        do {
            if (strcmp(refstr_value(cur->vfs_mntpt), mp) == 0) {
                match = cur;
                break;
            }
            cur = cur->vfs_prev;
        } while (cur != tail);
    } else if ((head = zone->zone_vfslist) != NULL) {
        const char *mntpt;

        /*
         * Other zones walk only their own list, comparing against
         * the mount point as translated for the zone.
         */
        tail = head->vfs_zone_prev;
        cur = tail;
        do {
            mntpt = refstr_value(cur->vfs_mntpt);
            mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
            if (strcmp(mntpt, mp) == 0) {
                match = cur;
                break;
            }
            cur = cur->vfs_zone_prev;
        } while (cur != tail);
    }

    /* Hold the result before the list lock is dropped. */
    if (match != NULL)
        VFS_HOLD(match);
    vfs_list_unlock();

    return (match);
}

/*
 * Search the vfs list for a specified vfsops.
 * Returns 1 if a vfs entry using these ops is found, else 0.
 */
int
vfs_opsinuse(vfsops_t *ops)
{
    struct vfs *cur;
    int found;

    vfs_list_read_lock();

    /* One full lap of the circular list, stopping early on a match. */
    cur = rootvfs;
    do {
        found = (vfs_getops(cur) == ops);
        cur = cur->vfs_next;
    } while (!found && cur != rootvfs);

    vfs_list_unlock();
    return (found);
}

/*
 * Allocate an entry in vfssw for a file system type
 */
struct vfssw *
allocate_vfssw(const char *type)
{
    struct vfssw *slot;
    size_t namesz;

    /*
     * The vfssw table uses the empty string to identify an available
     * entry, so a type with a leading NUL cannot be added.
     */
    if (type[0] == '\0')
        return (NULL);

    /*
     * The name, including its terminator, is limited to the size of
     * the st_fstype array in struct stat.
     */
    namesz = strlen(type) + 1;
    if (namesz > _ST_FSTYPSZ)
        return (NULL);

    ASSERT(VFSSW_WRITE_LOCKED());

    /* Claim the first unallocated slot; slot 0 is never considered. */
    for (slot = &vfssw[1]; slot < &vfssw[nfstype]; slot++) {
        if (ALLOCATED_VFSSW(slot))
            continue;

        slot->vsw_name = kmem_alloc(namesz, KM_SLEEP);
        (void) strcpy(slot->vsw_name, type);
        ASSERT(slot->vsw_count == 0);
        slot->vsw_count = 1;
        mutex_init(&slot->vsw_lock, NULL, MUTEX_DEFAULT, NULL);
        return (slot);
    }

    /* Table is full. */
    return (NULL);
}

/*
 * Impose additional layer of translation between vfstype names
 * and module names in the filesystem.
 */
static const char *
vfs_to_modname(const char *vfstype)
{
    /* Exact-name aliases. */
    if (strcmp(vfstype, "proc") == 0)
        return ("procfs");
    if (strcmp(vfstype, "fd") == 0)
        return ("fdfs");

    /* Every type whose name starts with "nfs" maps to the "nfs" module. */
    if (strncmp(vfstype, "nfs", 3) == 0)
        return ("nfs");

    /* Anything else is used as the module name as-is. */
    return (vfstype);
}

/*
 * Find a vfssw entry given a file system type name.
 * Try to autoload the filesystem if it's not found.
 * If it's installed, return the vfssw locked to prevent unloading.
 */
struct vfssw *
vfs_getvfssw(const char *type)
{
    struct vfssw *vswp;
    const char *modname;

    RLOCK_VFSSW();
    /* A successful lookup returns the entry with a reference taken. */
    vswp = vfs_getvfsswbyname(type);
    modname = vfs_to_modname(type);

    if (rootdir == NULL) {
        /*
         * If we haven't yet loaded the root file system, then our
         * _init won't be called until later. Allocate vfssw entry,
         * because mod_installfs won't be called.
         */
        if (vswp == NULL) {
            /*
             * Trade the read lock for the write lock; the lookup
             * is repeated because the table may have changed
             * while no lock was held.
             */
            RUNLOCK_VFSSW();
            WLOCK_VFSSW();
            if ((vswp = vfs_getvfsswbyname(type)) == NULL) {
                if ((vswp = allocate_vfssw(type)) == NULL) {
                    WUNLOCK_VFSSW();
                    return (NULL);
                }
            }
            WUNLOCK_VFSSW();
            RLOCK_VFSSW();
        }
        if (!VFS_INSTALLED(vswp)) {
            RUNLOCK_VFSSW();
            (void) modloadonly("fs", modname);
        } else
            RUNLOCK_VFSSW();
        return (vswp);
    }

    /*
     * Try to load the filesystem.  Before calling modload(), we drop
     * our lock on the VFS switch table, and pick it up after the
     * module is loaded.  However, there is a potential race:  the
     * module could be unloaded after the call to modload() completes
     * but before we pick up the lock and drive on.  Therefore,
     * we keep reloading the module until we've loaded the module
     * _and_ we have the lock on the VFS switch table.
     */
    while (vswp == NULL || !VFS_INSTALLED(vswp)) {
        RUNLOCK_VFSSW();
        if (modload("fs", modname) == -1) {
            /*
             * The caller only sees NULL and so can never release
             * the reference taken by vfs_getvfsswbyname() on an
             * allocated-but-not-installed entry; drop it here.
             */
            if (vswp != NULL)
                vfs_unrefvfssw(vswp);
            return (NULL);
        }
        RLOCK_VFSSW();
        if (vswp == NULL)
            if ((vswp = vfs_getvfsswbyname(type)) == NULL)
                break;
    }
    RUNLOCK_VFSSW();

    return (vswp);
}

/*
 * Find a vfssw entry given a file system type name.
 */
struct vfssw *
vfs_getvfsswbyname(const char *type)
{
    struct vfssw *entry;

    ASSERT(VFSSW_LOCKED());

    /* Reject a missing or empty name up front. */
    if (type == NULL || type[0] == '\0')
        return (NULL);

    /* Slot 0 is skipped; a match is returned with a reference taken. */
    for (entry = &vfssw[1]; entry < &vfssw[nfstype]; entry++) {
        if (strcmp(type, entry->vsw_name) != 0)
            continue;

        vfs_refvfssw(entry);
        return (entry);
    }

    return (NULL);
}

/*
 * Find a vfssw entry given a set of vfsops.
 */
struct vfssw *
vfs_getvfsswbyvfsops(vfsops_t *vfsops)
{
    struct vfssw *entry;
    struct vfssw *match = NULL;

    RLOCK_VFSSW();
    for (entry = &vfssw[1]; entry < &vfssw[nfstype]; entry++) {
        /* Match on the address of the ops vector embedded in the entry. */
        if (!ALLOCATED_VFSSW(entry) || &entry->vsw_vfsops != vfsops)
            continue;

        /* Reference the entry before the table lock is released. */
        vfs_refvfssw(entry);
        match = entry;
        break;
    }
    RUNLOCK_VFSSW();

    return (match);
}

/*
 * Reference a vfssw entry.
 */
void
vfs_refvfssw(struct vfssw *vswp)
{

    /* vsw_count is only modified with the entry's vsw_lock held. */
    mutex_enter(&vswp->vsw_lock);
    vswp->vsw_count++;
    mutex_exit(&vswp->vsw_lock);
}

/*
 * Unreference a vfssw entry.
 */
void
vfs_unrefvfssw(struct vfssw *vswp)
{

    /*
     * vsw_count is only modified with the entry's vsw_lock held.  The
     * count is simply decremented; nothing is freed here.
     */
    mutex_enter(&vswp->vsw_lock);
    vswp->vsw_count--;
    mutex_exit(&vswp->vsw_lock);
}

/*
 * State shared by vfs_syncall() and vfs_syncprogress().
 */
int sync_timeout = 30;      /* timeout for syncing a page during panic */
int sync_timeleft;      /* portion of sync_timeout remaining */

static int sync_retries = 20;   /* number of retries when not making progress */
static int sync_triesleft;  /* portion of sync_retries remaining */

/*
 * Dirty page and buffer counts used by vfs_syncall(): old_* hold the lowest
 * counts seen so far, new_* the counts from the latest pass.
 */
static pgcnt_t old_pgcnt, new_pgcnt;
static int new_bufcnt, old_bufcnt;

/*
 * Sync all of the mounted filesystems, and then wait for the actual i/o to
 * complete.  We wait by counting the number of dirty pages and buffers,
 * pushing them out using bio_busy() and page_busy(), and then counting again.
 * This routine is used during both the uadmin A_SHUTDOWN code as well as
 * the SYNC phase of the panic code (see comments in panic.c).  It should only
 * be used after some higher-level mechanism has quiesced the system so that
 * new writes are not being initiated while we are waiting for completion.
 *
 * To ensure finite running time, our algorithm uses two timeout mechanisms:
 * sync_timeleft (a timer implemented by the omnipresent deadman() cyclic), and
 * sync_triesleft (a progress counter used by the vfs_syncall() loop below).
 * Together these ensure that syncing completes if our i/o paths are stuck.
 * The counters are declared above so they can be found easily in the debugger.
 *
 * The sync_timeleft counter is reset by bio_busy() and page_busy() using the
 * vfs_syncprogress() subroutine whenever we make progress through the lists of
 * pages and buffers.  It is decremented and expired by the deadman() cyclic.
 * When vfs_syncall() decides it is done, we disable the deadman() counter by
 * setting sync_timeleft to zero.  This timer guards against vfs_syncall()
 * deadlocking or hanging inside of a broken filesystem or driver routine.
 *
 * The sync_triesleft counter is updated by vfs_syncall() itself.  If we make
 * sync_retries consecutive calls to bio_busy() and page_busy() without
 * decreasing either the number of dirty buffers or dirty pages below the
 * lowest count we have seen so far, we give up and return from vfs_syncall().
 *
 * Each loop iteration ends with a call to delay() one second to allow time for
 * i/o completion and to permit the user time to read our progress messages.
 */
void
vfs_syncall(void)
{
    if (rootdir == NULL && !modrootloaded)
        return; /* panic during boot - no filesystems yet */

    printf("syncing file systems...");
    vfs_syncprogress();
    sync();

    vfs_syncprogress();
    sync_triesleft = sync_retries;

    /*
     * Start the low-water marks at their maximum values so that any
     * smaller count from the first pass registers as progress.
     */
    old_bufcnt = new_bufcnt = INT_MAX;
    old_pgcnt = new_pgcnt = ULONG_MAX;

    while (sync_triesleft > 0) {
        /* Remember the lowest counts seen so far. */
        old_bufcnt = MIN(old_bufcnt, new_bufcnt);
        old_pgcnt = MIN(old_pgcnt, new_pgcnt);

        new_bufcnt = bio_busy(B_TRUE);
        new_pgcnt = page_busy(B_TRUE);
        vfs_syncprogress();

        /* Nothing left to push out: we are done. */
        if (new_bufcnt == 0 && new_pgcnt == 0)
            break;

        /*
         * Progress on either count refills the retry budget;
         * otherwise one retry is used up.
         */
        if (new_bufcnt < old_bufcnt || new_pgcnt < old_pgcnt)
            sync_triesleft = sync_retries;
        else
            sync_triesleft--;

        /* Progress output: buffers in brackets, pages bare. */
        if (new_bufcnt)
            printf(" [%d]", new_bufcnt);
        if (new_pgcnt)
            printf(" %lu", new_pgcnt);

        delay(hz);
    }

    if (new_bufcnt != 0 || new_pgcnt != 0)
        printf(" done (not all i/o completed)\n");
    else
        printf(" done\n");

    /* Zero disables the deadman() countdown (see the comment above). */
    sync_timeleft = 0;
    delay(hz);
}

/*
 * If we are in the middle of the sync phase of panic, reset sync_timeleft to
 * sync_timeout to indicate that we are making progress and the deadman()
 * omnipresent cyclic should not yet time us out.  Note that it is safe to
 * store to sync_timeleft here since the deadman() is firing at high-level
 * on top of us.  If we are racing with the deadman(), either the deadman()
 * will decrement the old value and then we will reset it, or we will
 * reset it and then the deadman() will immediately decrement it.  In either
 * case, correct behavior results.
 */
void
vfs_syncprogress(void)
{
    /* Outside of panic (panicstr unset) this is a no-op. */
    if (panicstr)
        sync_timeleft = sync_timeout;
}

/*
 * Map VFS flags to statvfs flags.  These shouldn't really be separate
 * flags at all.
 */
uint_t
vf_to_stf(uint_t vf)
{
    uint_t stf;

    /* Each VFS_* bit that is set contributes its ST_* counterpart. */
    stf = ((vf & VFS_RDONLY) ? ST_RDONLY : 0);
    stf |= ((vf & VFS_NOSETUID) ? ST_NOSUID : 0);
    stf |= ((vf & VFS_NOTRUNC) ? ST_NOTRUNC : 0);

    return (stf);
}

/*
 * Entries for (illegal) fstype 0.
 */
/* ARGSUSED */
int
vfsstray_sync(struct vfs *vfsp, short arg, struct cred *cr)
{
    /*
     * Sync entry of the stray ops template (see vfsinit()); all
     * arguments are ignored and the call is reported at CE_PANIC level.
     */
    cmn_err(CE_PANIC, "stray vfs operation");
    return (0);
}

/*
 * Entries for (illegal) fstype 0.
 */
int
vfsstray(void)
{
    /*
     * Used for every non-sync entry of the stray ops template (see
     * vfsinit()); the call is reported at CE_PANIC level.
     */
    cmn_err(CE_PANIC, "stray vfs operation");
    return (0);
}

/*
 * Support for dealing with forced UFS unmount and its interaction with
 * LOFS. Could be used by any filesystem.
 * See bug 1203132.
 */
int
vfs_EIO(void)
{
    /* Unconditionally fail with EIO. */
    return (EIO);
}

/*
 * We've gotta define the op for sync separately, since the compiler gets
 * confused if we mix and match ANSI and normal style prototypes when
 * a "short" argument is present and spits out a warning.
 */
/*ARGSUSED*/
int
vfs_EIO_sync(struct vfs *vfsp, short arg, struct cred *cr)
{
    /* Arguments are ignored; unconditionally fail with EIO. */
    return (EIO);
}

/*
 * The dummy EIO file system.  vfsinit() builds EIO_vfsops from a template
 * whose entries all return EIO, initializes EIO_vfs with those ops and
 * marks it VFS_UNMOUNTED.
 */
vfs_t EIO_vfs;
vfsops_t *EIO_vfsops;

/*
 * Called from startup() to initialize all loaded vfs's
 */
void
vfsinit(void)
{
    struct vfssw *vswp;
    int error;
    extern int vopstats_enabled;
    extern void vopstats_startup();

    /* Every operation of the dummy EIO file system returns EIO. */
    static const fs_operation_def_t EIO_vfsops_template[] = {
        VFSNAME_MOUNT,      { .error = vfs_EIO },
        VFSNAME_UNMOUNT,    { .error = vfs_EIO },
        VFSNAME_ROOT,       { .error = vfs_EIO },
        VFSNAME_STATVFS,    { .error = vfs_EIO },
        VFSNAME_SYNC,       { .vfs_sync = vfs_EIO_sync },
        VFSNAME_VGET,       { .error = vfs_EIO },
        VFSNAME_MOUNTROOT,  { .error = vfs_EIO },
        VFSNAME_FREEVFS,    { .error = vfs_EIO },
        VFSNAME_VNSTATE,    { .error = vfs_EIO },
        NULL, NULL
    };

    /* Every operation of the (illegal) fstype 0 reports a stray call. */
    static const fs_operation_def_t stray_vfsops_template[] = {
        VFSNAME_MOUNT,      { .error = vfsstray },
        VFSNAME_UNMOUNT,    { .error = vfsstray },
        VFSNAME_ROOT,       { .error = vfsstray },
        VFSNAME_STATVFS,    { .error = vfsstray },
        VFSNAME_SYNC,       { .vfs_sync = vfsstray_sync },
        VFSNAME_VGET,       { .error = vfsstray },
        VFSNAME_MOUNTROOT,  { .error = vfsstray },
        VFSNAME_FREEVFS,    { .error = vfsstray },
        VFSNAME_VNSTATE,    { .error = vfsstray },
        NULL, NULL
    };

    /* Create vfs cache */
    vfs_cache = kmem_cache_create("vfs_cache", sizeof (struct vfs),
        sizeof (uintptr_t), NULL, NULL, NULL, NULL, NULL, 0);

    /* Initialize the vnode cache (file systems may use it during init). */
    vn_create_cache();

    /* Setup event monitor framework */
    fem_init();

    /*
     * Initialize the dummy stray file system type.  The result used to
     * be overwritten unchecked by the next call; report a failure the
     * same way as for the EIO template below.
     */
    error = vfs_setfsops(0, stray_vfsops_template, NULL);
    if (error != 0) {
        cmn_err(CE_WARN, "vfsinit: bad stray vfs ops template");
        /* Shouldn't happen, but not bad enough to panic */
    }

    /* Initialize the dummy EIO file system. */
    error = vfs_makefsops(EIO_vfsops_template, &EIO_vfsops);
    if (error != 0) {
        cmn_err(CE_WARN, "vfsinit: bad EIO vfs ops template");
        /* Shouldn't happen, but not bad enough to panic */
    }

    VFS_INIT(&EIO_vfs, EIO_vfsops, (caddr_t)NULL);

    /*
     * Default EIO_vfs.vfs_flag to VFS_UNMOUNTED so a lookup
     * on this vfs can immediately notice it's invalid.
     */
    EIO_vfs.vfs_flag |= VFS_UNMOUNTED;

    /*
     * Call the init routines of non-loadable filesystems only.
     * Filesystems which are loaded as separate modules will be
     * initialized by the module loading code instead.
     * Each init routine is passed the entry's index in vfssw and its
     * name, and runs with the vfssw read lock held.
     */

    for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
        RLOCK_VFSSW();
        if (vswp->vsw_init != NULL)
            (*vswp->vsw_init)(vswp - vfssw, vswp->vsw_name);
        RUNLOCK_VFSSW();
    }

    vopstats_startup();

    if (vopstats_enabled) {
        /* EIO_vfs can collect stats, but we don't retrieve them */
        initialize_vopstats(&EIO_vfs.vfs_vopstats);
        EIO_vfs.vfs_fstypevsp = NULL;
        EIO_vfs.vfs_vskap = NULL;
        EIO_vfs.vfs_flag |= VFS_STATS;
    }

    xattr_init();
}

vfs_t *
vfs_alloc(int kmflag)
{
    vfs_t *vfsp;

    vfsp = kmem_cache_alloc(vfs_cache, kmflag);

    /*
     * Do the simplest initialization here.
     * Everything else gets done in vfs_init()
     */
    bzero(vfsp, sizeof (vfs_t));
    return (vfsp);
}

/*
 * Release the resources hanging off a vfs_t (FEM head, implementation
 * private data, reference semaphore) and return it to vfs_cache.
 */
void
vfs_free(vfs_t *vfsp)
{
    /*
     * There is deliberately no assertion that vfsp->vfs_count is zero.
     * vfs_rele() gets here with a count of 0, but domount() also gets
     * here with a partially initialized vfs whose count is 1: it cannot
     * use VFS_RELE() unless VFS_MOUNT() returned successfully, because
     * VFS_MOUNT() is what fully initializes the vfs and its associated
     * data, and VFS_RELE() would call VFS_FREEVFS(), which may panic
     * the system on data structures that were never fully set up.
     */

    /* If FEM was in use, make sure everything gets cleaned up. */
    if (vfsp->vfs_femhead != NULL) {
        ASSERT(vfsp->vfs_femhead->femh_list == NULL);
        mutex_destroy(&vfsp->vfs_femhead->femh_lock);
        kmem_free(vfsp->vfs_femhead, sizeof (*(vfsp->vfs_femhead)));
        vfsp->vfs_femhead = NULL;
    }

    if (vfsp->vfs_implp != NULL)
        vfsimpl_teardown(vfsp);

    sema_destroy(&vfsp->vfs_reflock);
    kmem_cache_free(vfs_cache, vfsp);
}

/*
 * Increments the vfs reference count by one atomically.
 */
void
vfs_hold(vfs_t *vfsp)
{
    atomic_add_32(&vfsp->vfs_count, 1);
    /* A count of zero right after the increment would be a bug. */
    ASSERT(vfsp->vfs_count != 0);
}

/*
 * Decrements the vfs reference count by one atomically. When
 * vfs reference count becomes zero, it calls the file system
 * specific vfs_freevfs() to free up the resources.
 */
void
vfs_rele(vfs_t *vfsp)
{
    ASSERT(vfsp->vfs_count != 0);

    /* Any release other than the last one has nothing more to do. */
    if (atomic_add_32_nv(&vfsp->vfs_count, -1) != 0)
        return;

    /*
     * Last reference gone: let the file system free its data, then
     * release everything the vfs itself still holds.
     */
    VFS_FREEVFS(vfsp);
    lofi_remove(vfsp);
    if (vfsp->vfs_zone != NULL)
        zone_rele(vfsp->vfs_zone);
    vfs_freemnttab(vfsp);
    vfs_free(vfsp);
}

/*
 * Generic operations vector support.
 *
 * This is used to build operations vectors for both the vfs and vnode.
 * It's normally called only when a file system is loaded.
 *
 * There are many possible algorithms for this, including the following:
 *
 *   (1) scan the list of known operations; for each, see if the file system
 *       includes an entry for it, and fill it in as appropriate.
 *
 *   (2) set up defaults for all known operations.  scan the list of ops
 *       supplied by the file system; for each which is both supplied and
 *       known, fill it in.
 *
 *   (3) sort the lists of known ops & supplied ops; scan the list, filling
 *       in entries as we go.
 *
 * we choose (1) for simplicity, and because performance isn't critical here.
 * note that (2) could be sped up using a precomputed hash table on known ops.
 * (3) could be faster than either, but only if the lists were very large or
 * supplied in sorted order.
 *
 */

int
fs_build_vector(void *vector, int *unused_ops,
    const fs_operation_trans_def_t *translation,
    const fs_operation_def_t *operations)
{
    const fs_operation_trans_def_t *tp;
    const fs_operation_def_t *op;
    int supplied = 0;
    int matched = 0;

    /*
     * Count the operations supplied by the file system; the list is
     * terminated by an entry with a NULL name.
     */
    for (op = operations; op->name != NULL; op++)
        supplied++;

    /*
     * Walk through each operation known to our caller.  There is one
     * entry in the NULL-name-terminated translation table for each.
     */
    for (tp = translation; tp->name != NULL; tp++) {
        fs_generic_func_p func;
        fs_generic_func_p *slot;

        /*
         * Look for the first supplied operation with the same name;
         * op stops on the terminator if there is none.
         */
        for (op = operations; op->name != NULL; op++) {
            if (strcmp(op->name, tp->name) == 0)
                break;
        }

        if (op->name == NULL) {
            /* Not supplied at all: use the default function. */
            func = tp->defaultFunc;
        } else {
            matched++;

            /*
             * The "placeholder" functions fs_default and fs_error
             * select the corresponding function from the
             * translation table.
             */
            func = op->func.fs_generic;
            if (func == fs_default) {
                func = tp->defaultFunc;
            } else if (func == fs_error) {
                func = tp->errorFunc;
            } else if (func == NULL) {
                /* Null values are PROHIBITED */
                return (EINVAL);
            }
        }

        /* Store the function at this operation's offset in the vector. */
        slot = (fs_generic_func_p *)(((char *)vector) + tp->offset);
        *slot = func;
    }

    /* Supplied operations that matched no known operation. */
    *unused_ops = supplied - matched;

    return (0);
}

/* Placeholder functions, should never be called. */

int
fs_error(void)
{
    /*
     * fs_build_vector() compares against this function's address and
     * substitutes the translation table's errorFunc; an actual call is
     * reported at CE_PANIC level.
     */
    cmn_err(CE_PANIC, "fs_error called");
    return (0);
}

int
fs_default(void)
{
    /*
     * fs_build_vector() compares against this function's address and
     * substitutes the translation table's defaultFunc; an actual call is
     * reported at CE_PANIC level.
     */
    cmn_err(CE_PANIC, "fs_default called");
    return (0);
}

#ifdef __sparc

/*
 * Part of the implementation of booting off a mirrored root
 * involves a change of dev_t for the root device.  To
 * accomplish this, first remove the existing hash table
 * entry for the root device, convert to the new dev_t,
 * then re-insert in the hash table at the head of the list.
 */
void
vfs_root_redev(vfs_t *vfsp, dev_t ndev, int fstype)
{
    vfs_list_lock();

    /*
     * The hash bucket is derived from the fsid, so the entry must be
     * removed while the fsid still reflects the old device.
     */
    vfs_hash_remove(vfsp);

    vfsp->vfs_dev = ndev;
    vfs_make_fsid(&vfsp->vfs_fsid, ndev, fstype);

    /* Re-insert under the new fsid, at the head of its bucket. */
    vfs_hash_add(vfsp, 1);

    vfs_list_unlock();
}

#else /* x86 NEWBOOT */

#if defined(__x86)
extern int hvmboot_rootconf();
#endif /* __x86 */

int
rootconf()
{
    int error;
    struct vfssw *vsw;
    extern void pm_init();
    char *fstyp, *fsmod;

    getrootfs(&fstyp, &fsmod);

#if defined(__x86)
    /*
     * hvmboot_rootconf() is defined in the hvm_bootstrap misc module,
     * which lives in /platform/i86hvm, and hence is only available when
     * booted in an x86 hvm environment.  If the hvm_bootstrap misc module
     * is not available then the modstub for this function will return 0.
     * If the hvm_bootstrap misc module is available it will be loaded
     * and hvmboot_rootconf() will be invoked.
     */
    if (error = hvmboot_rootconf())
        return (error);
#endif /* __x86 */

    if (error = clboot_rootconf())
        return (error);

    if (modload("fs", fsmod) == -1)
        panic("Cannot _init %s module", fsmod);

    RLOCK_VFSSW();
    vsw = vfs_getvfsswbyname(fstyp);
    RUNLOCK_VFSSW();
    if (vsw == NULL) {
        cmn_err(CE_CONT, "Cannot find %s filesystem\n", fstyp);
        return (ENXIO);
    }
    VFS_INIT(rootvfs, &vsw->vsw_vfsops, 0);
    VFS_HOLD(rootvfs);

    /* always mount readonly first */
    rootvfs->vfs_flag |= VFS_RDONLY;

    pm_init();

    if (netboot)
        (void) strplumb();

    error = VFS_MOUNTROOT(rootvfs, ROOT_INIT);
    vfs_unrefvfssw(vsw);
    rootdev = rootvfs->vfs_dev;

    if (error)
        cmn_err(CE_CONT, "Cannot mount root on %s fstype %s\n",
            rootfs.bo_name, fstyp);
    else
        cmn_err(CE_CONT, "?root on %s fstype %s\n",
            rootfs.bo_name, fstyp);
    return (error);
}

/*
 * XXX only nfs calls this; it should probably be removed.
 *
 * When the system was booted with RB_ASKNAME set in boothowto, print
 * "<askfor> name: " and read the reply into name (at most namelen
 * bytes are offered to console_gets()).  Otherwise name is left
 * untouched.
 */
void
getfsname(char *askfor, char *name, size_t namelen)
{
    /* Nothing to do unless an interactive prompt was requested. */
    if ((boothowto & RB_ASKNAME) == 0)
        return;

    printf("%s name: ", askfor);
    console_gets(name, namelen);
}

/*
 * Init the root filesystem type (rootfs.bo_fstype) from the "fstype"
 * property.
 *
 * Filesystem types starting with the prefix "nfs" are diskless clients;
 * init the root file name (rootfs.bo_name), too.
 *
 * If we are booting via NFS we currently have these options:
 *  nfs -   dynamically choose NFS V2, V3, or V4 (default)
 *  nfs2 -  force NFS V2
 *  nfs3 -  force NFS V3
 *  nfs4 -  force NFS V4
 * Because we need to maintain backward compatibility with the naming
 * convention that the NFS V2 filesystem name is "nfs" (see vfs_conf.c)
 * we need to map "nfs" => "nfsdyn" and "nfs2" => "nfs".  The dynamic
 * nfs module will map the type back to either "nfs", "nfs3", or "nfs4".
 * This is only for root filesystems, all other uses such as cachefs
 * will expect that "nfs" == NFS V2.
 */
static void
getrootfs(char **fstypp, char **fsmodp)
{
    extern char *strplumb_get_netdev_path(void);
    char *propstr = NULL;

    /*
     * Outputs: *fstypp receives the file system type name and *fsmodp
     * the name of the module to load for it.  Both point either at
     * rootfs.bo_fstype or at a string literal; nothing is allocated
     * for the caller.
     */

    /*
     * Check fstype property; for diskless it should be one of "nfs",
     * "nfs2", "nfs3" or "nfs4".
     *
     * strncpy() leaves the destination unterminated when the source
     * is at least as long as the limit, so the final byte is forced
     * to NUL after every copy of a property value below.
     */
    if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
        DDI_PROP_DONTPASS, "fstype", &propstr)
        == DDI_SUCCESS) {
        (void) strncpy(rootfs.bo_fstype, propstr, BO_MAXFSNAME);
        rootfs.bo_fstype[BO_MAXFSNAME - 1] = '\0';
        ddi_prop_free(propstr);

    /*
     * if the boot property 'fstype' is not set, but 'zfs-bootfs' is set,
     * assume the type of this root filesystem is 'zfs'.
     */
    } else if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
        DDI_PROP_DONTPASS, "zfs-bootfs", &propstr)
        == DDI_SUCCESS) {
        (void) strncpy(rootfs.bo_fstype, "zfs", BO_MAXFSNAME);
        ddi_prop_free(propstr);
    }

    /* Anything not starting with "nfs" is its own module name. */
    if (strncmp(rootfs.bo_fstype, "nfs", 3) != 0) {
        *fstypp = *fsmodp = rootfs.bo_fstype;
        return;
    }

    ++netboot;

    /* Map "nfs2" => "nfs" and "nfs" => "nfsdyn"; other names stay. */
    if (strcmp(rootfs.bo_fstype, "nfs2") == 0)
        (void) strcpy(rootfs.bo_fstype, "nfs");
    else if (strcmp(rootfs.bo_fstype, "nfs") == 0)
        (void) strcpy(rootfs.bo_fstype, "nfsdyn");

    /*
     * check if path to network interface is specified in bootpath
     * or by a hypervisor domain configuration file.
     * XXPV - enable strplumb_get_netdev_path()
     */
    if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), DDI_PROP_DONTPASS,
        "xpv-nfsroot")) {
        (void) strcpy(rootfs.bo_name, "/xpvd/xnf@0");
    } else if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
        DDI_PROP_DONTPASS, "bootpath", &propstr)
        == DDI_SUCCESS) {
        (void) strncpy(rootfs.bo_name, propstr, BO_MAXOBJNAME);
        rootfs.bo_name[BO_MAXOBJNAME - 1] = '\0';
        ddi_prop_free(propstr);
    } else {
        /* attempt to determine netdev_path via boot_mac address */
        netdev_path = strplumb_get_netdev_path();
        if (netdev_path == NULL)
            panic("cannot find boot network interface");
        (void) strncpy(rootfs.bo_name, netdev_path, BO_MAXOBJNAME);
        rootfs.bo_name[BO_MAXOBJNAME - 1] = '\0';
    }
    *fstypp = rootfs.bo_fstype;
    *fsmodp = "nfs";
}
#endif

/*
 * VFS feature routines
 */

/*
 * A feature value is split in two: the upper 32 bits are used as the
 * index into vfs_featureset[], and the lower 32 bits are the bit mask
 * applied to that element.
 */
#define VFTINDEX(feature)   (((feature) >> 32) & 0xFFFFFFFF)
#define VFTBITS(feature)    ((feature) & 0xFFFFFFFFLL)

/*
 * Register a feature in the vfs by OR-ing its bits into the
 * vfs_featureset[] element selected by the feature's index.
 * Silently does nothing when vfsp has no vfs_implp.
 */
void
vfs_set_feature(vfs_t *vfsp, vfs_feature_t feature)
{
    /* vfs_featureset[] is found in *vfsp->vfs_implp, so that must exist. */
    if (vfsp->vfs_implp != NULL) {
        vfsp->vfs_featureset[VFTINDEX(feature)] |= VFTBITS(feature);
    }
}

/*
 * Query a vfs for a feature.
 * Returns 1 when any of the feature's bits are set in its
 * vfs_featureset[] element, and 0 otherwise (including when vfsp has
 * no vfs_implp).
 */
int
vfs_has_feature(vfs_t *vfsp, vfs_feature_t feature)
{
    /* vfs_featureset[] is found in *vfsp->vfs_implp */
    if (vfsp->vfs_implp == NULL)
        return (0);

    return ((vfsp->vfs_featureset[VFTINDEX(feature)] &
        VFTBITS(feature)) != 0);
}

/*
 * Propagate feature set from one vfs to another.
 *
 * Elements 1 through to->vfs_featureset[0] of the destination are
 * overwritten with the same elements of the source; element 0 itself
 * is not copied.  Nothing happens unless both vfs's have a vfs_implp.
 */
void
vfs_propagate_features(vfs_t *from, vfs_t *to)
{
    int slot;

    if (to->vfs_implp != NULL && from->vfs_implp != NULL) {
        slot = 1;
        while (slot <= to->vfs_featureset[0]) {
            to->vfs_featureset[slot] = from->vfs_featureset[slot];
            slot++;
        }
    }
}

/* printf-style template for a lofi device node path; %d is the minor. */
#define LOFICTL_PATH "/devices/pseudo/lofi@0:%d"

/*
 * Look up the vnode of the lofi device node backing this vfs.
 *
 * Returns -1 (with *vpp set to NULL) when vfs_lofi_minor is 0, i.e.
 * there is no lofi node; 0 on success with the vnode stored in *vpp;
 * and the positive lookupname() error otherwise, again with *vpp set
 * to NULL.
 */
int
vfs_get_lofi(vfs_t *vfsp, vnode_t **vpp)
{
    char *devpath;
    int pathlen;
    int rv;

    if (vfsp->vfs_lofi_minor == 0) {
        *vpp = NULL;
        return (-1);
    }

    /* Measure the formatted path first; the extra byte is for the NUL. */
    pathlen = snprintf(NULL, 0, LOFICTL_PATH, vfsp->vfs_lofi_minor) + 1;
    devpath = kmem_alloc(pathlen, KM_SLEEP);
    (void) snprintf(devpath, pathlen, LOFICTL_PATH, vfsp->vfs_lofi_minor);

    rv = lookupname(devpath, UIO_SYSSPACE, FOLLOW, NULLVPP, vpp);
    kmem_free(devpath, pathlen);

    if (rv != 0)
        *vpp = NULL;

    return (rv);
}