vfs.c revision 56c0b1dab35897a2c09f93b2a70746ba47df7523
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER START
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The contents of this file are subject to the terms of the
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * Common Development and Distribution License (the "License").
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * You may not use this file except in compliance with the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
fa9e4066f08beec538e775443c5be79dd423fcabahrens * or http://www.opensolaris.org/os/licensing.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * See the License for the specific language governing permissions
fa9e4066f08beec538e775443c5be79dd423fcabahrens * and limitations under the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * When distributing Covered Code, include this CDDL HEADER in each
fa9e4066f08beec538e775443c5be79dd423fcabahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If applicable, add the following below this CDDL HEADER, with the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * fields enclosed by brackets "[]" replaced with your own identifying
fa9e4066f08beec538e775443c5be79dd423fcabahrens * information: Portions Copyright [yyyy] [name of copyright owner]
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER END
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
b2634b9c57bbcfa01bb5dec2e196aec32957925fEric Taylor * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Use is subject to license terms.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* All Rights Reserved */
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * University Copyright- Copyright (c) 1982, 1986, 1988
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The Regents of the University of California
fa9e4066f08beec538e775443c5be79dd423fcabahrens * All Rights Reserved
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * University Acknowledgment- Portions of this document are derived from
fa9e4066f08beec538e775443c5be79dd423fcabahrens * software developed by the University of California, Berkeley, and its
fa9e4066f08beec538e775443c5be79dd423fcabahrens * contributors.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/types.h>
5aba80db367b061758a29154d304977d00d8e4f4ck#include <sys/t_lock.h>
5aba80db367b061758a29154d304977d00d8e4f4ck#include <sys/param.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/errno.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/user.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/fstyp.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/kmem.h>
91ebeef555ce7f899b6270a3c2df47b51f7ad59aahrens#include <sys/systm.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/proc.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/mount.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/vfs.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/vfs_opreg.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/fem.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/mntent.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/stat.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/statvfs.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/statfs.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/cred.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/vnode.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/rwstlock.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/dnlc.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/file.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/time.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/atomic.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/cmn_err.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/buf.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/swap.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/debug.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/vnode.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/modctl.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/ddi.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/pathname.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/bootconf.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/dumphdr.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/dc_ki.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/poll.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/sunddi.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/sysmacros.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/zone.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/policy.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/ctfs.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/objfs.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/console.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/reboot.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/attr.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/spa.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <sys/lofi.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <vm/page.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock#include <fs/fs_subr.h>
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/* Private interfaces to create vopstats-related data structures */
99653d4ee642c6528e88224f12409a5f23060994eschrockextern void initialize_vopstats(vopstats_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockextern vopstats_t *get_fstype_vopstats(struct vfs *, struct vfssw *);
99653d4ee642c6528e88224f12409a5f23060994eschrockextern vsk_anchor_t *get_vskstat_anchor(struct vfs *);
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_clearmntopt_nolock(mntopts_t *, const char *, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_setmntopt_nolock(mntopts_t *, const char *,
99653d4ee642c6528e88224f12409a5f23060994eschrock const char *, int, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic int vfs_optionisset_nolock(const mntopts_t *, const char *, char **);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_freemnttab(struct vfs *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_freeopt(mntopt_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_swapopttbl(mntopts_t *, mntopts_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_createopttbl_extend(mntopts_t *, const char *,
99653d4ee642c6528e88224f12409a5f23060994eschrock const mntopts_t *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic char **vfs_copycancelopt_extend(char **const, int);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_freecancelopt(char **);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void getrootfs(char **, char **);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic int getmacpath(dev_info_t *, void *);
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void vfs_mnttabvp_setup(void);
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockstruct ipmnt {
99653d4ee642c6528e88224f12409a5f23060994eschrock struct ipmnt *mip_next;
99653d4ee642c6528e88224f12409a5f23060994eschrock dev_t mip_dev;
99653d4ee642c6528e88224f12409a5f23060994eschrock struct vfs *mip_vfsp;
99653d4ee642c6528e88224f12409a5f23060994eschrock};
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic kmutex_t vfs_miplist_mutex;
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic struct ipmnt *vfs_miplist = NULL;
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic struct ipmnt *vfs_miplist_end = NULL;
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic kmem_cache_t *vfs_cache; /* Pointer to VFS kmem cache */
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * VFS global data.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockvnode_t *rootdir; /* pointer to root inode vnode. */
99653d4ee642c6528e88224f12409a5f23060994eschrockvnode_t *devicesdir; /* pointer to inode of devices root */
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahlvnode_t *devdir; /* pointer to inode of dev root */
99653d4ee642c6528e88224f12409a5f23060994eschrock
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahlchar *server_rootpath; /* root path for diskless clients */
99653d4ee642c6528e88224f12409a5f23060994eschrockchar *server_hostname; /* hostname of diskless server */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic struct vfs root;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic struct vfs devices;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic struct vfs dev;
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarksstruct vfs *rootvfs = &root; /* pointer to root vfs; head of VFS list. */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarksrvfs_t *rvfs_list; /* array of vfs ptrs for vfs hash list */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarksint vfshsz = 512; /* # of heads/locks in vfs hash arrays */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks /* must be power of 2! */
99653d4ee642c6528e88224f12409a5f23060994eschrocktimespec_t vfs_mnttab_ctime; /* mnttab created time */
99653d4ee642c6528e88224f12409a5f23060994eschrocktimespec_t vfs_mnttab_mtime; /* mnttab last modified time */
99653d4ee642c6528e88224f12409a5f23060994eschrockchar *vfs_dummyfstype = "\0";
99653d4ee642c6528e88224f12409a5f23060994eschrockstruct pollhead vfs_pollhd; /* for mnttab pollers */
99653d4ee642c6528e88224f12409a5f23060994eschrockstruct vnode *vfs_mntdummyvp; /* to fake mnttab read/write for file events */
99653d4ee642c6528e88224f12409a5f23060994eschrockint mntfstype; /* will be set once mnt fs is mounted */
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Table for generic options recognized in the VFS layer and acted
99653d4ee642c6528e88224f12409a5f23060994eschrock * on at this level before parsing file system specific options.
99653d4ee642c6528e88224f12409a5f23060994eschrock * The nosuid option is stronger than any of the devices and setuid
99653d4ee642c6528e88224f12409a5f23060994eschrock * options, so those are canceled when nosuid is seen.
99653d4ee642c6528e88224f12409a5f23060994eschrock *
3bb79bece53191f2cf27aa61a72ea1784a7ce700eschrock * All options which are added here need to be added to the
3bb79bece53191f2cf27aa61a72ea1784a7ce700eschrock * list of standard options in usr/src/cmd/fs.d/fslib.c as well.
06eeb2ad640ce72d394ac521094bed7681044408ek */
06eeb2ad640ce72d394ac521094bed7681044408ek/*
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahl * VFS Mount options table
f3861e1a2ceec23a5b699c24d814b7775a9e0b52ahl */
/*
 * Cancel lists: each NULL-terminated array names the options that are
 * canceled by the option the array is attached to in mntopts[].
 */

/* read-only / read-write */
static char *ro_cancel[] = { MNTOPT_RW, NULL };
static char *rw_cancel[] = { MNTOPT_RO, NULL };

/* suid / nosuid; nosuid also cancels the devices and setuid options */
static char *suid_cancel[] = { MNTOPT_NOSUID, NULL };
static char *nosuid_cancel[] = {
	MNTOPT_SUID,
	MNTOPT_DEVICES,
	MNTOPT_NODEVICES,
	MNTOPT_NOSETUID,
	MNTOPT_SETUID,
	NULL
};

/* devices / nodevices */
static char *devices_cancel[] = { MNTOPT_NODEVICES, NULL };
static char *nodevices_cancel[] = { MNTOPT_DEVICES, NULL };

/* setuid / nosetuid */
static char *setuid_cancel[] = { MNTOPT_NOSETUID, NULL };
static char *nosetuid_cancel[] = { MNTOPT_SETUID, NULL };

/* nbmand / nonbmand */
static char *nbmand_cancel[] = { MNTOPT_NONBMAND, NULL };
static char *nonbmand_cancel[] = { MNTOPT_NBMAND, NULL };

/* exec / noexec */
static char *exec_cancel[] = { MNTOPT_NOEXEC, NULL };
static char *noexec_cancel[] = { MNTOPT_EXEC, NULL };
b7661ccca92e6bf5160f4d5d2601efaeaa1f5161mmusante
b7661ccca92e6bf5160f4d5d2601efaeaa1f5161mmusantestatic const mntopt_t mntopts[] = {
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor/*
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor * option name cancel options default arg flags
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor */
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor { MNTOPT_REMOUNT, NULL, NULL,
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor MO_NODISPLAY, (void *)0 },
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor { MNTOPT_RO, ro_cancel, NULL, 0,
8488aeb5df27784d479c16cde06a9e25cd9a1152taylor (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks { MNTOPT_RW, rw_cancel, NULL, 0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks { MNTOPT_SUID, suid_cancel, NULL, 0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks { MNTOPT_NOSUID, nosuid_cancel, NULL, 0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks { MNTOPT_DEVICES, devices_cancel, NULL, 0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks { MNTOPT_NODEVICES, nodevices_cancel, NULL, 0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks { MNTOPT_SETUID, setuid_cancel, NULL, 0,
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks (void *)0 },
2f8aaab38e6371ad39ed90a1211ba8921acbb4d5eschrock { MNTOPT_NOSETUID, nosetuid_cancel, NULL, 0,
2f8aaab38e6371ad39ed90a1211ba8921acbb4d5eschrock (void *)0 },
fa94a07fd0519b8abfd871ad8fe60e6bebe1e2bbbrendan { MNTOPT_NBMAND, nbmand_cancel, NULL, 0,
fa94a07fd0519b8abfd871ad8fe60e6bebe1e2bbbrendan (void *)0 },
e7cbe64f7a72dae5cb44f100db60ca88f3313c65gw { MNTOPT_NONBMAND, nonbmand_cancel, NULL, 0,
e7cbe64f7a72dae5cb44f100db60ca88f3313c65gw (void *)0 },
e7cbe64f7a72dae5cb44f100db60ca88f3313c65gw { MNTOPT_EXEC, exec_cancel, NULL, 0,
15e6edf145a9c2bb0e0272cf8debe823bb97529bgw (void *)0 },
15e6edf145a9c2bb0e0272cf8debe823bb97529bgw { MNTOPT_NOEXEC, noexec_cancel, NULL, 0,
15e6edf145a9c2bb0e0272cf8debe823bb97529bgw (void *)0 },
89a89ebfd7c3b4056afe2c03e959e22824df777dlling};
89a89ebfd7c3b4056afe2c03e959e22824df777dlling
89a89ebfd7c3b4056afe2c03e959e22824df777dllingconst mntopts_t vfs_mntopts = {
e6ca193ded880d478cc39e34ef82d4be36e4445dGeorge Wilson sizeof (mntopts) / sizeof (mntopt_t),
e6ca193ded880d478cc39e34ef82d4be36e4445dGeorge Wilson (mntopt_t *)&mntopts[0]
e6ca193ded880d478cc39e34ef82d4be36e4445dGeorge Wilson};
842727c2f41f01b380de4f5e787d905702870f23Chris Kirby
842727c2f41f01b380de4f5e787d905702870f23Chris Kirby/*
842727c2f41f01b380de4f5e787d905702870f23Chris Kirby * File system operation dispatch functions.
842727c2f41f01b380de4f5e787d905702870f23Chris Kirby */
842727c2f41f01b380de4f5e787d905702870f23Chris Kirby
ca45db4129beff691dc46576c328149443788af2Chris Kirbyint
ca45db4129beff691dc46576c328149443788af2Chris Kirbyfsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
c08432ebe688ed6410b302771a9afd8e23e2a7cceschrock
c08432ebe688ed6410b302771a9afd8e23e2a7cceschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_unmount(vfs_t *vfsp, int flag, cred_t *cr)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
99653d4ee642c6528e88224f12409a5f23060994eschrockint
fa9e4066f08beec538e775443c5be79dd423fcabahrensfsop_root(vfs_t *vfsp, vnode_t **vpp)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens refstr_t *mntpt;
fa9e4066f08beec538e775443c5be79dd423fcabahrens int ret = (*(vfsp)->vfs_op->vfs_root)(vfsp, vpp);
fa9e4066f08beec538e775443c5be79dd423fcabahrens /*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Make sure this root has a path. With lofs, it is possible to have
99653d4ee642c6528e88224f12409a5f23060994eschrock * a NULL mountpoint.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrock if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) {
99653d4ee642c6528e88224f12409a5f23060994eschrock mntpt = vfs_getmntpoint(vfsp);
99653d4ee642c6528e88224f12409a5f23060994eschrock vn_setpath_str(*vpp, refstr_value(mntpt),
99653d4ee642c6528e88224f12409a5f23060994eschrock strlen(refstr_value(mntpt)));
99653d4ee642c6528e88224f12409a5f23060994eschrock refstr_rele(mntpt);
99653d4ee642c6528e88224f12409a5f23060994eschrock }
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock return (ret);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_statfs(vfs_t *vfsp, statvfs64_t *sp)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock return (*(vfsp)->vfs_op->vfs_statvfs)(vfsp, sp);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_sync(vfs_t *vfsp, short flag, cred_t *cr)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
b1b8ab34de515a5e83206da22c3d7e563241b021lling{
99653d4ee642c6528e88224f12409a5f23060994eschrock /*
99653d4ee642c6528e88224f12409a5f23060994eschrock * In order to handle system attribute fids in a manner
fa9e4066f08beec538e775443c5be79dd423fcabahrens * transparent to the underlying fs, we embed the fid for
99653d4ee642c6528e88224f12409a5f23060994eschrock * the sysattr parent object in the sysattr fid and tack on
99653d4ee642c6528e88224f12409a5f23060994eschrock * some extra bytes that only the sysattr layer knows about.
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling *
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling * This guarantees that sysattr fids are larger than other fids
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling * for this vfs. If the vfs supports sysattrs (implied
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling * by VFSFT_XVATTR support), we cannot have a size collision
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling * with XATTR_FIDSZ.
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling */
99653d4ee642c6528e88224f12409a5f23060994eschrock if (vfs_has_feature(vfsp, VFSFT_XVATTR) &&
99653d4ee642c6528e88224f12409a5f23060994eschrock fidp->fid_len == XATTR_FIDSZ)
ece3d9b3bacef51a5f34d993935eedbb7bb87059lling return (xattr_dir_vget(vfsp, vpp, fidp));
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock return (*(vfsp)->vfs_op->vfs_vget)(vfsp, vpp, fidp);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_mountroot(vfs_t *vfsp, enum whymountroot reason)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
/*
 * Invoke the vfs_freevfs entry of vfsp's operations vector.  Nothing is
 * returned.
 */
void
fsop_freefs(vfs_t *vfsp)
{
	vfsp->vfs_op->vfs_freevfs(vfsp);
}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockfsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate));
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
/*
 * Call the vfs_sync operation registered in the vfssw[] slot for
 * fstype, passing a NULL vfs pointer.  Returns that operation's result,
 * or ENOTSUP when the slot is not allocated or has no operations
 * installed.
 *
 * fstype must be a valid vfssw[] index; this is only ASSERTed.
 */
int
fsop_sync_by_kind(int fstype, short flag, cred_t *cr)
{
	ASSERT((fstype >= 0) && (fstype < nfstype));

	if (!ALLOCATED_VFSSW(&vfssw[fstype]) || !VFS_INSTALLED(&vfssw[fstype]))
		return (ENOTSUP);

	return (vfssw[fstype].vsw_vfsops.vfs_sync(NULL, flag, cr));
}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * File system initialization. vfs_setfsops() must be called from a file
99653d4ee642c6528e88224f12409a5f23060994eschrock * system's init routine.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrock
/*
 * Build the vfsops vector "actual" from the caller's operation
 * "template" by handing both, together with the translation table
 * below, to fs_build_vector().  Returns fs_build_vector()'s result;
 * unused_ops is passed straight through to it.
 *
 * Each table entry names one operation, gives the offset of its slot in
 * vfsops_t, and supplies two fallback routines.  The table ends with an
 * all-NULL/zero sentinel entry.
 */
static int
fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual,
    int *unused_ops)
{
	static const fs_operation_trans_def_t vfs_ops_table[] = {
		{ VFSNAME_MOUNT, offsetof(vfsops_t, vfs_mount),
		    fs_nosys, fs_nosys },

		{ VFSNAME_UNMOUNT, offsetof(vfsops_t, vfs_unmount),
		    fs_nosys, fs_nosys },

		{ VFSNAME_ROOT, offsetof(vfsops_t, vfs_root),
		    fs_nosys, fs_nosys },

		{ VFSNAME_STATVFS, offsetof(vfsops_t, vfs_statvfs),
		    fs_nosys, fs_nosys },

		/* No errors allowed */
		{ VFSNAME_SYNC, offsetof(vfsops_t, vfs_sync),
		    (fs_generic_func_p) fs_sync,
		    (fs_generic_func_p) fs_sync },

		{ VFSNAME_VGET, offsetof(vfsops_t, vfs_vget),
		    fs_nosys, fs_nosys },

		{ VFSNAME_MOUNTROOT, offsetof(vfsops_t, vfs_mountroot),
		    fs_nosys, fs_nosys },

		/* Shouldn't fail */
		{ VFSNAME_FREEVFS, offsetof(vfsops_t, vfs_freevfs),
		    (fs_generic_func_p)fs_freevfs,
		    (fs_generic_func_p)fs_freevfs },

		{ VFSNAME_VNSTATE, offsetof(vfsops_t, vfs_vnstate),
		    (fs_generic_func_p)fs_nosys,
		    (fs_generic_func_p)fs_nosys },

		/* end-of-table sentinel */
		{ NULL, 0, NULL, NULL }
	};

	return (fs_build_vector(actual, unused_ops, vfs_ops_table, template));
}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockvoid
99653d4ee642c6528e88224f12409a5f23060994eschrockzfs_boot_init() {
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock if (strcmp(rootfs.bo_fstype, MNTTYPE_ZFS) == 0)
99653d4ee642c6528e88224f12409a5f23060994eschrock spa_boot_init();
99653d4ee642c6528e88224f12409a5f23060994eschrock}
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
/*
 * Install the operations described by "template" into the vfssw[] slot
 * for fstype and mark the slot VSW_INSTALLED.
 *
 * Returns EINVAL when fstype is out of range or names an unallocated
 * slot, the error from fs_copyfsops() when the vector cannot be built,
 * and 0 on success.  On success, if "actual" is non-NULL, *actual is
 * pointed at the slot's vfsops vector.
 */
int
vfs_setfsops(int fstype, const fs_operation_def_t *template, vfsops_t **actual)
{
	vfsops_t *ops;
	int unused_ops;
	int error;

	/*
	 * Verify that fstype refers to a valid fs.  Note that
	 * 0 is valid since it's used to set "stray" ops.
	 * The range test runs first so the slot is never indexed with a
	 * bad fstype.
	 */
	if (fstype < 0 || fstype >= nfstype ||
	    !ALLOCATED_VFSSW(&vfssw[fstype]))
		return (EINVAL);

	/* Set up the operations vector. */
	ops = &vfssw[fstype].vsw_vfsops;
	error = fs_copyfsops(template, ops, &unused_ops);
	if (error != 0)
		return (error);

	vfssw[fstype].vsw_flag |= VSW_INSTALLED;

	if (actual != NULL)
		*actual = ops;

#if DEBUG
	if (unused_ops != 0)
		cmn_err(CE_WARN, "vfs_setfsops: %s: %d operations supplied "
		    "but not used", vfssw[fstype].vsw_name, unused_ops);
#endif

	return (0);
}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_makefsops(const fs_operation_def_t *template, vfsops_t **actual)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock int error;
99653d4ee642c6528e88224f12409a5f23060994eschrock int unused_ops;
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock *actual = (vfsops_t *)kmem_alloc(sizeof (vfsops_t), KM_SLEEP);
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock error = fs_copyfsops(template, *actual, &unused_ops);
99653d4ee642c6528e88224f12409a5f23060994eschrock if (error != 0) {
99653d4ee642c6528e88224f12409a5f23060994eschrock kmem_free(*actual, sizeof (vfsops_t));
99653d4ee642c6528e88224f12409a5f23060994eschrock *actual = NULL;
99653d4ee642c6528e88224f12409a5f23060994eschrock return (error);
b1b8ab34de515a5e83206da22c3d7e563241b021lling }
b1b8ab34de515a5e83206da22c3d7e563241b021lling
99653d4ee642c6528e88224f12409a5f23060994eschrock return (0);
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Free a vfsops structure created as a result of vfs_makefsops().
99653d4ee642c6528e88224f12409a5f23060994eschrock * NOTE: For a vfsops structure initialized by vfs_setfsops(), use
99653d4ee642c6528e88224f12409a5f23060994eschrock * vfs_freevfsops_by_type().
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockvoid
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_freevfsops(vfsops_t *vfsops)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
43afaaa8b73f73af765f4fa90f39a0f86cb8a364Eric Schrock kmem_free(vfsops, sizeof (vfsops_t));
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Since the vfsops structure is part of the vfssw table and wasn't
99653d4ee642c6528e88224f12409a5f23060994eschrock * really allocated, we're not really freeing anything. We keep
99653d4ee642c6528e88224f12409a5f23060994eschrock * the name for consistency with vfs_freevfsops(). We do, however,
99653d4ee642c6528e88224f12409a5f23060994eschrock * need to take care of a little bookkeeping.
99653d4ee642c6528e88224f12409a5f23060994eschrock * NOTE: For a vfsops structure created by vfs_setfsops(), use
99653d4ee642c6528e88224f12409a5f23060994eschrock * vfs_freevfsops_by_type().
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockint
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_freevfsops_by_type(int fstype)
b1b8ab34de515a5e83206da22c3d7e563241b021lling{
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling /* Verify that fstype refers to a loaded fs (and not fsid 0). */
b1b8ab34de515a5e83206da22c3d7e563241b021lling if ((fstype <= 0) || (fstype >= nfstype))
b1b8ab34de515a5e83206da22c3d7e563241b021lling return (EINVAL);
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling WLOCK_VFSSW();
b1b8ab34de515a5e83206da22c3d7e563241b021lling if ((vfssw[fstype].vsw_flag & VSW_INSTALLED) == 0) {
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks WUNLOCK_VFSSW();
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks return (EINVAL);
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks }
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
54d692b75b7a6f90ce7787309da5451f7458e66aGeorge Wilson vfssw[fstype].vsw_flag &= ~VSW_INSTALLED;
54d692b75b7a6f90ce7787309da5451f7458e66aGeorge Wilson WUNLOCK_VFSSW();
54d692b75b7a6f90ce7787309da5451f7458e66aGeorge Wilson
54d692b75b7a6f90ce7787309da5451f7458e66aGeorge Wilson return (0);
54d692b75b7a6f90ce7787309da5451f7458e66aGeorge Wilson}
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
99653d4ee642c6528e88224f12409a5f23060994eschrock/* Support routines used to reference vfs_op */
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/* Set the operations vector for a vfs */
99653d4ee642c6528e88224f12409a5f23060994eschrockvoid
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_setops(vfs_t *vfsp, vfsops_t *vfsops)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock vfsops_t *op;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(vfsp != NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(vfsops != NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens op = vfsp->vfs_op;
99653d4ee642c6528e88224f12409a5f23060994eschrock membar_consumer();
99653d4ee642c6528e88224f12409a5f23060994eschrock if (vfsp->vfs_femhead == NULL &&
fa9e4066f08beec538e775443c5be79dd423fcabahrens casptr(&vfsp->vfs_op, op, vfsops) == op) {
99653d4ee642c6528e88224f12409a5f23060994eschrock return;
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens fsem_setvfsops(vfsp, vfsops);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* Retrieve the operations vector for a vfs */
fa9e4066f08beec538e775443c5be79dd423fcabahrensvfsops_t *
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_getops(vfs_t *vfsp)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens vfsops_t *op;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(vfsp != NULL);
99653d4ee642c6528e88224f12409a5f23060994eschrock
fa9e4066f08beec538e775443c5be79dd423fcabahrens op = vfsp->vfs_op;
fa9e4066f08beec538e775443c5be79dd423fcabahrens membar_consumer();
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (vfsp->vfs_femhead == NULL && op == vfsp->vfs_op) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (op);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock } else {
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock return (fsem_getvfsops(vfsp));
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock }
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock}
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock/*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Returns non-zero (1) if the vfsops matches that of the vfs.
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Returns zero (0) if not.
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrockint
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrockvfs_matchops(vfs_t *vfsp, vfsops_t *vfsops)
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock{
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock return (vfs_getops(vfsp) == vfsops);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock}
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock/*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Returns non-zero (1) if the file system has installed a non-default,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * non-error vfs_sync routine. Returns zero (0) otherwise.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrensint
fa9e4066f08beec538e775443c5be79dd423fcabahrensvfs_can_sync(vfs_t *vfsp)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* vfs_sync() routine is not the default/error function */
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (vfs_getops(vfsp)->vfs_sync != fs_sync);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Initialize a vfs structure.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrensvoid
fa9e4066f08beec538e775443c5be79dd423fcabahrensvfs_init(vfs_t *vfsp, vfsops_t *op, void *data)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* Other initialization has been moved to vfs_alloc() */
fa9e4066f08beec538e775443c5be79dd423fcabahrens vfsp->vfs_count = 0;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vfsp->vfs_next = vfsp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vfsp->vfs_prev = vfsp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vfsp->vfs_zone_next = vfsp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vfsp->vfs_zone_prev = vfsp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vfsp->vfs_lofi_minor = 0;
fa9e4066f08beec538e775443c5be79dd423fcabahrens sema_init(&vfsp->vfs_reflock, 1, NULL, SEMA_DEFAULT, NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens vfsimpl_setup(vfsp);
fa9e4066f08beec538e775443c5be79dd423fcabahrens vfsp->vfs_data = (data);
5c7098917783942b65876f681a21342761227dadeschrock vfs_setops((vfsp), (op));
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Allocate and initialize the vfs implementation private data
fa9e4066f08beec538e775443c5be79dd423fcabahrens * structure, vfs_impl_t.
5c7098917783942b65876f681a21342761227dadeschrock */
fa9e4066f08beec538e775443c5be79dd423fcabahrensvoid
5c7098917783942b65876f681a21342761227dadeschrockvfsimpl_setup(vfs_t *vfsp)
5c7098917783942b65876f681a21342761227dadeschrock{
5c7098917783942b65876f681a21342761227dadeschrock int i;
5c7098917783942b65876f681a21342761227dadeschrock
5c7098917783942b65876f681a21342761227dadeschrock if (vfsp->vfs_implp != NULL) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens return;
5c7098917783942b65876f681a21342761227dadeschrock }
5c7098917783942b65876f681a21342761227dadeschrock
5c7098917783942b65876f681a21342761227dadeschrock vfsp->vfs_implp = kmem_alloc(sizeof (vfs_impl_t), KM_SLEEP);
5c7098917783942b65876f681a21342761227dadeschrock /* Note that these are #define'd in vfs.h */
5c7098917783942b65876f681a21342761227dadeschrock vfsp->vfs_vskap = NULL;
5c7098917783942b65876f681a21342761227dadeschrock vfsp->vfs_fstypevsp = NULL;
5c7098917783942b65876f681a21342761227dadeschrock
5c7098917783942b65876f681a21342761227dadeschrock /* Set size of counted array, then zero the array */
5c7098917783942b65876f681a21342761227dadeschrock vfsp->vfs_featureset[0] = VFS_FEATURE_MAXSZ - 1;
5c7098917783942b65876f681a21342761227dadeschrock for (i = 1; i < VFS_FEATURE_MAXSZ; i++) {
5c7098917783942b65876f681a21342761227dadeschrock vfsp->vfs_featureset[i] = 0;
5c7098917783942b65876f681a21342761227dadeschrock }
3d7072f8bd27709dba14f6fe336f149d25d9e207eschrock}
3d7072f8bd27709dba14f6fe336f149d25d9e207eschrock
5c7098917783942b65876f681a21342761227dadeschrock/*
5c7098917783942b65876f681a21342761227dadeschrock * Release the vfs_impl_t structure, if it exists. Some unbundled
5c7098917783942b65876f681a21342761227dadeschrock * filesystems may not use the newer version of vfs and thus
fa9e4066f08beec538e775443c5be79dd423fcabahrens * would not contain this implementation private data structure.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockvoid
99653d4ee642c6528e88224f12409a5f23060994eschrockvfsimpl_teardown(vfs_t *vfsp)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock vfs_impl_t *vip = vfsp->vfs_implp;
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock if (vip == NULL)
99653d4ee642c6528e88224f12409a5f23060994eschrock return;
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock kmem_free(vfsp->vfs_implp, sizeof (vfs_impl_t));
99653d4ee642c6528e88224f12409a5f23060994eschrock vfsp->vfs_implp = NULL;
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * VFS system calls: mount, umount, syssync, statfs, fstatfs, statvfs,
99653d4ee642c6528e88224f12409a5f23060994eschrock * fstatvfs, and sysfs moved to common/syscall.
c08432ebe688ed6410b302771a9afd8e23e2a7cceschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Update every mounted file system. We call the vfs_sync operation of
99653d4ee642c6528e88224f12409a5f23060994eschrock * each file system type, passing it a NULL vfsp to indicate that all
99653d4ee642c6528e88224f12409a5f23060994eschrock * mounted file systems of that type should be updated.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockvoid
99653d4ee642c6528e88224f12409a5f23060994eschrockvfs_sync(int flag)
99653d4ee642c6528e88224f12409a5f23060994eschrock{
99653d4ee642c6528e88224f12409a5f23060994eschrock struct vfssw *vswp;
99653d4ee642c6528e88224f12409a5f23060994eschrock RLOCK_VFSSW();
99653d4ee642c6528e88224f12409a5f23060994eschrock for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
91ebeef555ce7f899b6270a3c2df47b51f7ad59aahrens if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_refvfssw(vswp);
b2634b9c57bbcfa01bb5dec2e196aec32957925fEric Taylor RUNLOCK_VFSSW();
91ebeef555ce7f899b6270a3c2df47b51f7ad59aahrens (void) (*vswp->vsw_vfsops.vfs_sync)(NULL, flag,
99653d4ee642c6528e88224f12409a5f23060994eschrock CRED());
99653d4ee642c6528e88224f12409a5f23060994eschrock vfs_unrefvfssw(vswp);
99653d4ee642c6528e88224f12409a5f23060994eschrock RLOCK_VFSSW();
99653d4ee642c6528e88224f12409a5f23060994eschrock }
99653d4ee642c6528e88224f12409a5f23060994eschrock }
99653d4ee642c6528e88224f12409a5f23060994eschrock RUNLOCK_VFSSW();
99653d4ee642c6528e88224f12409a5f23060994eschrock}
99653d4ee642c6528e88224f12409a5f23060994eschrock
/*
 * Convenience wrapper: run vfs_sync() over all file system types with
 * a flag value of 0.
 */
void
sync(void)
{
	vfs_sync(0);
}
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
29ab75c9a733dad2978c4860efd954b5625e3467rm/*
069f55e237020c4a4907b235fc38fafc6442ce94Eric Schrock * External routines.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
b2634b9c57bbcfa01bb5dec2e196aec32957925fEric Taylor
99653d4ee642c6528e88224f12409a5f23060994eschrockkrwlock_t vfssw_lock; /* lock accesses to vfssw */
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Lock for accessing the vfs linked list. Initialized in vfs_mountroot(),
99653d4ee642c6528e88224f12409a5f23060994eschrock * but otherwise should be accessed only via vfs_list_lock() and
99653d4ee642c6528e88224f12409a5f23060994eschrock * vfs_list_unlock(). Also used to protect the timestamp for mods to the list.
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic krwlock_t vfslist;
99653d4ee642c6528e88224f12409a5f23060994eschrock
99653d4ee642c6528e88224f12409a5f23060994eschrock/*
99653d4ee642c6528e88224f12409a5f23060994eschrock * Mount devfs on /devices. This is done right after root is mounted
99653d4ee642c6528e88224f12409a5f23060994eschrock * to provide device access support for the system
99653d4ee642c6528e88224f12409a5f23060994eschrock */
99653d4ee642c6528e88224f12409a5f23060994eschrockstatic void
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrockvfs_mountdevices(void)
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris{
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris struct vfssw *vsw;
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris struct vnode *mvp;
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris struct mounta mounta = { /* fake mounta for devfs_mount() */
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris NULL,
d5b5bb256c576fe5ef26e0795bd40abe77f93246Rich Morris NULL,
5aba80db367b061758a29154d304977d00d8e4f4ck MS_SYSSPACE,
5aba80db367b061758a29154d304977d00d8e4f4ck NULL,
5aba80db367b061758a29154d304977d00d8e4f4ck NULL,
5aba80db367b061758a29154d304977d00d8e4f4ck 0,
5aba80db367b061758a29154d304977d00d8e4f4ck NULL,
5aba80db367b061758a29154d304977d00d8e4f4ck 0
5aba80db367b061758a29154d304977d00d8e4f4ck };
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck /*
5aba80db367b061758a29154d304977d00d8e4f4ck * _init devfs module to fill in the vfssw
5aba80db367b061758a29154d304977d00d8e4f4ck */
5aba80db367b061758a29154d304977d00d8e4f4ck if (modload("fs", "devfs") == -1)
5aba80db367b061758a29154d304977d00d8e4f4ck panic("Cannot _init devfs module");
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck /*
5aba80db367b061758a29154d304977d00d8e4f4ck * Hold vfs
5aba80db367b061758a29154d304977d00d8e4f4ck */
5aba80db367b061758a29154d304977d00d8e4f4ck RLOCK_VFSSW();
5aba80db367b061758a29154d304977d00d8e4f4ck vsw = vfs_getvfsswbyname("devfs");
5aba80db367b061758a29154d304977d00d8e4f4ck VFS_INIT(&devices, &vsw->vsw_vfsops, NULL);
5aba80db367b061758a29154d304977d00d8e4f4ck VFS_HOLD(&devices);
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck /*
5aba80db367b061758a29154d304977d00d8e4f4ck * Locate mount point
5aba80db367b061758a29154d304977d00d8e4f4ck */
5aba80db367b061758a29154d304977d00d8e4f4ck if (lookupname("/devices", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp))
5aba80db367b061758a29154d304977d00d8e4f4ck panic("Cannot find /devices");
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck /*
5aba80db367b061758a29154d304977d00d8e4f4ck * Perform the mount of /devices
5aba80db367b061758a29154d304977d00d8e4f4ck */
5aba80db367b061758a29154d304977d00d8e4f4ck if (VFS_MOUNT(&devices, mvp, &mounta, CRED()))
5aba80db367b061758a29154d304977d00d8e4f4ck panic("Cannot mount /devices");
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck RUNLOCK_VFSSW();
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck /*
5aba80db367b061758a29154d304977d00d8e4f4ck * Set appropriate members and add to vfs list for mnttab display
5aba80db367b061758a29154d304977d00d8e4f4ck */
5aba80db367b061758a29154d304977d00d8e4f4ck vfs_setresource(&devices, "/devices");
5aba80db367b061758a29154d304977d00d8e4f4ck vfs_setmntpoint(&devices, "/devices");
5aba80db367b061758a29154d304977d00d8e4f4ck
5aba80db367b061758a29154d304977d00d8e4f4ck /*
5aba80db367b061758a29154d304977d00d8e4f4ck * Hold the root of /devices so it won't go away
5aba80db367b061758a29154d304977d00d8e4f4ck */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock if (VFS_ROOT(&devices, &devicesdir))
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock panic("vfs_mountdevices: not devices root");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock if (vfs_lock(&devices) != 0) {
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock VN_RELE(devicesdir);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /devices");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock return;
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock }
9966ca11f4a1481acce85f690fa59e4084050627Matthew Ahrens
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock if (vn_vfswlock(mvp) != 0) {
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock vfs_unlock(&devices);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock VN_RELE(devicesdir);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock cmn_err(CE_NOTE, "Cannot acquire vfswlock of /devices");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock return;
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock }
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock vfs_add(mvp, &devices, 0);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock vn_vfsunlock(mvp);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock vfs_unlock(&devices);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock VN_RELE(devicesdir);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock}
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock/*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * mount the first instance of /dev to root and remain mounted
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrockstatic void
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrockvfs_mountdev1(void)
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock{
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock struct vfssw *vsw;
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock struct vnode *mvp;
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock struct mounta mounta = { /* fake mounta for sdev_mount() */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock NULL,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock NULL,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock MS_SYSSPACE | MS_OVERLAY,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock NULL,
a2eea2e101e6a163a537dcc6d4e3c4da2a0ea5b2ahrens NULL,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock 0,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock NULL,
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock 0
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock };
990b4856d0eaada6f8140335733a1b1771ed2746lling
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock /*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * _init dev module to fill in the vfssw
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock if (modload("fs", "dev") == -1)
990b4856d0eaada6f8140335733a1b1771ed2746lling cmn_err(CE_PANIC, "Cannot _init dev module\n");
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Hold vfs
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock RLOCK_VFSSW();
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock vsw = vfs_getvfsswbyname("dev");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock VFS_INIT(&dev, &vsw->vsw_vfsops, NULL);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock VFS_HOLD(&dev);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock /*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Locate mount point
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock if (lookupname("/dev", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp))
990b4856d0eaada6f8140335733a1b1771ed2746lling cmn_err(CE_PANIC, "Cannot find /dev\n");
990b4856d0eaada6f8140335733a1b1771ed2746lling
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock /*
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock * Perform the mount of /dev
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock */
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock if (VFS_MOUNT(&dev, mvp, &mounta, CRED()))
990b4856d0eaada6f8140335733a1b1771ed2746lling cmn_err(CE_PANIC, "Cannot mount /dev 1\n");
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling RUNLOCK_VFSSW();
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Set appropriate members and add to vfs list for mnttab display
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_setresource(&dev, "/dev");
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_setmntpoint(&dev, "/dev");
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Hold the root of /dev so it won't go away
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling if (VFS_ROOT(&dev, &devdir))
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock cmn_err(CE_PANIC, "vfs_mountdev1: not dev root");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock if (vfs_lock(&dev) != 0) {
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock VN_RELE(devdir);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /dev");
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock return;
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock }
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock if (vn_vfswlock(mvp) != 0) {
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock vfs_unlock(&dev);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock VN_RELE(devdir);
e9dbad6f263d5570ed7ff5443ec5b958af8c24d7eschrock cmn_err(CE_NOTE, "Cannot acquire vfswlock of /dev");
b1b8ab34de515a5e83206da22c3d7e563241b021lling return;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_add(mvp, &dev, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling vn_vfsunlock(mvp);
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_unlock(&dev);
990b4856d0eaada6f8140335733a1b1771ed2746lling VN_RELE(devdir);
990b4856d0eaada6f8140335733a1b1771ed2746lling}
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling/*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Mount required filesystem. This is done right after root is mounted.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746llingstatic void
990b4856d0eaada6f8140335733a1b1771ed2746llingvfs_mountfs(char *module, char *spec, char *path)
990b4856d0eaada6f8140335733a1b1771ed2746lling{
990b4856d0eaada6f8140335733a1b1771ed2746lling struct vnode *mvp;
990b4856d0eaada6f8140335733a1b1771ed2746lling struct mounta mounta;
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_t *vfsp;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling mounta.flags = MS_SYSSPACE | MS_DATA;
990b4856d0eaada6f8140335733a1b1771ed2746lling mounta.fstype = module;
990b4856d0eaada6f8140335733a1b1771ed2746lling mounta.spec = spec;
990b4856d0eaada6f8140335733a1b1771ed2746lling mounta.dir = path;
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling cmn_err(CE_WARN, "Cannot find %s", path);
b1b8ab34de515a5e83206da22c3d7e563241b021lling return;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (domount(NULL, &mounta, mvp, CRED(), &vfsp))
b1b8ab34de515a5e83206da22c3d7e563241b021lling cmn_err(CE_WARN, "Cannot mount %s", path);
b1b8ab34de515a5e83206da22c3d7e563241b021lling else
b1b8ab34de515a5e83206da22c3d7e563241b021lling VFS_RELE(vfsp);
b1b8ab34de515a5e83206da22c3d7e563241b021lling VN_RELE(mvp);
b1b8ab34de515a5e83206da22c3d7e563241b021lling}
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling/*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * vfs_mountroot is called by main() to mount the root filesystem.
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021llingvoid
b1b8ab34de515a5e83206da22c3d7e563241b021llingvfs_mountroot(void)
b1b8ab34de515a5e83206da22c3d7e563241b021lling{
b1b8ab34de515a5e83206da22c3d7e563241b021lling struct vnode *rvp = NULL;
b1b8ab34de515a5e83206da22c3d7e563241b021lling char *path;
b1b8ab34de515a5e83206da22c3d7e563241b021lling size_t plen;
b1b8ab34de515a5e83206da22c3d7e563241b021lling struct vfssw *vswp;
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling rw_init(&vfssw_lock, NULL, RW_DEFAULT, NULL);
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante rw_init(&vfslist, NULL, RW_DEFAULT, NULL);
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante /*
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante * Alloc the vfs hash bucket array and locks
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021lling rvfs_list = kmem_zalloc(vfshsz * sizeof (rvfs_t), KM_SLEEP);
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Call machine-dependent routine "rootconf" to choose a root
b1b8ab34de515a5e83206da22c3d7e563241b021lling * file system type.
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (rootconf())
b1b8ab34de515a5e83206da22c3d7e563241b021lling panic("vfs_mountroot: cannot mount root");
b1b8ab34de515a5e83206da22c3d7e563241b021lling /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Get vnode for '/'. Set up rootdir, u.u_rdir and u.u_cdir
b1b8ab34de515a5e83206da22c3d7e563241b021lling * to point to it. These are used by lookuppn() so that it
990b4856d0eaada6f8140335733a1b1771ed2746lling * knows where to start from ('/' or '.').
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_setmntpoint(rootvfs, "/");
990b4856d0eaada6f8140335733a1b1771ed2746lling if (VFS_ROOT(rootvfs, &rootdir))
990b4856d0eaada6f8140335733a1b1771ed2746lling panic("vfs_mountroot: no root vnode");
990b4856d0eaada6f8140335733a1b1771ed2746lling PTOU(curproc)->u_cdir = rootdir;
b1b8ab34de515a5e83206da22c3d7e563241b021lling VN_HOLD(PTOU(curproc)->u_cdir);
b1b8ab34de515a5e83206da22c3d7e563241b021lling PTOU(curproc)->u_rdir = NULL;
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Setup the global zone's rootvp, now that it exists.
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021lling global_zone->zone_rootvp = rootdir;
b1b8ab34de515a5e83206da22c3d7e563241b021lling VN_HOLD(global_zone->zone_rootvp);
b1b8ab34de515a5e83206da22c3d7e563241b021lling
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante /*
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante * Notify the module code that it can begin using the
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante * root filesystem instead of the boot program's services.
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante */
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante modrootloaded = 1;
b1b8ab34de515a5e83206da22c3d7e563241b021lling
deb8317b8f5925e3f6dd7cb6ed0cdd035f546a5aMark J Musante /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Special handling for a ZFS root file system.
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021lling zfs_boot_init();
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Set up mnttab information for root
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_setresource(rootvfs, rootfs.bo_name);
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Notify cluster software that the root filesystem is available.
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021lling clboot_mountroot();
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling /* Now that we're all done with the root FS, set up its vopstats */
b1b8ab34de515a5e83206da22c3d7e563241b021lling if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) != NULL) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling /* Set flag for statistics collection */
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (vswp->vsw_flag & VSW_STATS) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling initialize_vopstats(&rootvfs->vfs_vopstats);
b1b8ab34de515a5e83206da22c3d7e563241b021lling rootvfs->vfs_flag |= VFS_STATS;
b1b8ab34de515a5e83206da22c3d7e563241b021lling rootvfs->vfs_fstypevsp =
b1b8ab34de515a5e83206da22c3d7e563241b021lling get_fstype_vopstats(rootvfs, vswp);
b1b8ab34de515a5e83206da22c3d7e563241b021lling rootvfs->vfs_vskap = get_vskstat_anchor(rootvfs);
b1b8ab34de515a5e83206da22c3d7e563241b021lling }
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_unrefvfssw(vswp);
b1b8ab34de515a5e83206da22c3d7e563241b021lling }
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Mount /devices, /dev instance 1, /system/contract, /etc/mnttab,
b1b8ab34de515a5e83206da22c3d7e563241b021lling * /etc/svc/volatile, /etc/dfs/sharetab, /system/object, and /proc.
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_mountdevices();
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_mountdev1();
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_mountfs("ctfs", "ctfs", CTFS_ROOT);
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_mountfs("proc", "/proc", "/proc");
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_mountfs("mntfs", "/etc/mnttab", "/etc/mnttab");
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_mountfs("tmpfs", "/etc/svc/volatile", "/etc/svc/volatile");
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_mountfs("objfs", "objfs", OBJFS_ROOT);
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (getzoneid() == GLOBAL_ZONEID) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_mountfs("sharefs", "sharefs", "/etc/dfs/sharetab");
b1b8ab34de515a5e83206da22c3d7e563241b021lling }
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling#ifdef __sparc
b1b8ab34de515a5e83206da22c3d7e563241b021lling /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * This bit of magic can go away when we convert sparc to
b1b8ab34de515a5e83206da22c3d7e563241b021lling * the new boot architecture based on ramdisk.
b1b8ab34de515a5e83206da22c3d7e563241b021lling *
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Booting off a mirrored root volume:
b1b8ab34de515a5e83206da22c3d7e563241b021lling * At this point, we have booted and mounted root on a
b1b8ab34de515a5e83206da22c3d7e563241b021lling * single component of the mirror. Complete the boot
b1b8ab34de515a5e83206da22c3d7e563241b021lling * by configuring SVM and converting the root to the
990b4856d0eaada6f8140335733a1b1771ed2746lling * dev_t of the mirrored root device. This dev_t conversion
990b4856d0eaada6f8140335733a1b1771ed2746lling * only works because the underlying device doesn't change.
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (root_is_svm) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (svm_rootconf()) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling panic("vfs_mountroot: cannot remount root");
b1b8ab34de515a5e83206da22c3d7e563241b021lling }
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * mnttab should reflect the new root device
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_lock_wait(rootvfs);
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_setresource(rootvfs, rootfs.bo_name);
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_unlock(rootvfs);
b1b8ab34de515a5e83206da22c3d7e563241b021lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling#endif /* __sparc */
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * Look up the root device via devfs so that a dv_node is
b1b8ab34de515a5e83206da22c3d7e563241b021lling * created for it. The vnode is never VN_RELE()ed.
b1b8ab34de515a5e83206da22c3d7e563241b021lling * We allocate more than MAXPATHLEN so that the
b1b8ab34de515a5e83206da22c3d7e563241b021lling * buffer passed to i_ddi_prompath_to_devfspath() is
b1b8ab34de515a5e83206da22c3d7e563241b021lling * exactly MAXPATHLEN (the function expects a buffer
b1b8ab34de515a5e83206da22c3d7e563241b021lling * of that length).
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021lling plen = strlen("/devices");
b1b8ab34de515a5e83206da22c3d7e563241b021lling path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP);
b1b8ab34de515a5e83206da22c3d7e563241b021lling (void) strcpy(path, "/devices");
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen)
b1b8ab34de515a5e83206da22c3d7e563241b021lling != DDI_SUCCESS ||
b1b8ab34de515a5e83206da22c3d7e563241b021lling lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling
990b4856d0eaada6f8140335733a1b1771ed2746lling /* NUL terminate in case "path" has garbage */
b1b8ab34de515a5e83206da22c3d7e563241b021lling path[plen + MAXPATHLEN - 1] = '\0';
b1b8ab34de515a5e83206da22c3d7e563241b021lling#ifdef DEBUG
b1b8ab34de515a5e83206da22c3d7e563241b021lling cmn_err(CE_WARN, "!Cannot lookup root device: %s", path);
990b4856d0eaada6f8140335733a1b1771ed2746lling#endif
b1b8ab34de515a5e83206da22c3d7e563241b021lling }
b1b8ab34de515a5e83206da22c3d7e563241b021lling kmem_free(path, plen + MAXPATHLEN);
b1b8ab34de515a5e83206da22c3d7e563241b021lling vfs_mnttabvp_setup();
990b4856d0eaada6f8140335733a1b1771ed2746lling}
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling/*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * If remount failed and we're in a zone we need to check for the zone
990b4856d0eaada6f8140335733a1b1771ed2746lling * root path and strip it before the call to vfs_setpath().
b1b8ab34de515a5e83206da22c3d7e563241b021lling *
b1b8ab34de515a5e83206da22c3d7e563241b021lling * If strpath doesn't begin with the zone_rootpath the original
b1b8ab34de515a5e83206da22c3d7e563241b021lling * strpath is returned unchanged.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
b1b8ab34de515a5e83206da22c3d7e563241b021llingstatic const char *
b1b8ab34de515a5e83206da22c3d7e563241b021llingstripzonepath(const char *strpath)
b1b8ab34de515a5e83206da22c3d7e563241b021lling{
b1b8ab34de515a5e83206da22c3d7e563241b021lling char *str1, *str2;
b1b8ab34de515a5e83206da22c3d7e563241b021lling int i;
b1b8ab34de515a5e83206da22c3d7e563241b021lling zone_t *zonep = curproc->p_zone;
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (zonep->zone_rootpath == NULL || strpath == NULL) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling return (NULL);
b1b8ab34de515a5e83206da22c3d7e563241b021lling }
b1b8ab34de515a5e83206da22c3d7e563241b021lling
b1b8ab34de515a5e83206da22c3d7e563241b021lling /*
b1b8ab34de515a5e83206da22c3d7e563241b021lling * we check for the end of the string at one past the
b1b8ab34de515a5e83206da22c3d7e563241b021lling * current position because the zone_rootpath always
b1b8ab34de515a5e83206da22c3d7e563241b021lling * ends with "/" but we don't want to strip that off.
b1b8ab34de515a5e83206da22c3d7e563241b021lling */
b1b8ab34de515a5e83206da22c3d7e563241b021lling str1 = zonep->zone_rootpath;
b1b8ab34de515a5e83206da22c3d7e563241b021lling str2 = (char *)strpath;
b1b8ab34de515a5e83206da22c3d7e563241b021lling ASSERT(str1[0] != '\0');
b1b8ab34de515a5e83206da22c3d7e563241b021lling for (i = 0; str1[i + 1] != '\0'; i++) {
b1b8ab34de515a5e83206da22c3d7e563241b021lling if (str1[i] != str2[i])
b1b8ab34de515a5e83206da22c3d7e563241b021lling return ((char *)strpath);
b1b8ab34de515a5e83206da22c3d7e563241b021lling }
b1b8ab34de515a5e83206da22c3d7e563241b021lling return (&str2[i]);
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks}
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling/*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Check to see if our "block device" is actually a file. If so,
990b4856d0eaada6f8140335733a1b1771ed2746lling * automatically add a lofi device, and keep track of this fact.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746llingstatic int
990b4856d0eaada6f8140335733a1b1771ed2746llinglofi_add(const char *fsname, struct vfs *vfsp,
990b4856d0eaada6f8140335733a1b1771ed2746lling mntopts_t *mntopts, struct mounta *uap)
990b4856d0eaada6f8140335733a1b1771ed2746lling{
990b4856d0eaada6f8140335733a1b1771ed2746lling int fromspace = (uap->flags & MS_SYSSPACE) ?
990b4856d0eaada6f8140335733a1b1771ed2746lling UIO_SYSSPACE : UIO_USERSPACE;
990b4856d0eaada6f8140335733a1b1771ed2746lling struct lofi_ioctl *li = NULL;
990b4856d0eaada6f8140335733a1b1771ed2746lling struct vnode *vp = NULL;
990b4856d0eaada6f8140335733a1b1771ed2746lling struct pathname pn = { NULL };
990b4856d0eaada6f8140335733a1b1771ed2746lling ldi_ident_t ldi_id;
990b4856d0eaada6f8140335733a1b1771ed2746lling ldi_handle_t ldi_hdl;
990b4856d0eaada6f8140335733a1b1771ed2746lling vfssw_t *vfssw;
990b4856d0eaada6f8140335733a1b1771ed2746lling int minor;
990b4856d0eaada6f8140335733a1b1771ed2746lling int err = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (fsname == NULL ||
990b4856d0eaada6f8140335733a1b1771ed2746lling (vfssw = vfs_getvfssw(fsname)) == NULL)
990b4856d0eaada6f8140335733a1b1771ed2746lling return (0);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (!(vfssw->vsw_flag & VSW_CANLOFI)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_unrefvfssw(vfssw);
990b4856d0eaada6f8140335733a1b1771ed2746lling return (0);
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_unrefvfssw(vfssw);
990b4856d0eaada6f8140335733a1b1771ed2746lling vfssw = NULL;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (pn_get(uap->spec, fromspace, &pn) != 0)
990b4856d0eaada6f8140335733a1b1771ed2746lling return (0);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (lookupname(uap->spec, fromspace, FOLLOW, NULL, &vp) != 0)
990b4856d0eaada6f8140335733a1b1771ed2746lling goto out;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (vp->v_type != VREG)
990b4856d0eaada6f8140335733a1b1771ed2746lling goto out;
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
990b4856d0eaada6f8140335733a1b1771ed2746lling /* OK, this is a lofi mount. */
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
990b4856d0eaada6f8140335733a1b1771ed2746lling if ((uap->flags & (MS_REMOUNT|MS_GLOBAL)) ||
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_optionisset_nolock(mntopts, MNTOPT_SUID, NULL) ||
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks vfs_optionisset_nolock(mntopts, MNTOPT_SETUID, NULL) ||
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_optionisset_nolock(mntopts, MNTOPT_DEVICES, NULL)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling err = EINVAL;
990b4856d0eaada6f8140335733a1b1771ed2746lling goto out;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling ldi_id = ldi_ident_from_anon();
990b4856d0eaada6f8140335733a1b1771ed2746lling li = kmem_zalloc(sizeof (*li), KM_SLEEP);
990b4856d0eaada6f8140335733a1b1771ed2746lling (void) strlcpy(li->li_filename, pn.pn_path, MAXPATHLEN + 1);
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks /*
069f55e237020c4a4907b235fc38fafc6442ce94Eric Schrock * The lofi control node is currently exclusive-open. We'd like
990b4856d0eaada6f8140335733a1b1771ed2746lling * to improve this, but in the meantime, we'll loop waiting for
f67f35c39aa272d43489ee49625b4965cc83add2Eric Schrock * access.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling for (;;) {
990b4856d0eaada6f8140335733a1b1771ed2746lling err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL,
990b4856d0eaada6f8140335733a1b1771ed2746lling kcred, &ldi_hdl, ldi_id);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (err != EBUSY)
990b4856d0eaada6f8140335733a1b1771ed2746lling break;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if ((err = delay_sig(hz / 8)) == EINTR)
990b4856d0eaada6f8140335733a1b1771ed2746lling break;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (err)
990b4856d0eaada6f8140335733a1b1771ed2746lling goto out2;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
990b4856d0eaada6f8140335733a1b1771ed2746lling FREAD | FWRITE | FEXCL | FKIOCTL, kcred, &minor);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (!err)
990b4856d0eaada6f8140335733a1b1771ed2746lling vfsp->vfs_lofi_minor = minor;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746llingout2:
990b4856d0eaada6f8140335733a1b1771ed2746lling ldi_ident_release(ldi_id);
990b4856d0eaada6f8140335733a1b1771ed2746llingout:
990b4856d0eaada6f8140335733a1b1771ed2746lling if (li != NULL)
990b4856d0eaada6f8140335733a1b1771ed2746lling kmem_free(li, sizeof (*li));
990b4856d0eaada6f8140335733a1b1771ed2746lling if (vp != NULL)
990b4856d0eaada6f8140335733a1b1771ed2746lling VN_RELE(vp);
990b4856d0eaada6f8140335733a1b1771ed2746lling pn_free(&pn);
990b4856d0eaada6f8140335733a1b1771ed2746lling return (err);
990b4856d0eaada6f8140335733a1b1771ed2746lling}
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746llingstatic void
990b4856d0eaada6f8140335733a1b1771ed2746llinglofi_remove(struct vfs *vfsp)
990b4856d0eaada6f8140335733a1b1771ed2746lling{
990b4856d0eaada6f8140335733a1b1771ed2746lling struct lofi_ioctl *li = NULL;
990b4856d0eaada6f8140335733a1b1771ed2746lling ldi_ident_t ldi_id;
990b4856d0eaada6f8140335733a1b1771ed2746lling ldi_handle_t ldi_hdl;
990b4856d0eaada6f8140335733a1b1771ed2746lling int err;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (vfsp->vfs_lofi_minor == 0)
990b4856d0eaada6f8140335733a1b1771ed2746lling return;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling ldi_id = ldi_ident_from_anon();
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling li = kmem_zalloc(sizeof (*li), KM_SLEEP);
990b4856d0eaada6f8140335733a1b1771ed2746lling li->li_minor = vfsp->vfs_lofi_minor;
990b4856d0eaada6f8140335733a1b1771ed2746lling li->li_cleanup = B_TRUE;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling do {
990b4856d0eaada6f8140335733a1b1771ed2746lling err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL,
990b4856d0eaada6f8140335733a1b1771ed2746lling kcred, &ldi_hdl, ldi_id);
990b4856d0eaada6f8140335733a1b1771ed2746lling } while (err == EBUSY);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (err)
990b4856d0eaada6f8140335733a1b1771ed2746lling goto out;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling err = ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE_MINOR, (intptr_t)li,
990b4856d0eaada6f8140335733a1b1771ed2746lling FREAD | FWRITE | FEXCL | FKIOCTL, kcred, NULL);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (!err)
990b4856d0eaada6f8140335733a1b1771ed2746lling vfsp->vfs_lofi_minor = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746llingout:
990b4856d0eaada6f8140335733a1b1771ed2746lling ldi_ident_release(ldi_id);
990b4856d0eaada6f8140335733a1b1771ed2746lling if (li != NULL)
990b4856d0eaada6f8140335733a1b1771ed2746lling kmem_free(li, sizeof (*li));
990b4856d0eaada6f8140335733a1b1771ed2746lling}
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling/*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Common mount code. Called from the system call entry point, from autofs,
990b4856d0eaada6f8140335733a1b1771ed2746lling * nfsv4 trigger mounts, and from pxfs.
990b4856d0eaada6f8140335733a1b1771ed2746lling *
990b4856d0eaada6f8140335733a1b1771ed2746lling * Takes the effective file system type, mount arguments, the mount point
990b4856d0eaada6f8140335733a1b1771ed2746lling * vnode, flags specifying whether the mount is a remount and whether it
990b4856d0eaada6f8140335733a1b1771ed2746lling * should be entered into the vfs list, and credentials. Fills in its vfspp
990b4856d0eaada6f8140335733a1b1771ed2746lling * parameter with the mounted file system instance's vfs.
990b4856d0eaada6f8140335733a1b1771ed2746lling *
990b4856d0eaada6f8140335733a1b1771ed2746lling * Note that the effective file system type is specified as a string. It may
990b4856d0eaada6f8140335733a1b1771ed2746lling * be null, in which case it's determined from the mount arguments, and may
990b4856d0eaada6f8140335733a1b1771ed2746lling * differ from the type specified in the mount arguments; this is a hook to
990b4856d0eaada6f8140335733a1b1771ed2746lling * allow interposition when instantiating file system instances.
990b4856d0eaada6f8140335733a1b1771ed2746lling *
990b4856d0eaada6f8140335733a1b1771ed2746lling * The caller is responsible for releasing its own hold on the mount point
990b4856d0eaada6f8140335733a1b1771ed2746lling * vp (this routine does its own hold when necessary).
990b4856d0eaada6f8140335733a1b1771ed2746lling * Also note that for remounts, the mount point vp should be the vnode for
990b4856d0eaada6f8140335733a1b1771ed2746lling * the root of the file system rather than the vnode that the file system
990b4856d0eaada6f8140335733a1b1771ed2746lling * is mounted on top of.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746llingint
990b4856d0eaada6f8140335733a1b1771ed2746llingdomount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
990b4856d0eaada6f8140335733a1b1771ed2746lling struct vfs **vfspp)
990b4856d0eaada6f8140335733a1b1771ed2746lling{
990b4856d0eaada6f8140335733a1b1771ed2746lling struct vfssw *vswp;
990b4856d0eaada6f8140335733a1b1771ed2746lling vfsops_t *vfsops;
990b4856d0eaada6f8140335733a1b1771ed2746lling struct vfs *vfsp;
990b4856d0eaada6f8140335733a1b1771ed2746lling struct vnode *bvp;
990b4856d0eaada6f8140335733a1b1771ed2746lling dev_t bdev = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling mntopts_t mnt_mntopts;
990b4856d0eaada6f8140335733a1b1771ed2746lling int error = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling int copyout_error = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling int ovflags;
990b4856d0eaada6f8140335733a1b1771ed2746lling char *opts = uap->optptr;
990b4856d0eaada6f8140335733a1b1771ed2746lling char *inargs = opts;
990b4856d0eaada6f8140335733a1b1771ed2746lling int optlen = uap->optlen;
990b4856d0eaada6f8140335733a1b1771ed2746lling int remount;
990b4856d0eaada6f8140335733a1b1771ed2746lling int rdonly;
990b4856d0eaada6f8140335733a1b1771ed2746lling int nbmand = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling int delmip = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling int addmip = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling int splice = ((uap->flags & MS_NOSPLICE) == 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling int fromspace = (uap->flags & MS_SYSSPACE) ?
990b4856d0eaada6f8140335733a1b1771ed2746lling UIO_SYSSPACE : UIO_USERSPACE;
990b4856d0eaada6f8140335733a1b1771ed2746lling char *resource = NULL, *mountpt = NULL;
990b4856d0eaada6f8140335733a1b1771ed2746lling refstr_t *oldresource, *oldmntpt;
990b4856d0eaada6f8140335733a1b1771ed2746lling struct pathname pn, rpn;
990b4856d0eaada6f8140335733a1b1771ed2746lling vsk_anchor_t *vskap;
a9799022bd90b13722204e80112efaa5bf573099ck char fstname[FSTYPSZ];
a9799022bd90b13722204e80112efaa5bf573099ck
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
a9799022bd90b13722204e80112efaa5bf573099ck * The v_flag value for the mount point vp is permanently set
990b4856d0eaada6f8140335733a1b1771ed2746lling * to VVFSLOCK so that no one bypasses the vn_vfs*locks routine
990b4856d0eaada6f8140335733a1b1771ed2746lling * for mount point locking.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling mutex_enter(&vp->v_lock);
990b4856d0eaada6f8140335733a1b1771ed2746lling vp->v_flag |= VVFSLOCK;
a9799022bd90b13722204e80112efaa5bf573099ck mutex_exit(&vp->v_lock);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling mnt_mntopts.mo_count = 0;
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Find the ops vector to use to invoke the file system-specific mount
990b4856d0eaada6f8140335733a1b1771ed2746lling * method. If the fsname argument is non-NULL, use it directly.
a9799022bd90b13722204e80112efaa5bf573099ck * Otherwise, dig the file system type information out of the mount
a9799022bd90b13722204e80112efaa5bf573099ck * arguments.
990b4856d0eaada6f8140335733a1b1771ed2746lling *
990b4856d0eaada6f8140335733a1b1771ed2746lling * A side effect is to hold the vfssw entry.
990b4856d0eaada6f8140335733a1b1771ed2746lling *
990b4856d0eaada6f8140335733a1b1771ed2746lling * Mount arguments can be specified in several ways, which are
990b4856d0eaada6f8140335733a1b1771ed2746lling * distinguished by flag bit settings. The preferred way is to set
990b4856d0eaada6f8140335733a1b1771ed2746lling * MS_OPTIONSTR, indicating an 8 argument mount with the file system
990b4856d0eaada6f8140335733a1b1771ed2746lling * type supplied as a character string and the last two arguments
990b4856d0eaada6f8140335733a1b1771ed2746lling * being a pointer to a character buffer and the size of the buffer.
990b4856d0eaada6f8140335733a1b1771ed2746lling * On entry, the buffer holds a null terminated list of options; on
990b4856d0eaada6f8140335733a1b1771ed2746lling * return, the string is the list of options the file system
990b4856d0eaada6f8140335733a1b1771ed2746lling * recognized. If MS_DATA is set arguments five and six point to a
990b4856d0eaada6f8140335733a1b1771ed2746lling * block of binary data which the file system interprets.
990b4856d0eaada6f8140335733a1b1771ed2746lling * A further wrinkle is that some callers don't set MS_FSS and MS_DATA
990b4856d0eaada6f8140335733a1b1771ed2746lling * consistently with these conventions. To handle them, we check to
990b4856d0eaada6f8140335733a1b1771ed2746lling * see whether the pointer to the file system name has a numeric value
990b4856d0eaada6f8140335733a1b1771ed2746lling * less than 256. If so, we treat it as an index.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling if (fsname != NULL) {
990b4856d0eaada6f8140335733a1b1771ed2746lling if ((vswp = vfs_getvfssw(fsname)) == NULL) {
990b4856d0eaada6f8140335733a1b1771ed2746lling return (EINVAL);
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling } else if (uap->flags & (MS_OPTIONSTR | MS_DATA | MS_FSS)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling size_t n;
990b4856d0eaada6f8140335733a1b1771ed2746lling uint_t fstype;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling fsname = fstname;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if ((fstype = (uintptr_t)uap->fstype) < 256) {
990b4856d0eaada6f8140335733a1b1771ed2746lling RLOCK_VFSSW();
990b4856d0eaada6f8140335733a1b1771ed2746lling if (fstype == 0 || fstype >= nfstype ||
990b4856d0eaada6f8140335733a1b1771ed2746lling !ALLOCATED_VFSSW(&vfssw[fstype])) {
990b4856d0eaada6f8140335733a1b1771ed2746lling RUNLOCK_VFSSW();
990b4856d0eaada6f8140335733a1b1771ed2746lling return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens (void) strcpy(fsname, vfssw[fstype].vsw_name);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens RUNLOCK_VFSSW();
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if ((vswp = vfs_getvfssw(fsname)) == NULL)
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens } else {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * Handle either kernel or user address space.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (uap->flags & MS_SYSSPACE) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens error = copystr(uap->fstype, fsname,
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens FSTYPSZ, &n);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens } else {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens error = copyinstr(uap->fstype, fsname,
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens FSTYPSZ, &n);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (error) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (error == ENAMETOOLONG)
148434217c040ea38dc844384f6ba68d9b325906Matthew Ahrens return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens return (error);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if ((vswp = vfs_getvfssw(fsname)) == NULL)
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens } else {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) == NULL)
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (!VFS_INSTALLED(vswp))
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens return (EINVAL);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens vfsops = &vswp->vsw_vfsops;
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens vfs_copyopttbl(&vswp->vsw_optproto, &mnt_mntopts);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * Fetch mount options and parse them for generic vfs options
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (uap->flags & MS_OPTIONSTR) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * Limit the buffer size
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (optlen < 0 || optlen > MAX_MNTOPT_STR) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens error = EINVAL;
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens goto errout;
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if ((uap->flags & MS_SYSSPACE) == 0) {
990b4856d0eaada6f8140335733a1b1771ed2746lling inargs = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP);
990b4856d0eaada6f8140335733a1b1771ed2746lling inargs[0] = '\0';
990b4856d0eaada6f8140335733a1b1771ed2746lling if (optlen) {
990b4856d0eaada6f8140335733a1b1771ed2746lling error = copyinstr(opts, inargs, (size_t)optlen,
990b4856d0eaada6f8140335733a1b1771ed2746lling NULL);
990b4856d0eaada6f8140335733a1b1771ed2746lling if (error) {
990b4856d0eaada6f8140335733a1b1771ed2746lling goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_parsemntopts(&mnt_mntopts, inargs, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Flag bits override the options string.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling if (uap->flags & MS_REMOUNT)
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_REMOUNT, NULL, 0, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling if (uap->flags & MS_RDONLY)
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_RO, NULL, 0, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling if (uap->flags & MS_NOSUID)
990b4856d0eaada6f8140335733a1b1771ed2746lling vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Check if this is a remount; must be set in the option string and
990b4856d0eaada6f8140335733a1b1771ed2746lling * the file system must support a remount option.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling if (remount = vfs_optionisset_nolock(&mnt_mntopts,
990b4856d0eaada6f8140335733a1b1771ed2746lling MNTOPT_REMOUNT, NULL)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling if (!(vswp->vsw_flag & VSW_CANREMOUNT)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling error = ENOTSUP;
990b4856d0eaada6f8140335733a1b1771ed2746lling goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens uap->flags |= MS_REMOUNT;
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * uap->flags and vfs_optionisset() should agree.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (rdonly = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_RO, NULL)) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens uap->flags |= MS_RDONLY;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling uap->flags |= MS_NOSUID;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling nbmand = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NBMAND, NULL);
990b4856d0eaada6f8140335733a1b1771ed2746lling ASSERT(splice || !remount);
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * If we are splicing the fs into the namespace,
990b4856d0eaada6f8140335733a1b1771ed2746lling * perform mount point checks.
990b4856d0eaada6f8140335733a1b1771ed2746lling *
990b4856d0eaada6f8140335733a1b1771ed2746lling * We want to resolve the path for the mount point to eliminate
990b4856d0eaada6f8140335733a1b1771ed2746lling * '.' and ".." and symlinks in mount points; we can't do the
990b4856d0eaada6f8140335733a1b1771ed2746lling * same for the resource string, since it would turn
990b4856d0eaada6f8140335733a1b1771ed2746lling * "/dev/dsk/c0t0d0s0" into "/devices/pci@...". We need to do
990b4856d0eaada6f8140335733a1b1771ed2746lling * this before grabbing vn_vfswlock(), because otherwise we
990b4856d0eaada6f8140335733a1b1771ed2746lling * would deadlock with lookuppn().
990b4856d0eaada6f8140335733a1b1771ed2746lling */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (splice) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens ASSERT(vp->v_count > 0);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * Pick up mount point and device from appropriate space.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if (pn_get(uap->spec, fromspace, &pn) == 0) {
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens resource = kmem_alloc(pn.pn_pathlen + 1,
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens KM_SLEEP);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens (void) strcpy(resource, pn.pn_path);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens pn_free(&pn);
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens }
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens /*
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * Do a lookupname prior to taking the
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens * writelock. Mark this as completed if
990b4856d0eaada6f8140335733a1b1771ed2746lling * successful for later cleanup and addition to
990b4856d0eaada6f8140335733a1b1771ed2746lling * the mount in progress table.
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens */
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if ((uap->flags & MS_GLOBAL) == 0 &&
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens lookupname(uap->spec, fromspace,
990b4856d0eaada6f8140335733a1b1771ed2746lling FOLLOW, NULL, &bvp) == 0) {
990b4856d0eaada6f8140335733a1b1771ed2746lling addmip = 1;
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens }
990b4856d0eaada6f8140335733a1b1771ed2746lling
74e7dc986c89efca1f2e4451c7a572e05e4a6e4fMatthew Ahrens if ((error = pn_get(uap->dir, fromspace, &pn)) == 0) {
990b4856d0eaada6f8140335733a1b1771ed2746lling pathname_t *pnp;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (*pn.pn_path != '/') {
990b4856d0eaada6f8140335733a1b1771ed2746lling error = EINVAL;
990b4856d0eaada6f8140335733a1b1771ed2746lling pn_free(&pn);
990b4856d0eaada6f8140335733a1b1771ed2746lling goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling pn_alloc(&rpn);
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Kludge to prevent autofs from deadlocking with
990b4856d0eaada6f8140335733a1b1771ed2746lling * itself when it calls domount().
990b4856d0eaada6f8140335733a1b1771ed2746lling *
990b4856d0eaada6f8140335733a1b1771ed2746lling * If autofs is calling, it is because it is doing
990b4856d0eaada6f8140335733a1b1771ed2746lling * (autofs) mounts in the process of an NFS mount. A
990b4856d0eaada6f8140335733a1b1771ed2746lling * lookuppn() here would cause us to block waiting for
990b4856d0eaada6f8140335733a1b1771ed2746lling * said NFS mount to complete, which can't since this
990b4856d0eaada6f8140335733a1b1771ed2746lling * is the thread that was supposed to doing it.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling if (fromspace == UIO_USERSPACE) {
990b4856d0eaada6f8140335733a1b1771ed2746lling if ((error = lookuppn(&pn, &rpn, FOLLOW, NULL,
990b4856d0eaada6f8140335733a1b1771ed2746lling NULL)) == 0) {
990b4856d0eaada6f8140335733a1b1771ed2746lling pnp = &rpn;
990b4856d0eaada6f8140335733a1b1771ed2746lling } else {
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * The file disappeared or otherwise
990b4856d0eaada6f8140335733a1b1771ed2746lling * became inaccessible since we opened
990b4856d0eaada6f8140335733a1b1771ed2746lling * it; might as well fail the mount
990b4856d0eaada6f8140335733a1b1771ed2746lling * since the mount point is no longer
990b4856d0eaada6f8140335733a1b1771ed2746lling * accessible.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling pn_free(&rpn);
990b4856d0eaada6f8140335733a1b1771ed2746lling pn_free(&pn);
990b4856d0eaada6f8140335733a1b1771ed2746lling goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling } else {
990b4856d0eaada6f8140335733a1b1771ed2746lling pnp = &pn;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling mountpt = kmem_alloc(pnp->pn_pathlen + 1, KM_SLEEP);
990b4856d0eaada6f8140335733a1b1771ed2746lling (void) strcpy(mountpt, pnp->pn_path);
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * If the addition of the zone's rootpath
990b4856d0eaada6f8140335733a1b1771ed2746lling * would push us over a total path length
990b4856d0eaada6f8140335733a1b1771ed2746lling * of MAXPATHLEN, we fail the mount with
990b4856d0eaada6f8140335733a1b1771ed2746lling * ENAMETOOLONG, which is what we would have
990b4856d0eaada6f8140335733a1b1771ed2746lling * gotten if we were trying to perform the same
990b4856d0eaada6f8140335733a1b1771ed2746lling * mount in the global zone.
990b4856d0eaada6f8140335733a1b1771ed2746lling *
990b4856d0eaada6f8140335733a1b1771ed2746lling * strlen() doesn't count the trailing
990b4856d0eaada6f8140335733a1b1771ed2746lling * '\0', but zone_rootpathlen counts both a
990b4856d0eaada6f8140335733a1b1771ed2746lling * trailing '/' and the terminating '\0'.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling if ((curproc->p_zone->zone_rootpathlen - 1 +
990b4856d0eaada6f8140335733a1b1771ed2746lling strlen(mountpt)) > MAXPATHLEN ||
990b4856d0eaada6f8140335733a1b1771ed2746lling (resource != NULL &&
990b4856d0eaada6f8140335733a1b1771ed2746lling (curproc->p_zone->zone_rootpathlen - 1 +
990b4856d0eaada6f8140335733a1b1771ed2746lling strlen(resource)) > MAXPATHLEN)) {
990b4856d0eaada6f8140335733a1b1771ed2746lling error = ENAMETOOLONG;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling pn_free(&rpn);
990b4856d0eaada6f8140335733a1b1771ed2746lling pn_free(&pn);
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling if (error)
990b4856d0eaada6f8140335733a1b1771ed2746lling goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Prevent path name resolution from proceeding past
990b4856d0eaada6f8140335733a1b1771ed2746lling * the mount point.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling if (vn_vfswlock(vp) != 0) {
990b4856d0eaada6f8140335733a1b1771ed2746lling error = EBUSY;
990b4856d0eaada6f8140335733a1b1771ed2746lling goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
990b4856d0eaada6f8140335733a1b1771ed2746lling
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * Verify that it's legitimate to establish a mount on
990b4856d0eaada6f8140335733a1b1771ed2746lling * the prospective mount point.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling if (vn_mountedvfs(vp) != NULL) {
990b4856d0eaada6f8140335733a1b1771ed2746lling /*
990b4856d0eaada6f8140335733a1b1771ed2746lling * The mount point lock was obtained after some
990b4856d0eaada6f8140335733a1b1771ed2746lling * other thread raced through and established a mount.
990b4856d0eaada6f8140335733a1b1771ed2746lling */
990b4856d0eaada6f8140335733a1b1771ed2746lling vn_vfsunlock(vp);
990b4856d0eaada6f8140335733a1b1771ed2746lling error = EBUSY;
990b4856d0eaada6f8140335733a1b1771ed2746lling goto errout;
990b4856d0eaada6f8140335733a1b1771ed2746lling }
ecd6cf800b63704be73fb264c3f5b6e0dafc068dmarks if (vp->v_flag & VNOMOUNT) {
vn_vfsunlock(vp);
error = EINVAL;
goto errout;
}
}
if ((uap->flags & (MS_DATA | MS_OPTIONSTR)) == 0) {
uap->dataptr = NULL;
uap->datalen = 0;
}
/*
* If this is a remount, we don't want to create a new VFS.
* Instead, we pass the existing one with a remount flag.
*/
if (remount) {
/*
* Confirm that the mount point is the root vnode of the
* file system that is being remounted.
* This can happen if the user specifies a different
* mount point directory pathname in the (re)mount command.
*
* Code below can only be reached if splice is true, so it's
* safe to do vn_vfsunlock() here.
*/
if ((vp->v_flag & VROOT) == 0) {
vn_vfsunlock(vp);
error = ENOENT;
goto errout;
}
/*
* Disallow making file systems read-only unless file system
* explicitly allows it in its vfssw. Ignore other flags.
*/
if (rdonly && vn_is_readonly(vp) == 0 &&
(vswp->vsw_flag & VSW_CANRWRO) == 0) {
vn_vfsunlock(vp);
error = EINVAL;
goto errout;
}
/*
* Disallow changing the NBMAND disposition of the file
* system on remounts.
*/
if ((nbmand && ((vp->v_vfsp->vfs_flag & VFS_NBMAND) == 0)) ||
(!nbmand && (vp->v_vfsp->vfs_flag & VFS_NBMAND))) {
vn_vfsunlock(vp);
error = EINVAL;
goto errout;
}
vfsp = vp->v_vfsp;
ovflags = vfsp->vfs_flag;
vfsp->vfs_flag |= VFS_REMOUNT;
vfsp->vfs_flag &= ~VFS_RDONLY;
} else {
vfsp = vfs_alloc(KM_SLEEP);
VFS_INIT(vfsp, vfsops, NULL);
}
VFS_HOLD(vfsp);
if ((error = lofi_add(fsname, vfsp, &mnt_mntopts, uap)) != 0) {
if (!remount) {
if (splice)
vn_vfsunlock(vp);
vfs_free(vfsp);
} else {
vn_vfsunlock(vp);
VFS_RELE(vfsp);
}
goto errout;
}
/*
* PRIV_SYS_MOUNT doesn't mean you can become root.
*/
if (vfsp->vfs_lofi_minor != 0) {
uap->flags |= MS_NOSUID;
vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0);
}
/*
* The vfs_reflock is not used anymore the code below explicitly
* holds it preventing others accesing it directly.
*/
if ((sema_tryp(&vfsp->vfs_reflock) == 0) &&
!(vfsp->vfs_flag & VFS_REMOUNT))
cmn_err(CE_WARN,
"mount type %s couldn't get vfs_reflock", vswp->vsw_name);
/*
* Lock the vfs. If this is a remount we want to avoid spurious umount
* failures that happen as a side-effect of fsflush() and other mount
* and unmount operations that might be going on simultaneously and
* may have locked the vfs currently. To not return EBUSY immediately
* here we use vfs_lock_wait() instead vfs_lock() for the remount case.
*/
if (!remount) {
if (error = vfs_lock(vfsp)) {
vfsp->vfs_flag = ovflags;
lofi_remove(vfsp);
if (splice)
vn_vfsunlock(vp);
vfs_free(vfsp);
goto errout;
}
} else {
vfs_lock_wait(vfsp);
}
/*
* Add device to mount in progress table, global mounts require special
* handling. It is possible that we have already done the lookupname
* on a spliced, non-global fs. If so, we don't want to do it again
* since we cannot do a lookupname after taking the
* wlock above. This case is for a non-spliced, non-global filesystem.
*/
if (!addmip) {
if ((uap->flags & MS_GLOBAL) == 0 &&
lookupname(uap->spec, fromspace, FOLLOW, NULL, &bvp) == 0) {
addmip = 1;
}
}
if (addmip) {
vnode_t *lvp = NULL;
error = vfs_get_lofi(vfsp, &lvp);
if (error > 0) {
lofi_remove(vfsp);
if (splice)
vn_vfsunlock(vp);
vfs_unlock(vfsp);
if (remount) {
VFS_RELE(vfsp);
} else {
vfs_free(vfsp);
}
goto errout;
} else if (error == -1) {
bdev = bvp->v_rdev;
VN_RELE(bvp);
} else {
bdev = lvp->v_rdev;
VN_RELE(lvp);
VN_RELE(bvp);
}
vfs_addmip(bdev, vfsp);
addmip = 0;
delmip = 1;
}
/*
* Invalidate cached entry for the mount point.
*/
if (splice)
dnlc_purge_vp(vp);
/*
* If have an option string but the filesystem doesn't supply a
* prototype options table, create a table with the global
* options and sufficient room to accept all the options in the
* string. Then parse the passed in option string
* accepting all the options in the string. This gives us an
* option table with all the proper cancel properties for the
* global options.
*
* Filesystems that supply a prototype options table are handled
* earlier in this function.
*/
if (uap->flags & MS_OPTIONSTR) {
if (!(vswp->vsw_flag & VSW_HASPROTO)) {
mntopts_t tmp_mntopts;
tmp_mntopts.mo_count = 0;
vfs_createopttbl_extend(&tmp_mntopts, inargs,
&mnt_mntopts);
vfs_parsemntopts(&tmp_mntopts, inargs, 1);
vfs_swapopttbl_nolock(&mnt_mntopts, &tmp_mntopts);
vfs_freeopttbl(&tmp_mntopts);
}
}
/*
* Serialize with zone creations.
*/
mount_in_progress();
/*
* Instantiate (or reinstantiate) the file system. If appropriate,
* splice it into the file system name space.
*
* We want VFS_MOUNT() to be able to override the vfs_resource
* string if necessary (ie, mntfs), and also for a remount to
* change the same (necessary when remounting '/' during boot).
* So we set up vfs_mntpt and vfs_resource to what we think they
* should be, then hand off control to VFS_MOUNT() which can
* override this.
*
* For safety's sake, when changing vfs_resource or vfs_mntpt of
* a vfs which is on the vfs list (i.e. during a remount), we must
* never set those fields to NULL. Several bits of code make
* assumptions that the fields are always valid.
*/
vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts);
if (remount) {
if ((oldresource = vfsp->vfs_resource) != NULL)
refstr_hold(oldresource);
if ((oldmntpt = vfsp->vfs_mntpt) != NULL)
refstr_hold(oldmntpt);
}
vfs_setresource(vfsp, resource);
vfs_setmntpoint(vfsp, mountpt);
/*
* going to mount on this vnode, so notify.
*/
vnevent_mountedover(vp, NULL);
error = VFS_MOUNT(vfsp, vp, uap, credp);
if (uap->flags & MS_RDONLY)
vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
if (uap->flags & MS_NOSUID)
vfs_setmntopt(vfsp, MNTOPT_NOSUID, NULL, 0);
if (uap->flags & MS_GLOBAL)
vfs_setmntopt(vfsp, MNTOPT_GLOBAL, NULL, 0);
if (error) {
lofi_remove(vfsp);
if (remount) {
/* put back pre-remount options */
vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts);
vfs_setmntpoint(vfsp, (stripzonepath(
refstr_value(oldmntpt))));
if (oldmntpt)
refstr_rele(oldmntpt);
vfs_setresource(vfsp, (stripzonepath(
refstr_value(oldresource))));
if (oldresource)
refstr_rele(oldresource);
vfsp->vfs_flag = ovflags;
vfs_unlock(vfsp);
VFS_RELE(vfsp);
} else {
vfs_unlock(vfsp);
vfs_freemnttab(vfsp);
vfs_free(vfsp);
}
} else {
/*
* Set the mount time to now
*/
vfsp->vfs_mtime = ddi_get_time();
if (remount) {
vfsp->vfs_flag &= ~VFS_REMOUNT;
if (oldresource)
refstr_rele(oldresource);
if (oldmntpt)
refstr_rele(oldmntpt);
} else if (splice) {
/*
* Link vfsp into the name space at the mount
* point. Vfs_add() is responsible for
* holding the mount point which will be
* released when vfs_remove() is called.
*/
vfs_add(vp, vfsp, uap->flags);
} else {
/*
* Hold the reference to file system which is
* not linked into the name space.
*/
vfsp->vfs_zone = NULL;
VFS_HOLD(vfsp);
vfsp->vfs_vnodecovered = NULL;
}
/*
* Set flags for global options encountered
*/
if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
vfsp->vfs_flag |= VFS_RDONLY;
else
vfsp->vfs_flag &= ~VFS_RDONLY;
if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
vfsp->vfs_flag |= (VFS_NOSETUID|VFS_NODEVICES);
} else {
if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
vfsp->vfs_flag |= VFS_NODEVICES;
else
vfsp->vfs_flag &= ~VFS_NODEVICES;
if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
vfsp->vfs_flag |= VFS_NOSETUID;
else
vfsp->vfs_flag &= ~VFS_NOSETUID;
}
if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
vfsp->vfs_flag |= VFS_NBMAND;
else
vfsp->vfs_flag &= ~VFS_NBMAND;
if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
vfsp->vfs_flag |= VFS_XATTR;
else
vfsp->vfs_flag &= ~VFS_XATTR;
if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
vfsp->vfs_flag |= VFS_NOEXEC;
else
vfsp->vfs_flag &= ~VFS_NOEXEC;
/*
* Now construct the output option string of options
* we recognized.
*/
if (uap->flags & MS_OPTIONSTR) {
vfs_list_read_lock();
copyout_error = vfs_buildoptionstr(
&vfsp->vfs_mntopts, inargs, optlen);
vfs_list_unlock();
if (copyout_error == 0 &&
(uap->flags & MS_SYSSPACE) == 0) {
copyout_error = copyoutstr(inargs, opts,
optlen, NULL);
}
}
/*
* If this isn't a remount, set up the vopstats before
* anyone can touch this. We only allow spliced file
* systems (file systems which are in the namespace) to
* have the VFS_STATS flag set.
* NOTE: PxFS mounts the underlying file system with
* MS_NOSPLICE set and copies those vfs_flags to its private
* vfs structure. As a result, PxFS should never have
* the VFS_STATS flag or else we might access the vfs
* statistics-related fields prior to them being
* properly initialized.
*/
if (!remount && (vswp->vsw_flag & VSW_STATS) && splice) {
initialize_vopstats(&vfsp->vfs_vopstats);
/*
* We need to set vfs_vskap to NULL because there's
* a chance it won't be set below. This is checked
* in teardown_vopstats() so we can't have garbage.
*/
vfsp->vfs_vskap = NULL;
vfsp->vfs_flag |= VFS_STATS;
vfsp->vfs_fstypevsp = get_fstype_vopstats(vfsp, vswp);
}
if (vswp->vsw_flag & VSW_XID)
vfsp->vfs_flag |= VFS_XID;
vfs_unlock(vfsp);
}
mount_completed();
if (splice)
vn_vfsunlock(vp);
if ((error == 0) && (copyout_error == 0)) {
if (!remount) {
/*
* Don't call get_vskstat_anchor() while holding
* locks since it allocates memory and calls
* VFS_STATVFS(). For NFS, the latter can generate
* an over-the-wire call.
*/
vskap = get_vskstat_anchor(vfsp);
/* Only take the lock if we have something to do */
if (vskap != NULL) {
vfs_lock_wait(vfsp);
if (vfsp->vfs_flag & VFS_STATS) {
vfsp->vfs_vskap = vskap;
}
vfs_unlock(vfsp);
}
}
/* Return vfsp to caller. */
*vfspp = vfsp;
}
errout:
vfs_freeopttbl(&mnt_mntopts);
if (resource != NULL)
kmem_free(resource, strlen(resource) + 1);
if (mountpt != NULL)
kmem_free(mountpt, strlen(mountpt) + 1);
/*
* It is possible we errored prior to adding to mount in progress
* table. Must free vnode we acquired with successful lookupname.
*/
if (addmip)
VN_RELE(bvp);
if (delmip)
vfs_delmip(vfsp);
ASSERT(vswp != NULL);
vfs_unrefvfssw(vswp);
if (inargs != opts)
kmem_free(inargs, MAX_MNTOPT_STR);
if (copyout_error) {
lofi_remove(vfsp);
VFS_RELE(vfsp);
error = copyout_error;
}
return (error);
}
/*
 * Replace the path string referenced by *refp (either vfs_resource or
 * vfs_mntpt of vfsp) with a new refstr built from newpath.
 *
 * For a process in a non-global zone, an absolute newpath is prefixed
 * with the zone's root path; otherwise newpath is stored unchanged.
 * Any previous refstr in *refp is released.
 */
static void
vfs_setpath(struct vfs *vfsp, refstr_t **refp, const char *newpath)
{
	zone_t *zone = curproc->p_zone;
	refstr_t *newref;
	int on_list = 0;

	ASSERT(!VFS_ON_LIST(vfsp) || vfs_lock_held(vfsp));

	/*
	 * New path must be less than MAXPATHLEN because mntfs
	 * will only display up to MAXPATHLEN bytes. This is currently
	 * safe, because domount() uses pn_get(), and other callers
	 * similarly cap the size to fewer than MAXPATHLEN bytes.
	 */
	ASSERT(strlen(newpath) < MAXPATHLEN);

	/* mntfs requires consistency while vfs list lock is held */
	if (VFS_ON_LIST(vfsp)) {
		on_list = 1;
		vfs_list_lock();
	}

	/* Drop the hold on whatever string was recorded before. */
	if (*refp != NULL)
		refstr_rele(*refp);

	if (zone == global_zone || *newpath != '/') {
		/* No zone prefix needed; record the path as given. */
		newref = refstr_alloc(newpath);
	} else {
		/*
		 * Build "<zone root without trailing '/'><newpath>".
		 *
		 * zone_rootpathlen covers the root path including its
		 * trailing '/' and its NUL.  The combined string needs
		 * (strlen(newpath) + 1) bytes for newpath, plus
		 * zone_rootpathlen, minus one for the now-superfluous
		 * NUL, minus one for the dropped trailing '/':
		 *
		 *	(strlen(newpath) + 1) + zone_rootpathlen - 1 - 1
		 */
		size_t buflen = strlen(newpath) + zone->zone_rootpathlen - 1;
		char *buf = kmem_alloc(buflen, KM_SLEEP);

		/*
		 * Copy the whole root path, then overwrite its trailing
		 * slash with a NUL before appending newpath.
		 */
		(void) strcpy(buf, zone->zone_rootpath);
		buf[zone->zone_rootpathlen - 2] = '\0';
		(void) strcat(buf, newpath);

		newref = refstr_alloc(buf);
		kmem_free(buf, buflen);
	}

	*refp = newref;

	if (on_list) {
		vfs_mnttab_modtimeupd();
		vfs_list_unlock();
	}
}
/*
 * Store the name of the mounted resource in vfsp->vfs_resource.
 * A NULL or empty name is recorded as VFS_NORESOURCE.
 * If vfsp is already mounted, caller must hold the vfs lock.
 */
void
vfs_setresource(struct vfs *vfsp, const char *resource)
{
	const char *name = resource;

	if (name == NULL || *name == '\0')
		name = VFS_NORESOURCE;

	vfs_setpath(vfsp, &vfsp->vfs_resource, name);
}
/*
 * Store the mount point name in vfsp->vfs_mntpt.
 * A NULL or empty name is recorded as VFS_NOMNTPT.
 * If vfsp is already mounted, caller must hold the vfs lock.
 */
void
vfs_setmntpoint(struct vfs *vfsp, const char *mntpt)
{
	const char *name = mntpt;

	if (name == NULL || *name == '\0')
		name = VFS_NOMNTPT;

	vfs_setpath(vfsp, &vfsp->vfs_mntpt, name);
}
/*
 * Return vfsp->vfs_resource with an additional hold on it.  The hold is
 * taken while the vfs list is read-locked.  Caller must call
 * refstr_rele() on the returned string when finished.
 */
refstr_t *
vfs_getresource(const struct vfs *vfsp)
{
	refstr_t *held;

	vfs_list_read_lock();
	held = vfsp->vfs_resource;
	refstr_hold(held);
	vfs_list_unlock();

	return (held);
}
/*
 * Return vfsp->vfs_mntpt with an additional hold on it.  The hold is
 * taken while the vfs list is read-locked.  Caller must call
 * refstr_rele() on the returned string when finished.
 */
refstr_t *
vfs_getmntpoint(const struct vfs *vfsp)
{
	refstr_t *held;

	vfs_list_read_lock();
	held = vfsp->vfs_mntpt;
	refstr_hold(held);
	vfs_list_unlock();

	return (held);
}
/*
 * Initialize mops as a copy of the template table mtmpl (which may be
 * NULL) followed by enough empty slots to hold every option named in
 * the comma-separated option string opts.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 * to protect mops.
 */
static void
vfs_createopttbl_extend(mntopts_t *mops, const char *opts,
    const mntopts_t *mtmpl)
{
	uint_t nopts = 0;

	if (opts != NULL && *opts != '\0') {
		const char *cp;

		/*
		 * A non-empty string holds one option plus one more
		 * for every comma separator.
		 */
		nopts = 1;
		for (cp = opts; *cp != '\0'; cp++) {
			if (*cp == ',')
				nopts++;
		}
	}

	vfs_copyopttbl_extend(mtmpl, mops, nopts);
}
/*
 * Initialize mops as an options table consisting only of empty slots,
 * one for each option in the option string opts (no template table is
 * prepended).
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 * to protect mops.
 */
void
vfs_createopttbl(mntopts_t *mops, const char *opts)
{
	const mntopts_t *no_template = NULL;

	vfs_createopttbl_extend(mops, opts, no_template);
}
/*
 * Exchange the contents (option count and option list pointer) of two
 * mount options tables.  No locking is done here.
 */
static void
vfs_swapopttbl_nolock(mntopts_t *optbl1, mntopts_t *optbl2)
{
	uint_t saved_count = optbl1->mo_count;
	mntopt_t *saved_list = optbl1->mo_list;

	optbl1->mo_count = optbl2->mo_count;
	optbl1->mo_list = optbl2->mo_list;

	optbl2->mo_count = saved_count;
	optbl2->mo_list = saved_list;
}
/*
 * Exchange two mount options tables while holding the vfs list lock,
 * and bump the mnttab modification time before the lock is dropped.
 */
static void
vfs_swapopttbl(mntopts_t *optbl1, mntopts_t *optbl2)
{
	vfs_list_lock();

	vfs_swapopttbl_nolock(optbl1, optbl2);
	vfs_mnttab_modtimeupd();

	vfs_list_unlock();
}
/*
 * Duplicate the NULL-terminated array of cancel-option strings moc
 * (which may itself be NULL), leaving room for "extend" additional
 * entries.  Every slot past the copied strings, including the final
 * terminator, is set to NULL.
 *
 * Returns NULL when there are no strings to copy and extend is 0;
 * otherwise returns a newly allocated array of
 * (number of strings + extend + 1) pointers.
 */
static char **
vfs_copycancelopt_extend(char **const moc, int extend)
{
	int nopts = 0;
	int total;
	int k;
	char **copy;

	/* count number of options to cancel */
	if (moc != NULL) {
		while (moc[nopts] != NULL)
			nopts++;
	}

	total = nopts + extend;
	if (total == 0)
		return (NULL);

	copy = kmem_alloc((total + 1) * sizeof (char *), KM_SLEEP);

	for (k = 0; k < nopts; k++) {
		copy[k] = kmem_alloc(strlen(moc[k]) + 1, KM_SLEEP);
		(void) strcpy(copy[k], moc[k]);
	}

	/* Clear the spare slots and the terminator. */
	for (k = nopts; k <= total; k++)
		copy[k] = NULL;

	return (copy);
}
/*
 * Copy mount option *s into *d.  The flags and data fields are copied
 * by value; the name, argument and cancel list are duplicated into
 * newly allocated storage.  Whatever *d held before is overwritten
 * without being freed.
 */
static void
vfs_copyopt(const mntopt_t *s, mntopt_t *d)
{
	char *name_copy = NULL;
	char *arg_copy = NULL;

	d->mo_flags = s->mo_flags;
	d->mo_data = s->mo_data;

	/* A NULL name should never happen, but is tolerated. */
	if (s->mo_name != NULL) {
		name_copy = kmem_alloc(strlen(s->mo_name) + 1, KM_SLEEP);
		(void) strcpy(name_copy, s->mo_name);
	}
	d->mo_name = name_copy;

	d->mo_cancel = vfs_copycancelopt_extend(s->mo_cancel, 0);

	if (s->mo_arg != NULL) {
		arg_copy = kmem_alloc(strlen(s->mo_arg) + 1, KM_SLEEP);
		(void) strcpy(arg_copy, s->mo_arg);
	}
	d->mo_arg = arg_copy;
}
/*
 * Make dmo a copy of the mount options table smo, followed by "extra"
 * spare slots marked MO_EMPTY.  smo may be NULL, in which case the
 * result consists only of the spare slots.  Anything dmo held before
 * is freed first.
 */
static void
vfs_copyopttbl_extend(const mntopts_t *smo, mntopts_t *dmo, int extra)
{
	uint_t ncopy;
	uint_t nslots;
	uint_t idx;
	mntopt_t *tbl;

	/*
	 * Clear out any existing stuff in the options table being initialized
	 */
	vfs_freeopttbl(dmo);

	ncopy = (smo != NULL) ? smo->mo_count : 0;
	nslots = ncopy + extra;
	if (nslots == 0)
		return;		/* nothing to do */

	dmo->mo_count = nslots;
	tbl = kmem_zalloc(nslots * sizeof (mntopt_t), KM_SLEEP);
	dmo->mo_list = tbl;

	/* Duplicate the source options ... */
	for (idx = 0; idx < ncopy; idx++)
		vfs_copyopt(&smo->mo_list[idx], &tbl[idx]);

	/* ... and mark every remaining slot as available. */
	for (idx = ncopy; idx < nslots; idx++)
		tbl[idx].mo_flags = MO_EMPTY;
}
/*
 * Make dmo an exact copy of the mount options table smo, with no spare
 * slots added.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 * to protect smo and dmo.
 */
void
vfs_copyopttbl(const mntopts_t *smo, mntopts_t *dmo)
{
	const int no_spare_slots = 0;

	vfs_copyopttbl_extend(smo, dmo, no_spare_slots);
}
/*
 * Build a newly allocated, NULL-terminated cancel list holding every
 * cancel option of mop1 followed by those cancel options of mop2 that
 * do not already appear in mop1.
 *
 * If mop1 has no cancel options the result is simply a copy of mop2's
 * list (possibly NULL).
 */
static char **
vfs_mergecancelopts(const mntopt_t *mop1, const mntopt_t *mop2)
{
	int n1 = 0;
	int n2 = 0;
	int used;
	int j, k;
	char **merged;

	/*
	 * First we count both lists of cancel options.
	 * If either is NULL or has no elements, we return a copy of
	 * the other.
	 */
	if (mop1->mo_cancel != NULL) {
		while (mop1->mo_cancel[n1] != NULL)
			n1++;
	}
	if (n1 == 0)
		return (vfs_copycancelopt_extend(mop2->mo_cancel, 0));

	if (mop2->mo_cancel != NULL) {
		while (mop2->mo_cancel[n2] != NULL)
			n2++;
	}

	/* Copy of mop1's list with n2 spare (NULL) slots after it. */
	merged = vfs_copycancelopt_extend(mop1->mo_cancel, n2);
	if (n2 == 0)
		return (merged);

	/*
	 * Two non-empty sets need merging.  "merged" has n1 + n2 + 1
	 * entries, the first n1 already filled from mop1.  Look up each
	 * entry of mop2 in mop1 and append it only if it is unique.
	 *
	 * This operation is O(n^2) but it's only called once per
	 * filesystem per duplicate option.  This is a situation
	 * which doesn't arise with the filesystems in ON and
	 * n is generally 1.
	 */
	used = n1;
	for (j = 0; j < n2; j++) {
		const char *cand = mop2->mo_cancel[j];
		int dup = 0;

		for (k = 0; k < n1; k++) {
			if (strcmp(mop1->mo_cancel[k], cand) == 0) {
				dup = 1;
				break;
			}
		}
		if (dup)
			continue;

		/*
		 * Not present in mop1, so copy it.  The spare slots
		 * requested above guarantee there is enough room.
		 */
		merged[used] = kmem_alloc(strlen(cand) + 1, KM_SLEEP);
		(void) strcpy(merged[used], cand);
		used++;
	}

	/*
	 * If duplicates were skipped, shrink the array so that its
	 * allocated size matches its contents plus the terminator.
	 */
	if (used != n1 + n2) {
		size_t bytes = (used + 1) * sizeof (char *);
		char **shrunk = kmem_alloc(bytes, KM_SLEEP);

		bcopy(merged, shrunk, bytes);
		kmem_free(merged, (n1 + n2 + 1) * sizeof (char *));
		merged = shrunk;
	}

	return (merged);
}
/*
 * Merge two mount option tables (outer and inner) into one. This is very
 * similar to "merging" global variables and automatic variables in C.
 *
 * The result, stored in dmo, contains a copy of every slot of the outer
 * table omo in its original position. A non-empty option of the inner
 * table imo whose name also exists in omo replaces the outer copy at that
 * position; any other non-empty inner option is appended after the outer
 * slots.
 *
 * dmo->mo_count and dmo->mo_list are simply overwritten; nothing in this
 * function frees a list that dmo may have held before.
 *
 * This isn't (and doesn't have to be) fast.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 * to protect omo, imo & dmo.
 */
void
vfs_mergeopttbl(const mntopts_t *omo, const mntopts_t *imo, mntopts_t *dmo)
{
uint_t i, count;
mntopt_t *mop, *motbl;
uint_t freeidx;
/*
 * First determine how much space we need to allocate: every outer
 * slot, plus one slot for each non-empty inner option whose name is
 * not found in the outer table.
 */
count = omo->mo_count;
for (i = 0; i < imo->mo_count; i++) {
if (imo->mo_list[i].mo_flags & MO_EMPTY)
continue;
if (vfs_hasopt(omo, imo->mo_list[i].mo_name) == NULL)
count++;
}
ASSERT(count >= omo->mo_count &&
count <= omo->mo_count + imo->mo_count);
motbl = kmem_alloc(count * sizeof (mntopt_t), KM_SLEEP);
/* Start with a deep copy of every outer slot, in the same order. */
for (i = 0; i < omo->mo_count; i++)
vfs_copyopt(&omo->mo_list[i], &motbl[i]);
/* Appended inner options go after the outer slots. */
freeidx = omo->mo_count;
for (i = 0; i < imo->mo_count; i++) {
if (imo->mo_list[i].mo_flags & MO_EMPTY)
continue;
if ((mop = vfs_hasopt(omo, imo->mo_list[i].mo_name)) != NULL) {
char **newcanp;
/* Position of the matching outer option; same slot in motbl. */
uint_t index = mop - omo->mo_list;
/*
 * The merged cancel list must be computed before
 * motbl[index] is freed below, since it reads that entry.
 *
 * NOTE(review): motbl[index] starts out as a copy of
 * omo->mo_list[index], i.e. of *mop, so on the first match
 * for a name this merges mop's cancel list with a copy of
 * itself, and the inner option's own cancel list (copied
 * and then freed below) does not contribute. Confirm
 * whether &imo->mo_list[i] was the intended second argument.
 */
newcanp = vfs_mergecancelopts(mop, &motbl[index]);
/* Replace the outer copy with a copy of the inner option. */
vfs_freeopt(&motbl[index]);
vfs_copyopt(&imo->mo_list[i], &motbl[index]);
/* Swap in the cancel list computed above. */
vfs_freecancelopt(motbl[index].mo_cancel);
motbl[index].mo_cancel = newcanp;
} else {
/*
 * If it's a new option, just copy it over to the first
 * free location.
 */
vfs_copyopt(&imo->mo_list[i], &motbl[freeidx++]);
}
}
dmo->mo_count = count;
dmo->mo_list = motbl;
}
/*
 * Functions to set and clear mount options in a mount options table.
 */

/*
 * Clear a mount option, if it exists: the first non-empty slot named
 * opt has its MO_SET flag removed and its argument string freed.
 *
 * The update_mnttab arg indicates whether mops is part of a vfs that is on
 * the vfs list; if so the vfs list must be write-locked by the caller and
 * the mnttab modification time is updated.
 */
static void
vfs_clearmntopt_nolock(mntopts_t *mops, const char *opt, int update_mnttab)
{
	uint_t idx;

	ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist));

	for (idx = 0; idx < mops->mo_count; idx++) {
		struct mntopt *cur = &mops->mo_list[idx];

		if ((cur->mo_flags & MO_EMPTY) != 0 ||
		    strcmp(opt, cur->mo_name) != 0)
			continue;

		/* Found it: mark it unset and discard its argument. */
		cur->mo_flags &= ~MO_SET;
		if (cur->mo_arg != NULL) {
			kmem_free(cur->mo_arg, strlen(cur->mo_arg) + 1);
			cur->mo_arg = NULL;
		}

		if (update_mnttab)
			vfs_mnttab_modtimeupd();
		return;
	}
}
/*
 * Clear mount option opt in vfsp's options table.  When vfsp is on the
 * vfs list, the vfs list lock is held around the update.
 */
void
vfs_clearmntopt(struct vfs *vfsp, const char *opt)
{
	int on_list = 0;

	if (VFS_ON_LIST(vfsp)) {
		vfs_list_lock();
		on_list = 1;
	}

	vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, opt, on_list);

	if (on_list)
		vfs_list_unlock();
}
/*
 * Turn a mount option on.  An option that is not in the table is
 * silently ignored, unless the VFS_CREATEOPT flag bit is set, in which
 * case the first slot marked MO_EMPTY is turned into the new option.
 * An option that has MO_IGNORE set is still set unless the
 * VFS_NOFORCEOPT bit is set in the flags.  The VFS_DISPLAY and
 * VFS_NODISPLAY flag bits clear and set the option's MO_NODISPLAY bit.
 * Setting an option clears every option named in its cancel list.
 *
 * The update_mnttab arg indicates whether mops is part of a vfs that is on
 * the vfs list.
 */
static void
vfs_setmntopt_nolock(mntopts_t *mops, const char *opt,
    const char *arg, int flags, int update_mnttab)
{
	uint_t idx;
	uint_t nslots;

	ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist));

	/* Never create an option that is already in the table. */
	if ((flags & VFS_CREATEOPT) && vfs_hasopt(mops, opt) != NULL)
		flags &= ~VFS_CREATEOPT;

	nslots = mops->mo_count;
	for (idx = 0; idx < nslots; idx++) {
		mntopt_t *cur = &mops->mo_list[idx];
		char *newarg = NULL;

		if (cur->mo_flags & MO_EMPTY) {
			char *name;

			if ((flags & VFS_CREATEOPT) == 0)
				continue;

			/* Claim this empty slot for the new option. */
			name = kmem_alloc(strlen(opt) + 1, KM_SLEEP);
			(void) strcpy(name, opt);
			cur->mo_name = name;
			cur->mo_flags = (arg != NULL) ? MO_HASVALUE : 0;
		} else if (strcmp(opt, cur->mo_name) != 0) {
			continue;
		}

		/* An ignored option is left alone unless forced. */
		if ((cur->mo_flags & MO_IGNORE) && (flags & VFS_NOFORCEOPT))
			return;

		/* Only options that take a value keep a copy of arg. */
		if (arg != NULL && (cur->mo_flags & MO_HASVALUE) != 0) {
			newarg = kmem_alloc(strlen(arg) + 1, KM_SLEEP);
			(void) strcpy(newarg, arg);
		}
		if (cur->mo_arg != NULL)
			kmem_free(cur->mo_arg, strlen(cur->mo_arg) + 1);
		cur->mo_arg = newarg;

		if (flags & VFS_DISPLAY)
			cur->mo_flags &= ~MO_NODISPLAY;
		if (flags & VFS_NODISPLAY)
			cur->mo_flags |= MO_NODISPLAY;
		cur->mo_flags |= MO_SET;

		/* Clear every option this one cancels. */
		if (cur->mo_cancel != NULL) {
			char **cancelp;

			for (cancelp = cur->mo_cancel; *cancelp != NULL;
			    cancelp++)
				vfs_clearmntopt_nolock(mops, *cancelp, 0);
		}

		if (update_mnttab)
			vfs_mnttab_modtimeupd();
		return;
	}
}
/*
 * Set mount option opt (with optional argument arg) in vfsp's options
 * table.  When vfsp is on the vfs list, the vfs list lock is held
 * around the update.
 */
void
vfs_setmntopt(struct vfs *vfsp, const char *opt, const char *arg, int flags)
{
	int on_list = 0;

	if (VFS_ON_LIST(vfsp)) {
		vfs_list_lock();
		on_list = 1;
	}

	vfs_setmntopt_nolock(&vfsp->vfs_mntopts, opt, arg, flags, on_list);

	if (on_list)
		vfs_list_unlock();
}
/*
 * Add a "tag" option to a mounted file system's options list.
 *
 * The option list is reallocated with one extra slot; the new last
 * slot is flagged MO_TAG and given a copy of the tag name.  Returns a
 * pointer to the new slot.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 * to protect mops.
 */
static mntopt_t *
vfs_addtag(mntopts_t *mops, const char *tag)
{
	uint_t oldcnt = mops->mo_count;
	uint_t newcnt = oldcnt + 1;
	mntopt_t *grown;
	mntopt_t *slot;

	grown = kmem_zalloc(newcnt * sizeof (mntopt_t), KM_SLEEP);

	/* Move the existing entries over and release the old array. */
	if (oldcnt != 0) {
		size_t oldbytes = oldcnt * sizeof (mntopt_t);

		bcopy(mops->mo_list, grown, oldbytes);
		kmem_free(mops->mo_list, oldbytes);
	}

	mops->mo_count = newcnt;
	mops->mo_list = grown;

	/* The zeroed last slot becomes the tag. */
	slot = &grown[oldcnt];
	slot->mo_flags = MO_TAG;
	slot->mo_name = kmem_alloc(strlen(tag) + 1, KM_SLEEP);
	(void) strcpy(slot->mo_name, tag);

	return (slot);
}
/*
 * Allow users to set arbitrary "tags" in a vfs's mount options.
 * Broader use within the kernel is discouraged.
 *
 * The target file system is the one on the vfs list whose device is
 * makedevice(major, minor) and whose mount point string equals mntpt.
 *
 * Returns 0 on success, or:
 *	EINVAL		no such mounted file system, or an option named
 *			"tag" already exists but is not a tag (MO_TAG)
 *	ENAMETOOLONG	the option string, with the tag added, would not
 *			fit in MAX_MNTOPT_STR bytes
 *	other		whatever secpolicy_fs_config() returned
 */
int
vfs_settag(uint_t major, uint_t minor, const char *mntpt, const char *tag,
    cred_t *cr)
{
	vfs_t *vfsp;
	mntopts_t *mops;
	mntopt_t *mop;
	int found = 0;
	dev_t dev = makedevice(major, minor);
	int err = 0;
	char *buf = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP);

	/*
	 * Find the desired mounted file system
	 */
	vfs_list_lock();
	vfsp = rootvfs;
	do {
		if (vfsp->vfs_dev == dev &&
		    strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) {
			found = 1;
			break;
		}
		vfsp = vfsp->vfs_next;
	} while (vfsp != rootvfs);

	if (!found) {
		err = EINVAL;
		goto out;
	}
	err = secpolicy_fs_config(cr, vfsp);
	if (err != 0)
		goto out;

	mops = &vfsp->vfs_mntopts;
	/*
	 * Add tag if it doesn't already exist
	 */
	if ((mop = vfs_hasopt(mops, tag)) == NULL) {
		/*
		 * Make sure the current option string, a separator, the
		 * tag and the terminating NUL all fit in MAX_MNTOPT_STR.
		 *
		 * vfs_buildoptionstr() reports failure through a non-zero
		 * return (domount() treats it as an error code).  If the
		 * existing options cannot even be rendered into buf, its
		 * contents cannot be trusted for a length computation and
		 * adding one more option cannot fit either, so that case
		 * is rejected as too long as well.
		 */
		if (vfs_buildoptionstr(mops, buf, MAX_MNTOPT_STR) != 0 ||
		    strlen(buf) + strlen(tag) + 2 > MAX_MNTOPT_STR) {
			err = ENAMETOOLONG;
			goto out;
		}
		mop = vfs_addtag(mops, tag);
	}
	/* An existing option of the same name must itself be a tag. */
	if ((mop->mo_flags & MO_TAG) == 0) {
		err = EINVAL;
		goto out;
	}
	/* The vfs list lock is held, so request the mnttab time update. */
	vfs_setmntopt_nolock(mops, tag, NULL, 0, 1);
out:
	vfs_list_unlock();
	kmem_free(buf, MAX_MNTOPT_STR);
	return (err);
}
/*
 * Allow users to remove arbitrary "tags" in a vfs's mount options.
 * Broader use within the kernel is discouraged.
 *
 * The target file system is the one on the vfs list whose device is
 * makedevice(major, minor) and whose mount point string equals mntpt.
 *
 * Returns 0 on success, EINVAL if no such file system is mounted or
 * if "tag" is not an existing MO_TAG option, or the error returned by
 * secpolicy_fs_config().
 */
int
vfs_clrtag(uint_t major, uint_t minor, const char *mntpt, const char *tag,
    cred_t *cr)
{
	dev_t dev = makedevice(major, minor);
	vfs_t *match = NULL;
	vfs_t *vfsp;
	mntopt_t *mop;
	int err;

	vfs_list_lock();

	/*
	 * Find the desired mounted file system
	 */
	vfsp = rootvfs;
	do {
		if (vfsp->vfs_dev == dev &&
		    strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) {
			match = vfsp;
			break;
		}
		vfsp = vfsp->vfs_next;
	} while (vfsp != rootvfs);

	if (match == NULL) {
		err = EINVAL;
	} else if ((err = secpolicy_fs_config(cr, match)) == 0) {
		mop = vfs_hasopt(&match->vfs_mntopts, tag);
		if (mop == NULL || (mop->mo_flags & MO_TAG) == 0) {
			/* No such option, or it is not a tag. */
			err = EINVAL;
		} else {
			vfs_clearmntopt_nolock(&match->vfs_mntopts, tag, 1);
		}
	}

	vfs_list_unlock();
	return (err);
}
/*
 * Parse a comma-separated option string ("name" or "name=value"
 * items) and set each option in the mount options table mops.
 * Unknown options are silently ignored.  If the create flag is set,
 * options not found in the table are added on the fly; the table must
 * have a slot marked MO_EMPTY for an option to be added that way.
 *
 * The option string is modified while it is parsed (separators are
 * temporarily replaced with NULs) but every separator is restored
 * before the next option is examined.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 * to protect mops.
 */
void
vfs_parsemntopts(mntopts_t *mops, char *osp, int create)
{
	const int setflags = create ?
	    (VFS_NOFORCEOPT | VFS_CREATEOPT) : VFS_NOFORCEOPT;
	char *opt = osp;

	if (osp == NULL)
		return;

	while (*opt != '\0') {
		char *comma = strchr(opt, ',');	/* end of this option */
		char *equals;
		char *value = NULL;		/* no value supplied */
		char *next;

		if (comma != NULL) {
			/* mark end and point to next option */
			*comma = '\0';
			next = comma + 1;
		} else {
			next = opt + strlen(opt);
		}

		equals = strchr(opt, '=');	/* look for value */
		if (equals != NULL) {
			/* end option name and point to value */
			*equals = '\0';
			value = equals + 1;
		}

		/*
		 * set option into options table
		 */
		vfs_setmntopt_nolock(mops, opt, value, setflags, 0);

		/* put the separators back the way we found them */
		if (comma != NULL)
			*comma = ',';
		if (equals != NULL)
			*equals = '=';

		opt = next;
	}
}
/*
 * Look up an option by name in a mount options table.  Slots flagged
 * MO_EMPTY are skipped.  Returns a pointer to the first matching option,
 * or NULL if there is none.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 * to protect mops.
 */
struct mntopt *
vfs_hasopt(const mntopts_t *mops, const char *opt)
{
	const uint_t nopts = mops->mo_count;
	uint_t idx;

	for (idx = 0; idx < nopts; idx++) {
		struct mntopt *cand = &mops->mo_list[idx];

		if (!(cand->mo_flags & MO_EMPTY) &&
		    strcmp(opt, cand->mo_name) == 0)
			return (cand);
	}

	return (NULL);
}
/*
 * Inquire whether an option is set in a mount options table.
 *
 * Returns 1 if the first non-empty slot with the given name has MO_SET,
 * 0 otherwise (including when the option is not in the table).  When the
 * option is set, carries a value (MO_HASVALUE) and argp is non-NULL, *argp
 * is pointed at the option's argument string; in every other case *argp is
 * left untouched.
 */
static int
vfs_optionisset_nolock(const mntopts_t *mops, const char *opt, char **argp)
{
	const uint_t nopts = mops->mo_count;
	uint_t idx;

	for (idx = 0; idx < nopts; idx++) {
		struct mntopt *cand = &mops->mo_list[idx];

		if ((cand->mo_flags & MO_EMPTY) ||
		    strcmp(opt, cand->mo_name) != 0)
			continue;

		/* The first live slot with this name decides the answer. */
		if (!(cand->mo_flags & MO_SET))
			return (0);
		if (argp != NULL && (cand->mo_flags & MO_HASVALUE) != 0)
			*argp = cand->mo_arg;
		return (1);
	}

	return (0);
}
/*
 * Locked front end to vfs_optionisset_nolock(): takes the vfs list lock as
 * reader around the lookup in vfsp's mount options table and returns the
 * lookup's result.
 */
int
vfs_optionisset(const struct vfs *vfsp, const char *opt, char **argp)
{
	int isset;

	vfs_list_read_lock();
	isset = vfs_optionisset_nolock(&vfsp->vfs_mntopts, opt, argp);
	vfs_list_unlock();

	return (isset);
}
/*
* Construct a comma separated string of the options set in the given
* mount table, return the string in the given buffer. Return non-zero if
* the buffer would overflow.
*
* This function is *not* for general use by filesystems.
*
* Note: caller is responsible for locking the vfs list, if needed,
* to protect mp.
*/
int
vfs_buildoptionstr(const mntopts_t *mp, char *buf, int len)
{
char *cp;
uint_t i;
buf[0] = '\0';
cp = buf;
for (i = 0; i < mp->mo_count; i++) {
struct mntopt *mop;
mop = &mp->mo_list[i];
if (mop->mo_flags & MO_SET) {
int optlen, comma = 0;
if (buf[0] != '\0')
comma = 1;
optlen = strlen(mop->mo_name);
if (strlen(buf) + comma + optlen + 1 > len)
goto err;
if (comma)
*cp++ = ',';
(void) strcpy(cp, mop->mo_name);
cp += optlen;
/*
* Append option value if there is one
*/
if (mop->mo_arg != NULL) {
int arglen;
arglen = strlen(mop->mo_arg);
if (strlen(buf) + arglen + 2 > len)
goto err;
*cp++ = '=';
(void) strcpy(cp, mop->mo_arg);
cp += arglen;
}
}
}
return (0);
err:
return (EOVERFLOW);
}
/*
 * Free a NULL-terminated array of cancel-option strings: each string is
 * released with its strlen() + 1 size, then the pointer array itself
 * (including its terminating NULL slot).  A NULL array is a no-op.
 */
static void
vfs_freecancelopt(char **moc)
{
	int nstr = 0;

	if (moc == NULL)
		return;

	while (moc[nstr] != NULL) {
		kmem_free(moc[nstr], strlen(moc[nstr]) + 1);
		nstr++;
	}

	kmem_free(moc, (nstr + 1) * sizeof (char *));
}
/*
 * Release the storage hanging off a single mount option: its name, its
 * cancel list and its argument string.  The mntopt_t itself is not freed.
 */
static void
vfs_freeopt(mntopt_t *mop)
{
	char *name = mop->mo_name;
	char *arg;

	if (name != NULL)
		kmem_free(name, strlen(name) + 1);

	vfs_freecancelopt(mop->mo_cancel);

	arg = mop->mo_arg;
	if (arg != NULL)
		kmem_free(arg, strlen(arg) + 1);
}
/*
 * Free a mount options table: every option's storage, then the option
 * array, leaving the table empty (mo_count 0, mo_list NULL).  A table with
 * no entries is left untouched.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 * to protect mp.
 */
void
vfs_freeopttbl(mntopts_t *mp)
{
	const uint_t nopts = mp->mo_count;
	uint_t idx;

	if (nopts == 0)
		return;

	for (idx = 0; idx < nopts; idx++)
		vfs_freeopt(&mp->mo_list[idx]);

	kmem_free(mp->mo_list, sizeof (mntopt_t) * nopts);
	mp->mo_count = 0;
	mp->mo_list = NULL;
}
/*
 * Read entry point for the mnttab dummy vnode: ignores all of its
 * arguments and reports success.
 */
/* ARGSUSED */
static int
vfs_mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
    caller_context_t *ct)
{
	return (0);
}
/*
 * Write entry point for the mnttab dummy vnode: ignores all of its
 * arguments and reports success.
 */
/* ARGSUSED */
static int
vfs_mntdummywrite(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
    caller_context_t *ct)
{
	return (0);
}
/*
 * Getattr entry point for the mnttab dummy vnode.  The dummy vnode is
 * currently used only by the file events notification module, which is
 * just interested in the timestamps, so the attributes are zeroed and only
 * the type (VREG), link count (1) and the three times are filled in from
 * the recorded mnttab ctime/mtime.  Always returns 0.
 */
/* ARGSUSED */
static int
vfs_mntdummygetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	bzero(vap, sizeof (*vap));

	vap->va_type = VREG;
	vap->va_nlink = 1;

	vap->va_ctime = vfs_mnttab_ctime;
	/*
	 * Copying mtime for atime as well is fine, since the time is
	 * monotonically increasing.
	 */
	vap->va_mtime = vfs_mnttab_mtime;
	vap->va_atime = vap->va_mtime;

	return (0);
}
/*
 * Create the global dummy vnode (vfs_mntdummyvp) that stands in for the
 * mnttab file.  Builds a vnodeops vector named "mnttab" from the dummy
 * read/write/getattr routines plus fs_vnevent_support, allocates a VREG
 * vnode using those ops and publishes it.  If vn_make_ops() fails a
 * warning is logged and vfs_mntdummyvp is not set by this routine.
 */
static void
vfs_mnttabvp_setup(void)
{
	vnode_t *tvp;
	vnodeops_t *vfs_mntdummyvnops;
	/* name/handler pairs, terminated by a NULL, NULL entry */
	const fs_operation_def_t mnt_dummyvnodeops_template[] = {
		VOPNAME_READ, 		{ .vop_read = vfs_mntdummyread },
		VOPNAME_WRITE, 		{ .vop_write = vfs_mntdummywrite },
		VOPNAME_GETATTR,	{ .vop_getattr = vfs_mntdummygetattr },
		VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
		NULL,			NULL
	};

	if (vn_make_ops("mnttab", mnt_dummyvnodeops_template,
	    &vfs_mntdummyvnops) != 0) {
		cmn_err(CE_WARN, "vfs_mnttabvp_setup: vn_make_ops failed");
		/* Shouldn't happen, but not bad enough to panic */
		return;
	}

	/*
	 * A global dummy vnode is allocated to represent mntfs files.
	 * The mntfs file (/etc/mnttab) can be monitored for file events
	 * and receive an event when mnttab changes. Dummy VOP calls
	 * will be made on this vnode. The file events notification module
	 * intercepts this vnode and delivers relevant events.
	 */
	tvp = vn_alloc(KM_SLEEP);
	tvp->v_flag = VNOMOUNT|VNOMAP|VNOSWAP|VNOCACHE;
	vn_setops(tvp, vfs_mntdummyvnops);
	tvp->v_type = VREG;
	/*
	 * The mnt dummy ops do not reference v_data.
	 * No other module intercepting this vnode should either.
	 * Just set it to point to itself.
	 */
	tvp->v_data = (caddr_t)tvp;
	tvp->v_vfsp = rootvfs;
	/* publish only once the vnode is fully initialized */
	vfs_mntdummyvp = tvp;
}
/*
* performs fake read/write ops
*/
static void
vfs_mnttab_rwop(int rw)
{
struct uio uio;
struct iovec iov;
char buf[1];
if (vfs_mntdummyvp == NULL)
return;
bzero(&uio, sizeof (uio));
bzero(&iov, sizeof (iov));
iov.iov_base = buf;
iov.iov_len = 0;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_loffset = 0;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_resid = 0;
if (rw) {
(void) VOP_WRITE(vfs_mntdummyvp, &uio, 0, kcred, NULL);
} else {
(void) VOP_READ(vfs_mntdummyvp, &uio, 0, kcred, NULL);
}
}
/*
 * Generate a (fake) write operation on the mnttab dummy vnode.
 */
void
vfs_mnttab_writeop(void)
{
	const int do_write = 1;

	vfs_mnttab_rwop(do_write);
}
/*
 * Generate a (fake) read operation on the mnttab dummy vnode.
 */
void
vfs_mnttab_readop(void)
{
	const int do_write = 0;

	vfs_mnttab_rwop(do_write);
}
/*
 * Free any mnttab information recorded in the vfs struct: the mount point
 * and resource strings are released and cleared, then the mount options
 * table is freed.  The vfs must not be on the vfs list.
 */
static void
vfs_freemnttab(struct vfs *vfsp)
{
	ASSERT(!VFS_ON_LIST(vfsp));

	/* Mount point string. */
	if (vfsp->vfs_mntpt != NULL) {
		refstr_rele(vfsp->vfs_mntpt);
		vfsp->vfs_mntpt = NULL;
	}

	/* Resource (device) string. */
	if (vfsp->vfs_resource != NULL) {
		refstr_rele(vfsp->vfs_resource);
		vfsp->vfs_resource = NULL;
	}

	/* Mount options. */
	vfs_freeopttbl(&vfsp->vfs_mntopts);
}
/*
 * Return the last mnttab modification time in *ts.
 * The caller must hold the vfs list lock.
 */
void
vfs_mnttab_modtime(timespec_t *ts)
{
	ASSERT(RW_LOCK_HELD(&vfslist));

	*ts = vfs_mnttab_mtime;
}
/*
 * See if mnttab has changed relative to the time in *old.  If the recorded
 * modification time differs, *phpp is set to NULL; if it is unchanged,
 * *phpp is set to the mnttab pollhead.
 */
void
vfs_mnttab_poll(timespec_t *old, struct pollhead **phpp)
{
	/*
	 * Note: don't grab vfs list lock before accessing vfs_mnttab_mtime.
	 * Can lead to deadlock against vfs_mnttab_modtimeupd(). It is safe
	 * to not grab the vfs list lock because tv_sec is monotonically
	 * increasing.
	 */
	if (old->tv_nsec == vfs_mnttab_mtime.tv_nsec &&
	    old->tv_sec == vfs_mnttab_mtime.tv_sec) {
		*phpp = &vfs_pollhd;
	} else {
		*phpp = (struct pollhead *)NULL;
	}
}
/*
 * Update the mnttab modification time and wake up any waiters for
 * mnttab changes.  The first update (previous mtime of zero) also records
 * the creation time.  The caller must hold the vfs list lock as writer.
 */
void
vfs_mnttab_modtimeupd()
{
	hrtime_t before, after;

	ASSERT(RW_WRITE_HELD(&vfslist));

	before = ts2hrt(&vfs_mnttab_mtime);
	gethrestime(&vfs_mnttab_mtime);
	after = ts2hrt(&vfs_mnttab_mtime);

	if (before == (hrtime_t)0)
		vfs_mnttab_ctime = vfs_mnttab_mtime;

	/*
	 * Attempt to provide unique mtime (like uniqtime but not): if the
	 * clock did not advance, nudge the stored time forward by one.
	 */
	if (after == before)
		hrt2ts(after + 1, &vfs_mnttab_mtime);

	pollwakeup(&vfs_pollhd, (short)POLLRDBAND);
	vfs_mnttab_writeop();
}
/*
 * Unmount the given vfs.
 *
 * The dnlc entries for the vfs are purged, the file system is synced
 * (unless MS_FORCE is set in flag), the vfs is locked and VFS_UNMOUNT() is
 * called.  If a covered vnode exists, the caller must already hold its
 * vn_vfswlock; that lock is dropped here on both the success and failure
 * paths.  The vfs lock taken here is likewise released on every path
 * (by vfs_remove() when the vfs is linked into the name space).
 *
 * Returns the result of VFS_UNMOUNT(): 0 on success, non-zero on failure.
 */
int
dounmount(struct vfs *vfsp, int flag, cred_t *cr)
{
	vnode_t *coveredvp;
	int error;
	extern void teardown_vopstats(vfs_t *);

	/*
	 * Get covered vnode. This will be NULL if the vfs is not linked
	 * into the file system name space (i.e., domount() with MNT_NOSPICE).
	 */
	coveredvp = vfsp->vfs_vnodecovered;
	ASSERT(coveredvp == NULL || vn_vfswlock_held(coveredvp));

	/*
	 * Purge all dnlc entries for this vfs.
	 */
	(void) dnlc_purge_vfsp(vfsp, 0);

	/* For forcible umount, skip VFS_SYNC() since it may hang */
	if ((flag & MS_FORCE) == 0)
		(void) VFS_SYNC(vfsp, 0, cr);

	/*
	 * Lock the vfs to maintain fs status quo during unmount.  This
	 * has to be done after the sync because ufs_update tries to acquire
	 * the vfs_reflock.
	 */
	vfs_lock_wait(vfsp);

	if (error = VFS_UNMOUNT(vfsp, flag, cr)) {
		/* unmount refused: undo the locking and report the error */
		vfs_unlock(vfsp);
		if (coveredvp != NULL)
			vn_vfsunlock(coveredvp);
	} else if (coveredvp != NULL) {
		teardown_vopstats(vfsp);
		/*
		 * vfs_remove() will do a VN_RELE(vfsp->vfs_vnodecovered)
		 * when it frees vfsp so we do a VN_HOLD() so we can
		 * continue to use coveredvp afterwards.
		 */
		VN_HOLD(coveredvp);
		vfs_remove(vfsp);
		vn_vfsunlock(coveredvp);
		VN_RELE(coveredvp);
	} else {
		teardown_vopstats(vfsp);
		/*
		 * Release the reference to vfs that is not linked
		 * into the name space.
		 */
		vfs_unlock(vfsp);
		VFS_RELE(vfsp);
	}
	return (error);
}
/*
 * Vfs_unmountall() is called by uadmin() to unmount all
 * mounted file systems (except the root file system) during shutdown.
 * It follows the existing locking protocol when traversing the vfs list
 * to sync and unmount vfses. Even though there should be no
 * other thread running while the system is shutting down, it is prudent
 * to still follow the locking protocol.
 *
 * The list is walked backwards from the entry before rootvfs.  Entries
 * whose vfs lock or covered-vnode lock cannot be obtained are skipped, and
 * the results of VFS_SYNC() and dounmount() are ignored.
 */
void
vfs_unmountall(void)
{
	struct vfs *vfsp;
	struct vfs *prev_vfsp = NULL;
	int error;

	/*
	 * Toss all dnlc entries now so that the per-vfs sync
	 * and unmount operations don't have to slog through
	 * a bunch of uninteresting vnodes over and over again.
	 */
	dnlc_purge();

	vfs_list_lock();
	for (vfsp = rootvfs->vfs_prev; vfsp != rootvfs; vfsp = prev_vfsp) {
		/* remember the predecessor before vfsp can go away */
		prev_vfsp = vfsp->vfs_prev;

		if (vfs_lock(vfsp) != 0)
			continue;
		/* dounmount() expects the covered vnode to be write-locked */
		error = vn_vfswlock(vfsp->vfs_vnodecovered);
		vfs_unlock(vfsp);
		if (error)
			continue;

		vfs_list_unlock();

		(void) VFS_SYNC(vfsp, SYNC_CLOSE, CRED());
		(void) dounmount(vfsp, 0, CRED());

		/*
		 * Since we dropped the vfslist lock above we must
		 * verify that prev_vfsp still exists, else start over.
		 */
		vfs_list_lock();
		for (vfsp = rootvfs->vfs_prev;
		    vfsp != rootvfs; vfsp = vfsp->vfs_prev)
			if (vfsp == prev_vfsp)
				break;
		/* prev_vfsp vanished from the list: restart from the tail */
		if (vfsp == rootvfs && prev_vfsp != rootvfs)
			prev_vfsp = rootvfs->vfs_prev;
	}
	vfs_list_unlock();
}
/*
 * Called to add an entry (device, vfs) to the end of the vfs mount in
 * progress list.  The entry is allocated with KM_SLEEP and linked in under
 * vfs_miplist_mutex.
 */
void
vfs_addmip(dev_t dev, struct vfs *vfsp)
{
	struct ipmnt *entry;

	entry = kmem_alloc(sizeof (struct ipmnt), KM_SLEEP);
	entry->mip_dev = dev;
	entry->mip_vfsp = vfsp;
	entry->mip_next = NULL;

	mutex_enter(&vfs_miplist_mutex);
	if (vfs_miplist_end == NULL) {
		/* empty list: the new entry is also the head */
		vfs_miplist = entry;
	} else {
		vfs_miplist_end->mip_next = entry;
	}
	vfs_miplist_end = entry;
	mutex_exit(&vfs_miplist_mutex);
}
/*
 * Called to remove an entry from the mount in progress list
 * Either because the mount completed or it failed.
 *
 * The first entry whose vfs pointer matches vfsp is unlinked under
 * vfs_miplist_mutex and then freed.  If no entry matches (which should not
 * happen) the list is left unchanged.  The mutex is released on every
 * return path.
 */
void
vfs_delmip(struct vfs *vfsp)
{
	struct ipmnt *mipp, *mipprev;

	mutex_enter(&vfs_miplist_mutex);
	mipprev = NULL;
	for (mipp = vfs_miplist;
	    mipp && mipp->mip_vfsp != vfsp; mipp = mipp->mip_next) {
		mipprev = mipp;
	}
	if (mipp == NULL) {
		/*
		 * Shouldn't happen, but if it does we must still drop the
		 * list mutex before returning; otherwise the next caller
		 * touching the list would block forever.
		 */
		mutex_exit(&vfs_miplist_mutex);
		return;
	}
	/* removing the tail: the previous entry (or NULL) becomes the tail */
	if (mipp == vfs_miplist_end)
		vfs_miplist_end = mipprev;
	if (mipprev == NULL)
		vfs_miplist = mipp->mip_next;
	else
		mipprev->mip_next = mipp->mip_next;
	mutex_exit(&vfs_miplist_mutex);
	kmem_free(mipp, sizeof (struct ipmnt));
}
/*
 * vfs_add is called by a specific filesystem's mount routine to add
 * the new vfs into the vfs list/hash and to cover the mounted-on vnode.
 * The vfs should already have been locked by the caller.
 *
 * The VFS_RDONLY, VFS_NOSETUID/VFS_NODEVICES and VFS_NOMNTTAB bits of
 * vfs_flag are set or cleared according to MS_RDONLY, MS_NOSUID and
 * MS_NOMNTTAB in mflag; all other flag bits are preserved.
 *
 * coveredvp is NULL if this is the root.
 */
void
vfs_add(vnode_t *coveredvp, struct vfs *vfsp, int mflag)
{
	int flags;

	ASSERT(vfs_lock_held(vfsp));
	VFS_HOLD(vfsp);

	/* Start from the current flags with the mount-controlled bits off. */
	flags = vfsp->vfs_flag &
	    ~(VFS_RDONLY | VFS_NOSETUID | VFS_NODEVICES | VFS_NOMNTTAB);
	if (mflag & MS_RDONLY)
		flags |= VFS_RDONLY;
	if (mflag & MS_NOSUID)
		flags |= (VFS_NOSETUID | VFS_NODEVICES);
	if (mflag & MS_NOMNTTAB)
		flags |= VFS_NOMNTTAB;

	if (coveredvp != NULL) {
		ASSERT(vn_vfswlock_held(coveredvp));
		coveredvp->v_vfsmountedhere = vfsp;
		VN_HOLD(coveredvp);
	}

	vfsp->vfs_vnodecovered = coveredvp;
	vfsp->vfs_flag = flags;

	vfs_list_add(vfsp);
}
/*
 * Remove a vfs from the vfs list, null out the pointer from the
 * covered vnode to the vfs (v_vfsmountedhere), and null out the pointer
 * from the vfs to the covered vnode (vfs_vnodecovered). Release the
 * reference to the vfs and to the covered vnode, and drop the vfs lock.
 *
 * Called from dounmount after it's confirmed with the file system
 * that the unmount is legal.  The caller must hold the vfs lock and the
 * covered vnode's vfs write lock.
 */
void
vfs_remove(struct vfs *vfsp)
{
	vnode_t *covered;

	ASSERT(vfs_lock_held(vfsp));

	/*
	 * Can't unmount root.  Should never happen because fs will
	 * be busy.
	 */
	if (vfsp == rootvfs)
		panic("vfs_remove: unmounting root");

	vfs_list_remove(vfsp);

	/* Unhook from the file system name space, in both directions. */
	covered = vfsp->vfs_vnodecovered;
	ASSERT(vn_vfswlock_held(covered));
	vfsp->vfs_vnodecovered = NULL;
	covered->v_vfsmountedhere = NULL;
	VN_RELE(covered);

	/* Release lock and wakeup anybody waiting. */
	vfs_unlock(vfsp);
	VFS_RELE(vfsp);
}
/*
 * Lock a filesystem (as writer) to prevent access to it while mounting,
 * unmounting and syncing.  Returns 0 with the lock held, or EBUSY
 * immediately if the lock can't be acquired; in the EBUSY case the lock
 * entry obtained for the attempt is released again.
 */
int
vfs_lock(vfs_t *vfsp)
{
	vn_vfslocks_entry_t *entry = vn_vfslocks_getlock(vfsp);

	if (!rwst_tryenter(&entry->ve_lock, RW_WRITER)) {
		vn_vfslocks_rele(entry);
		return (EBUSY);
	}

	return (0);
}
/*
 * Reader variant of vfs_lock(): try to lock the filesystem as reader.
 * Returns 0 with the lock held, or EBUSY immediately if it can't be
 * acquired, releasing the lock entry obtained for the attempt.
 */
int
vfs_rlock(vfs_t *vfsp)
{
	vn_vfslocks_entry_t *entry = vn_vfslocks_getlock(vfsp);

	if (!rwst_tryenter(&entry->ve_lock, RW_READER)) {
		vn_vfslocks_rele(entry);
		return (EBUSY);
	}

	return (0);
}
/*
 * Lock a filesystem as writer via rwst_enter() rather than the try-enter
 * used by vfs_lock().  The lock entry obtained here stays referenced
 * until vfs_unlock().
 */
void
vfs_lock_wait(vfs_t *vfsp)
{
	vn_vfslocks_entry_t *entry = vn_vfslocks_getlock(vfsp);

	rwst_enter(&entry->ve_lock, RW_WRITER);
}
/*
 * Lock a filesystem as reader via rwst_enter() rather than the try-enter
 * used by vfs_rlock().  The lock entry obtained here stays referenced
 * until vfs_unlock().
 */
void
vfs_rlock_wait(vfs_t *vfsp)
{
	vn_vfslocks_entry_t *entry = vn_vfslocks_getlock(vfsp);

	rwst_enter(&entry->ve_lock, RW_READER);
}
/*
 * Unlock a locked filesystem.  Does nothing once the system is panicking.
 */
void
vfs_unlock(vfs_t *vfsp)
{
	vn_vfslocks_entry_t *vpvfsentry;

	/*
	 * vfs_unlock will mimic sema_v behaviour to fix 4748018.
	 * And these changes should remain for the patch changes as it is.
	 */
	if (panicstr)
		return;

	/*
	 * ve_refcount needs to be dropped twice here.
	 * 1. To release the reference taken by the call to
	 *    vn_vfslocks_getlock() just below.
	 * 2. To release the reference kept by the locking routines
	 *    (vfs_lock(), vfs_rlock(), vfs_lock_wait(), vfs_rlock_wait()).
	 */
	vpvfsentry = vn_vfslocks_getlock(vfsp);
	vn_vfslocks_rele(vpvfsentry);
	rwst_exit(&vpvfsentry->ve_lock);
	vn_vfslocks_rele(vpvfsentry);
}
/*
 * Utility routine that allows a filesystem to construct its
 * fsid in "the usual way" - by munging some underlying dev_t and
 * the filesystem type number into the 64-bit fsid.  Note that
 * this implicitly relies on dev_t persistence to make filesystem
 * id's persistent.
 *
 * There's nothing to prevent an individual fs from constructing its
 * fsid in a different way, and indeed they should.
 *
 * Since we want fsids to be 32-bit quantities (so that they can be
 * exported identically by either 32-bit or 64-bit APIs, as well as
 * the fact that fsid's are "known" to NFS), we compress the device
 * number given down to 32-bits, and panic if that isn't possible.
 *
 * On return fsi->val[0] holds the compressed device number and
 * fsi->val[1] holds val.
 */
void
vfs_make_fsid(fsid_t *fsi, dev_t dev, int val)
{
	dev32_t *devslot = (dev32_t *)&fsi->val[0];

	if (cmpldev(devslot, dev) == 0)
		panic("device number too big for fsid!");

	fsi->val[1] = val;
}
/*
 * Report whether the vfs lock is held as writer (the result of
 * rwst_lock_held() for RW_WRITER).  Always answers 1 once the system is
 * panicking.
 */
int
vfs_lock_held(vfs_t *vfsp)
{
	vn_vfslocks_entry_t *entry;
	int is_held;

	/*
	 * vfs_lock_held will mimic sema_held behaviour
	 * if panicstr is set.  And these changes should remain
	 * for the patch changes as it is.
	 */
	if (panicstr)
		return (1);

	/* Take a temporary reference on the lock entry for the query. */
	entry = vn_vfslocks_getlock(vfsp);
	is_held = rwst_lock_held(&entry->ve_lock, RW_WRITER);
	vn_vfslocks_rele(entry);

	return (is_held);
}
/*
 * Return the owner of the vfs lock as reported by rwst_owner().
 * Always answers NULL once the system is panicking.
 */
struct _kthread *
vfs_lock_owner(vfs_t *vfsp)
{
	vn_vfslocks_entry_t *entry;
	struct _kthread *holder;

	/*
	 * vfs_lock_owner will mimic sema_held behaviour
	 * if panicstr is set.  And these changes should remain
	 * for the patch changes as it is.
	 */
	if (panicstr)
		return (NULL);

	/* Take a temporary reference on the lock entry for the query. */
	entry = vn_vfslocks_getlock(vfsp);
	holder = rwst_owner(&entry->ve_lock);
	vn_vfslocks_rele(entry);

	return (holder);
}
/*
* vfs list locking.
*
* Rather than manipulate the vfslist lock directly, we abstract into lock
* and unlock routines to allow the locking implementation to be changed for
* clustering.
*
* Whenever the vfs list is modified through its hash links, the overall list
* lock must be obtained before locking the relevant hash bucket. But to see
* whether a given vfs is on the list, it suffices to obtain the lock for the
* hash bucket without getting the overall list lock. (See getvfs() below.)
*/
/*
 * Acquire the vfs list lock as writer.
 */
void
vfs_list_lock()
{
	rw_enter(&vfslist, RW_WRITER);
}
/*
 * Acquire the vfs list lock as reader.
 */
void
vfs_list_read_lock()
{
	rw_enter(&vfslist, RW_READER);
}
/*
 * Release the vfs list lock, however it was acquired.
 */
void
vfs_list_unlock()
{
	rw_exit(&vfslist);
}
/*
* Low level worker routines for adding entries to and removing entries from
* the vfs list.
*/
/*
 * Link a vfs into the hash bucket selected by the device in its fsid.
 * With insert_at_head non-zero the vfs becomes the first entry of the
 * chain; otherwise it is appended at the end, so that LOFS with the same
 * fsid as UFS (or other) file systems will not hide the UFS.
 *
 * The caller must hold the vfs list lock as writer; the bucket's own
 * mutex is taken here.
 */
static void
vfs_hash_add(struct vfs *vfsp, int insert_at_head)
{
	struct vfs **link;
	dev_t dev;
	int vhno;

	ASSERT(RW_WRITE_HELD(&vfslist));

	dev = expldev(vfsp->vfs_fsid.val[0]);
	vhno = VFSHASH(getmajor(dev), getminor(dev));

	mutex_enter(&rvfs_list[vhno].rvfs_lock);

	/*
	 * link is the address of the pointer to update to effect the
	 * insertion: the bucket head, or the terminating NULL link when
	 * appending.
	 */
	link = &rvfs_list[vhno].rvfs_head;
	if (!insert_at_head) {
		while (*link != NULL)
			link = &(*link)->vfs_hash;
	}
	vfsp->vfs_hash = *link;
	*link = vfsp;

	rvfs_list[vhno].rvfs_len++;
	mutex_exit(&rvfs_list[vhno].rvfs_lock);
}
/*
 * Unlink a vfs from the hash bucket selected by the device in its fsid
 * and decrement the bucket length.  If the vfs is not on the chain a
 * warning is logged and the chain is left unchanged.
 *
 * The caller must hold the vfs list lock as writer; the bucket's own
 * mutex is taken here.
 */
static void
vfs_hash_remove(struct vfs *vfsp)
{
	struct vfs **link;
	dev_t dev;
	int vhno;

	ASSERT(RW_WRITE_HELD(&vfslist));

	dev = expldev(vfsp->vfs_fsid.val[0]);
	vhno = VFSHASH(getmajor(dev), getminor(dev));

	mutex_enter(&rvfs_list[vhno].rvfs_lock);

	/* Find the link (head or a predecessor's vfs_hash) that names vfsp. */
	link = &rvfs_list[vhno].rvfs_head;
	while (*link != NULL && *link != vfsp)
		link = &(*link)->vfs_hash;

	if (*link != NULL) {
		*link = vfsp->vfs_hash;
		rvfs_list[vhno].rvfs_len--;
	} else {
		cmn_err(CE_WARN, "vfs_list_remove: vfs not found in hash");
	}

	mutex_exit(&rvfs_list[vhno].rvfs_lock);
}
/*
 * Add a vfs to the global vfs list, to the vfs list of the zone it is
 * mounted into, and to the fsid hash, then update the mnttab modification
 * time.  The vfs list lock is taken and dropped here.  A hold is placed on
 * the mounting process's zone (recorded in vfs_zone); the reference on the
 * zone found by path is dropped before returning.
 */
void
vfs_list_add(struct vfs *vfsp)
{
	zone_t *zone;

	/*
	 * The zone that owns the mount is the one that performed the mount.
	 * Note that this isn't necessarily the same as the zone mounted into.
	 * The corresponding zone_rele() will be done when the vfs_t is
	 * being free'd.
	 */
	vfsp->vfs_zone = curproc->p_zone;
	zone_hold(vfsp->vfs_zone);

	/*
	 * Find the zone mounted into, and put this mount on its vfs list.
	 */
	zone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
	ASSERT(zone != NULL);
	/*
	 * Special casing for the root vfs.  This structure is allocated
	 * statically and hooked onto rootvfs at link time.  During the
	 * vfs_mountroot call at system startup time, the root file system's
	 * VFS_MOUNTROOT routine will call vfs_add with this root vfs struct
	 * as argument.  The code below must detect and handle this special
	 * case.  The only apparent justification for this special casing is
	 * to ensure that the root file system appears at the head of the
	 * list.
	 *
	 * XXX:	I'm assuming that it's ok to do normal list locking when
	 *	adding the entry for the root file system (this used to be
	 *	done with no locks held).
	 */
	vfs_list_lock();
	/*
	 * Link into the vfs list proper.
	 */
	if (vfsp == &root) {
		/*
		 * Assert: This vfs is already on the list as its first entry.
		 * Thus, there's nothing to do.
		 */
		ASSERT(rootvfs == vfsp);
		/*
		 * Add it to the head of the global zone's vfslist.
		 */
		ASSERT(zone == global_zone);
		ASSERT(zone->zone_vfslist == NULL);
		zone->zone_vfslist = vfsp;
	} else {
		/*
		 * Link to end of list using vfs_prev (as rootvfs is now a
		 * doubly linked circular list) so list is in mount order for
		 * mnttab use.
		 */
		rootvfs->vfs_prev->vfs_next = vfsp;
		vfsp->vfs_prev = rootvfs->vfs_prev;
		rootvfs->vfs_prev = vfsp;
		vfsp->vfs_next = rootvfs;

		/*
		 * Do it again for the zone-private list (which may be NULL).
		 */
		if (zone->zone_vfslist == NULL) {
			ASSERT(zone != global_zone);
			zone->zone_vfslist = vfsp;
		} else {
			/* append before the zone list head, i.e. at the tail */
			zone->zone_vfslist->vfs_zone_prev->vfs_zone_next = vfsp;
			vfsp->vfs_zone_prev = zone->zone_vfslist->vfs_zone_prev;
			zone->zone_vfslist->vfs_zone_prev = vfsp;
			vfsp->vfs_zone_next = zone->zone_vfslist;
		}
	}

	/*
	 * Link into the hash table, inserting it at the end, so that LOFS
	 * with the same fsid as UFS (or other) file systems will not hide
	 * the UFS.
	 */
	vfs_hash_add(vfsp, 0);

	/*
	 * update the mnttab modification time
	 */
	vfs_mnttab_modtimeupd();
	vfs_list_unlock();
	/* drop the reference on the zone looked up by path */
	zone_rele(zone);
}
/*
 * Remove a vfs from the fsid hash, the global vfs list and the vfs list of
 * the zone it is mounted into, then update the mnttab modification time.
 * The vfs list lock is taken and dropped here.  Must not be called for the
 * root vfs.
 */
void
vfs_list_remove(struct vfs *vfsp)
{
	zone_t *zone;

	zone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
	ASSERT(zone != NULL);
	/*
	 * Callers are responsible for preventing attempts to unmount the
	 * root.
	 */
	ASSERT(vfsp != rootvfs);

	vfs_list_lock();

	/*
	 * Remove from hash.
	 */
	vfs_hash_remove(vfsp);

	/*
	 * Remove from vfs list.
	 */
	vfsp->vfs_prev->vfs_next = vfsp->vfs_next;
	vfsp->vfs_next->vfs_prev = vfsp->vfs_prev;
	vfsp->vfs_next = vfsp->vfs_prev = NULL;

	/*
	 * Remove from zone-specific vfs list.
	 */
	/* if vfsp heads the zone list, advance the head past it */
	if (zone->zone_vfslist == vfsp)
		zone->zone_vfslist = vfsp->vfs_zone_next;

	/* vfsp linked only to itself: it was the sole entry, list is empty */
	if (vfsp->vfs_zone_next == vfsp) {
		ASSERT(vfsp->vfs_zone_prev == vfsp);
		ASSERT(zone->zone_vfslist == vfsp);
		zone->zone_vfslist = NULL;
	}

	vfsp->vfs_zone_prev->vfs_zone_next = vfsp->vfs_zone_next;
	vfsp->vfs_zone_next->vfs_zone_prev = vfsp->vfs_zone_prev;
	vfsp->vfs_zone_next = vfsp->vfs_zone_prev = NULL;

	/*
	 * update the mnttab modification time
	 */
	vfs_mnttab_modtimeupd();
	vfs_list_unlock();
	/* drop the reference on the zone looked up by path */
	zone_rele(zone);
}
/*
 * Look up a vfs by fsid in the vfs hash.  Only the hash bucket's mutex is
 * taken.  Returns the first vfs on the bucket's chain whose two fsid words
 * both match, with a hold (VFS_HOLD) placed on it, or NULL if none does.
 */
struct vfs *
getvfs(fsid_t *fsid)
{
	int val0 = fsid->val[0];
	int val1 = fsid->val[1];
	dev_t dev = expldev(val0);
	int vhno = VFSHASH(getmajor(dev), getminor(dev));
	kmutex_t *bucket_lock = &rvfs_list[vhno].rvfs_lock;
	struct vfs *hit;

	mutex_enter(bucket_lock);
	for (hit = rvfs_list[vhno].rvfs_head; hit != NULL;
	    hit = hit->vfs_hash) {
		if (hit->vfs_fsid.val[0] == val0 &&
		    hit->vfs_fsid.val[1] == val1)
			break;
	}
	/* Take the hold before dropping the bucket lock. */
	if (hit != NULL)
		VFS_HOLD(hit);
	mutex_exit(bucket_lock);

	return (hit);
}
/*
 * Search the vfs mount in progress list for a specified device/vfs entry.
 * Only the first entry whose device matches is considered: returns 0 if
 * that entry carries the given vfs pointer (or if no entry matches the
 * device at all), and 1 if it carries a different vfs pointer.
 */
int
vfs_devmounting(dev_t dev, struct vfs *vfsp)
{
	struct ipmnt *entry;
	int other_mounting = 0;

	mutex_enter(&vfs_miplist_mutex);
	for (entry = vfs_miplist; entry != NULL; entry = entry->mip_next) {
		if (entry->mip_dev != dev)
			continue;
		other_mounting = (entry->mip_vfsp != vfsp);
		break;
	}
	mutex_exit(&vfs_miplist_mutex);

	return (other_mounting);
}
/*
 * Search the vfs list for a specified device.  Returns 1 if an entry with
 * that vfs_dev is found, or 0 if no such entry exists.  The vfs list lock
 * is held as reader for the duration of the walk.
 */
int
vfs_devismounted(dev_t dev)
{
	struct vfs *cur;
	int found;

	vfs_list_read_lock();
	cur = rootvfs;
	do {
		found = (cur->vfs_dev == dev);
		cur = cur->vfs_next;
	} while (!found && cur != rootvfs);
	vfs_list_unlock();

	return (found);
}
/*
 * Search the vfs list for a specified device whose mount point is visible
 * from the calling process's zone.  Returns a pointer to the first such
 * vfs, with a hold placed on it, or NULL if no suitable entry is found.
 * The caller of this routine is responsible for releasing the returned
 * vfs pointer.
 */
struct vfs *
vfs_dev2vfsp(dev_t dev)
{
	struct vfs *cur;
	struct vfs *hit = NULL;

	vfs_list_read_lock();
	cur = rootvfs;
	do {
		/*
		 * The following could be made more efficient by making
		 * the entire loop use vfs_zone_next if the call is from
		 * a zone.  The only callers, however, ustat(2) and
		 * umount2(2), don't seem to justify the added
		 * complexity at present.
		 */
		if (cur->vfs_dev == dev &&
		    ZONE_PATH_VISIBLE(refstr_value(cur->vfs_mntpt),
		    curproc->p_zone)) {
			VFS_HOLD(cur);
			hit = cur;
			break;
		}
		cur = cur->vfs_next;
	} while (cur != rootvfs);
	vfs_list_unlock();

	return (hit);
}
/*
 * Search the vfs list for a specified mntpoint.  Returns a pointer to it,
 * with a hold placed on it, or NULL if no suitable entry is found.  The
 * caller of this routine is responsible for releasing the returned vfs
 * pointer.
 *
 * Note that if multiple mntpoints match, the last one matching is
 * returned in an attempt to return the "top" mount when overlay
 * mounts are covering the same mount point.  This is accomplished by
 * starting at the end of the list and working our way backwards, stopping
 * at the first matching mount.
 *
 * In the global zone the whole vfs list is searched; in any other zone
 * only that zone's vfs list is searched, with each mount point translated
 * into the zone's view before comparing.
 */
struct vfs *
vfs_mntpoint2vfsp(const char *mp)
{
	zone_t *zone = curproc->p_zone;
	struct vfs *cur, *tail;
	struct vfs *hit = NULL;
	struct vfs *zlist;

	vfs_list_read_lock();

	if (getzoneid() == GLOBAL_ZONEID) {
		/*
		 * The global zone may see filesystems in any zone.
		 */
		tail = rootvfs->vfs_prev;
		cur = tail;
		do {
			if (strcmp(refstr_value(cur->vfs_mntpt), mp) == 0) {
				hit = cur;
				break;
			}
			cur = cur->vfs_prev;
		} while (cur != tail);
	} else if ((zlist = zone->zone_vfslist) != NULL) {
		tail = zlist->vfs_zone_prev;
		cur = tail;
		do {
			const char *path;

			path = refstr_value(cur->vfs_mntpt);
			path = ZONE_PATH_TRANSLATE(path, zone);
			if (strcmp(path, mp) == 0) {
				hit = cur;
				break;
			}
			cur = cur->vfs_zone_prev;
		} while (cur != tail);
	}

	if (hit != NULL)
		VFS_HOLD(hit);
	vfs_list_unlock();

	return (hit);
}
/*
 * Search the vfs list for a specified vfsops.
 * Returns 1 if some vfs on the list uses those ops, else 0.  The vfs list
 * lock is held as reader for the duration of the walk.
 */
int
vfs_opsinuse(vfsops_t *ops)
{
	struct vfs *cur;
	int found;

	vfs_list_read_lock();
	cur = rootvfs;
	do {
		found = (vfs_getops(cur) == ops);
		cur = cur->vfs_next;
	} while (!found && cur != rootvfs);
	vfs_list_unlock();

	return (found);
}
/*
 * Allocate an entry in vfssw for a file system type.
 *
 * The first unallocated slot (slot 0 is never used) is given a copy of the
 * type name, a reference count of 1 and an initialized mutex.  Returns the
 * slot, or NULL if the name is unacceptable or the table is full.  The
 * vfssw table must be write-locked by the caller.
 */
struct vfssw *
allocate_vfssw(const char *type)
{
	struct vfssw *vswp;
	size_t namesz;

	/*
	 * The vfssw table uses the empty string to identify an
	 * available entry; we cannot add any type which has
	 * a leading NUL.  The string length is limited to
	 * the size of the st_fstype array in struct stat.
	 */
	if (type[0] == '\0')
		return (NULL);
	namesz = strlen(type) + 1;
	if (namesz > _ST_FSTYPSZ)
		return (NULL);

	ASSERT(VFSSW_WRITE_LOCKED());

	for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
		if (ALLOCATED_VFSSW(vswp))
			continue;

		vswp->vsw_name = kmem_alloc(namesz, KM_SLEEP);
		(void) strcpy(vswp->vsw_name, type);
		ASSERT(vswp->vsw_count == 0);
		vswp->vsw_count = 1;
		mutex_init(&vswp->vsw_lock, NULL, MUTEX_DEFAULT, NULL);
		return (vswp);
	}

	return (NULL);
}
/*
 * Impose additional layer of translation between vfstype names
 * and module names in the filesystem:
 *
 *	"proc"			-> "procfs"
 *	"fd"			-> "fdfs"
 *	anything starting "nfs"	-> "nfs"
 *
 * Any other name is returned unchanged (the same pointer that was
 * passed in).
 */
static const char *
vfs_to_modname(const char *vfstype)
{
	if (strcmp(vfstype, "proc") == 0)
		return ("procfs");

	if (strcmp(vfstype, "fd") == 0)
		return ("fdfs");

	/* prefix match: covers "nfs" itself and any "nfs*" variant */
	if (strncmp(vfstype, "nfs", 3) == 0)
		return ("nfs");

	return (vfstype);
}
/*
 * Find a vfssw entry given a file system type name.
 * Try to autoload the filesystem if it's not found.
 * If it's installed, return the vfssw locked to prevent unloading.
 *
 * Returns the vfssw entry, or NULL if no entry could be allocated (before
 * root is mounted), if modload() fails, or if the type is still unknown
 * after the module was loaded.  An entry found through
 * vfs_getvfsswbyname() has had its reference count bumped by that routine.
 *
 * NOTE(review): when an entry was found (and referenced) but is not
 * installed and modload() then fails, NULL is returned without a matching
 * unreference in this function - confirm whether that reference is
 * intentionally retained.
 */
struct vfssw *
vfs_getvfssw(const char *type)
{
	struct vfssw *vswp;
	const char *modname;

	RLOCK_VFSSW();
	vswp = vfs_getvfsswbyname(type);
	modname = vfs_to_modname(type);

	if (rootdir == NULL) {
		/*
		 * If we haven't yet loaded the root file system, then our
		 * _init won't be called until later. Allocate vfssw entry,
		 * because mod_installfs won't be called.
		 */
		if (vswp == NULL) {
			/* swap the read lock for the write lock and re-check */
			RUNLOCK_VFSSW();
			WLOCK_VFSSW();
			if ((vswp = vfs_getvfsswbyname(type)) == NULL) {
				if ((vswp = allocate_vfssw(type)) == NULL) {
					WUNLOCK_VFSSW();
					return (NULL);
				}
			}
			WUNLOCK_VFSSW();
			RLOCK_VFSSW();
		}
		if (!VFS_INSTALLED(vswp)) {
			RUNLOCK_VFSSW();
			/* result deliberately ignored on this early-boot path */
			(void) modloadonly("fs", modname);
		} else
			RUNLOCK_VFSSW();
		return (vswp);
	}

	/*
	 * Try to load the filesystem.  Before calling modload(), we drop
	 * our lock on the VFS switch table, and pick it up after the
	 * module is loaded.  However, there is a potential race:  the
	 * module could be unloaded after the call to modload() completes
	 * but before we pick up the lock and drive on.  Therefore,
	 * we keep reloading the module until we've loaded the module
	 * _and_ we have the lock on the VFS switch table.
	 */
	while (vswp == NULL || !VFS_INSTALLED(vswp)) {
		RUNLOCK_VFSSW();
		if (modload("fs", modname) == -1)
			return (NULL);
		RLOCK_VFSSW();
		/* still unknown after a successful load: give up with NULL */
		if (vswp == NULL)
			if ((vswp = vfs_getvfsswbyname(type)) == NULL)
				break;
	}
	RUNLOCK_VFSSW();

	return (vswp);
}
/*
 * Find a vfssw entry given a file system type name.  On a match the
 * entry's reference count is bumped and the entry returned; a NULL or
 * empty name, or no match, yields NULL.  Slot 0 of the table is never
 * examined.  The vfssw table must be locked by the caller.
 */
struct vfssw *
vfs_getvfsswbyname(const char *type)
{
	struct vfssw *vswp;

	ASSERT(VFSSW_LOCKED());

	if (type == NULL || *type == '\0')
		return (NULL);

	for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
		if (strcmp(type, vswp->vsw_name) != 0)
			continue;
		vfs_refvfssw(vswp);
		return (vswp);
	}

	return (NULL);
}
/*
 * Find a vfssw entry given a set of vfsops.  The table is read-locked for
 * the search.  On a match (an allocated entry whose embedded vsw_vfsops is
 * the one passed in) the entry's reference count is bumped and the entry
 * returned; otherwise NULL is returned.
 */
struct vfssw *
vfs_getvfsswbyvfsops(vfsops_t *vfsops)
{
	struct vfssw *vswp;
	struct vfssw *hit = NULL;

	RLOCK_VFSSW();
	for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
		if (ALLOCATED_VFSSW(vswp) && &vswp->vsw_vfsops == vfsops) {
			vfs_refvfssw(vswp);
			hit = vswp;
			break;
		}
	}
	RUNLOCK_VFSSW();

	return (hit);
}
/*
 * Reference a vfssw entry: increment its count under the entry's mutex.
 */
void
vfs_refvfssw(struct vfssw *vswp)
{
	mutex_enter(&vswp->vsw_lock);
	vswp->vsw_count += 1;
	mutex_exit(&vswp->vsw_lock);
}
/*
 * Unreference a vfssw entry: decrement its count under the entry's mutex.
 */
void
vfs_unrefvfssw(struct vfssw *vswp)
{
	mutex_enter(&vswp->vsw_lock);
	vswp->vsw_count -= 1;
	mutex_exit(&vswp->vsw_lock);
}
/*
 * State for syncing file systems at shutdown/panic time.
 */
int sync_timeout = 30;		/* timeout for syncing a page during panic */
int sync_timeleft;		/* portion of sync_timeout remaining */

static int sync_retries = 20;	/* number of retries when not making progress */
static int sync_triesleft;	/* portion of sync_retries remaining */

/*
 * Previous/current page and buffer counts.
 * NOTE(review): presumably the dirty page and dirty buffer counts compared
 * across sync passes - confirm against the code that uses them.
 */
static pgcnt_t old_pgcnt, new_pgcnt;
static int new_bufcnt, old_bufcnt;
/*
* Sync all of the mounted filesystems, and then wait for the actual i/o to
* complete. We wait by counting the number of dirty pages and buffers,
* pushing them out using bio_busy() and page_busy(), and then counting again.
* This routine is used during both the uadmin A_SHUTDOWN code as well as
* the SYNC phase of the panic code (see comments in panic.c). It should only
* be used after some higher-level mechanism has quiesced the system so that
* new writes are not being initiated while we are waiting for completion.
*
* To ensure finite running time, our algorithm uses two timeout mechanisms:
* sync_timeleft (a timer implemented by the omnipresent deadman() cyclic), and
* sync_triesleft (a progress counter used by the vfs_syncall() loop below).
* Together these ensure that syncing completes if our i/o paths are stuck.
* The counters are declared above so they can be found easily in the debugger.
*
* The sync_timeleft counter is reset by bio_busy() and page_busy() using the
* vfs_syncprogress() subroutine whenever we make progress through the lists of
* pages and buffers. It is decremented and expired by the deadman() cyclic.
* When vfs_syncall() decides it is done, we disable the deadman() counter by
* setting sync_timeleft to zero. This timer guards against vfs_syncall()
* deadlocking or hanging inside of a broken filesystem or driver routine.
*
* The sync_triesleft counter is updated by vfs_syncall() itself. If we make
* sync_retries consecutive calls to bio_busy() and page_busy() without
* decreasing either the number of dirty buffers or dirty pages below the
* lowest count we have seen so far, we give up and return from vfs_syncall().
*
* Each loop iteration ends with a call to delay() one second to allow time for
* i/o completion and to permit the user time to read our progress messages.
*/
void
vfs_syncall(void)
{
	/* panic during boot - no filesystems yet */
	if (rootdir == NULL && !modrootloaded)
		return;

	printf("syncing file systems...");
	vfs_syncprogress();
	sync();
	vfs_syncprogress();

	sync_triesleft = sync_retries;

	/* Start from the largest values so the first pass counts as progress. */
	old_bufcnt = new_bufcnt = INT_MAX;
	old_pgcnt = new_pgcnt = ULONG_MAX;

	while (sync_triesleft > 0) {
		int progressed;

		/* Remember the lowest counts seen so far. */
		if (new_bufcnt < old_bufcnt)
			old_bufcnt = new_bufcnt;
		if (new_pgcnt < old_pgcnt)
			old_pgcnt = new_pgcnt;

		new_bufcnt = bio_busy(B_TRUE);
		new_pgcnt = page_busy(B_TRUE);
		vfs_syncprogress();

		if (new_bufcnt == 0 && new_pgcnt == 0)
			break;

		/*
		 * Beating either low-water mark refills the retry budget;
		 * otherwise one retry is consumed.
		 */
		progressed = (new_bufcnt < old_bufcnt ||
		    new_pgcnt < old_pgcnt);
		if (progressed)
			sync_triesleft = sync_retries;
		else
			sync_triesleft--;

		if (new_bufcnt != 0)
			printf(" [%d]", new_bufcnt);
		if (new_pgcnt != 0)
			printf(" %lu", new_pgcnt);

		delay(hz);
	}

	if (new_bufcnt == 0 && new_pgcnt == 0)
		printf(" done\n");
	else
		printf(" done (not all i/o completed)\n");

	/* Zero disables the deadman() countdown now that we are finished. */
	sync_timeleft = 0;
	delay(hz);
}
/*
* If we are in the middle of the sync phase of panic, reset sync_timeleft to
* sync_timeout to indicate that we are making progress and the deadman()
* omnipresent cyclic should not yet time us out. Note that it is safe to
* store to sync_timeleft here since the deadman() is firing at high-level
* on top of us. If we are racing with the deadman(), either the deadman()
* will decrement the old value and then we will reset it, or we will
* reset it and then the deadman() will immediately decrement it. In either
* case, correct behavior results.
*/
void
vfs_syncprogress(void)
{
	/* Outside of panic there is no countdown to refresh. */
	if (panicstr == NULL)
		return;

	sync_timeleft = sync_timeout;
}
/*
 * Translate VFS_* mount flags into the corresponding ST_* statvfs flags.
 * Only VFS_RDONLY, VFS_NOSETUID and VFS_NOTRUNC are mapped; every other
 * bit in vf is ignored.  These shouldn't really be separate flags at all.
 */
uint_t
vf_to_stf(uint_t vf)
{
	uint_t rdonly = (vf & VFS_RDONLY) ? ST_RDONLY : 0;
	uint_t nosuid = (vf & VFS_NOSETUID) ? ST_NOSUID : 0;
	uint_t notrunc = (vf & VFS_NOTRUNC) ? ST_NOTRUNC : 0;

	return (rdonly | nosuid | notrunc);
}
/*
 * Sync entry for the (illegal) fstype 0.  vfsinit() installs it as the
 * VFSNAME_SYNC member of the stray ops template.  Being called at all is
 * an error, so it raises a CE_PANIC; the return statement is presumably
 * never reached and only satisfies the int signature.
 */
/* ARGSUSED */
int
vfsstray_sync(struct vfs *vfsp, short arg, struct cred *cr)
{
cmn_err(CE_PANIC, "stray vfs operation");
return (0);
}
/*
 * Generic entry for the (illegal) fstype 0.  vfsinit() installs it for
 * every operation of the stray ops template except sync.  Being called
 * at all is an error, so it raises a CE_PANIC; the return statement is
 * presumably never reached and only satisfies the int signature.
 */
int
vfsstray(void)
{
cmn_err(CE_PANIC, "stray vfs operation");
return (0);
}
/*
 * Operation that unconditionally fails with EIO.
 *
 * Exists to support forced UFS unmount and its interaction with LOFS,
 * but could be used by any filesystem.  See bug 1203132.
 */
int
vfs_EIO(void)
{
	int error = EIO;

	return (error);
}
/*
 * Sync flavour of vfs_EIO(): ignores all of its arguments and fails
 * with EIO.
 *
 * The sync op has to be defined separately because the compiler gets
 * confused, and warns, when ANSI and old-style prototypes are mixed in
 * the presence of a "short" argument.
 */
/*ARGSUSED*/
int
vfs_EIO_sync(struct vfs *vfsp, short arg, struct cred *cr)
{
	int error = EIO;

	return (error);
}
/*
 * The dummy "EIO" file system.  vfsinit() builds EIO_vfsops from a
 * template whose operations all return EIO, initializes EIO_vfs with
 * that vector, and marks EIO_vfs as VFS_UNMOUNTED.
 */
vfs_t EIO_vfs;
vfsops_t *EIO_vfsops;
/*
 * Called from startup() to initialize all loaded vfs's.
 *
 * In order, this:
 *   - creates the vfs kmem cache and the vnode cache,
 *   - initializes the event monitor framework,
 *   - installs the panic-on-use operations for the illegal fstype 0,
 *   - builds the dummy EIO file system (EIO_vfs / EIO_vfsops),
 *   - runs the vsw_init routine of every vfssw entry that has one,
 *   - starts vopstats and the xattr support.
 *
 * A bad operations template is reported with a warning rather than a
 * panic; it is not expected to happen.
 */
void
vfsinit(void)
{
	struct vfssw *vswp;
	int error;
	extern int vopstats_enabled;
	extern void vopstats_startup();

	static const fs_operation_def_t EIO_vfsops_template[] = {
		VFSNAME_MOUNT,		{ .error = vfs_EIO },
		VFSNAME_UNMOUNT,	{ .error = vfs_EIO },
		VFSNAME_ROOT,		{ .error = vfs_EIO },
		VFSNAME_STATVFS,	{ .error = vfs_EIO },
		VFSNAME_SYNC,		{ .vfs_sync = vfs_EIO_sync },
		VFSNAME_VGET,		{ .error = vfs_EIO },
		VFSNAME_MOUNTROOT,	{ .error = vfs_EIO },
		VFSNAME_FREEVFS,	{ .error = vfs_EIO },
		VFSNAME_VNSTATE,	{ .error = vfs_EIO },
		NULL, NULL
	};

	static const fs_operation_def_t stray_vfsops_template[] = {
		VFSNAME_MOUNT,		{ .error = vfsstray },
		VFSNAME_UNMOUNT,	{ .error = vfsstray },
		VFSNAME_ROOT,		{ .error = vfsstray },
		VFSNAME_STATVFS,	{ .error = vfsstray },
		VFSNAME_SYNC,		{ .vfs_sync = vfsstray_sync },
		VFSNAME_VGET,		{ .error = vfsstray },
		VFSNAME_MOUNTROOT,	{ .error = vfsstray },
		VFSNAME_FREEVFS,	{ .error = vfsstray },
		VFSNAME_VNSTATE,	{ .error = vfsstray },
		NULL, NULL
	};

	/* Create vfs cache */
	vfs_cache = kmem_cache_create("vfs_cache", sizeof (struct vfs),
	    sizeof (uintptr_t), NULL, NULL, NULL, NULL, NULL, 0);

	/* Initialize the vnode cache (file systems may use it during init). */
	vn_create_cache();

	/* Setup event monitor framework */
	fem_init();

	/*
	 * Initialize the dummy stray file system type.  The result used to
	 * be silently overwritten by the next assignment; report a failure
	 * the same way the EIO template failure is reported below.
	 */
	error = vfs_setfsops(0, stray_vfsops_template, NULL);
	if (error != 0) {
		cmn_err(CE_WARN, "vfsinit: bad stray vfs ops template");
		/* Shouldn't happen, but not bad enough to panic */
	}

	/* Initialize the dummy EIO file system. */
	error = vfs_makefsops(EIO_vfsops_template, &EIO_vfsops);
	if (error != 0) {
		cmn_err(CE_WARN, "vfsinit: bad EIO vfs ops template");
		/* Shouldn't happen, but not bad enough to panic */
	}

	VFS_INIT(&EIO_vfs, EIO_vfsops, (caddr_t)NULL);

	/*
	 * Default EIO_vfs.vfs_flag to VFS_UNMOUNTED so a lookup
	 * on this vfs can immediately notice it's invalid.
	 */
	EIO_vfs.vfs_flag |= VFS_UNMOUNTED;

	/*
	 * Call the init routines of non-loadable filesystems only.
	 * Filesystems which are loaded as separate modules will be
	 * initialized by the module loading code instead.
	 * The vfssw read lock is taken and dropped around each entry.
	 */
	for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
		RLOCK_VFSSW();
		if (vswp->vsw_init != NULL)
			(*vswp->vsw_init)(vswp - vfssw, vswp->vsw_name);
		RUNLOCK_VFSSW();
	}

	vopstats_startup();

	if (vopstats_enabled) {
		/* EIO_vfs can collect stats, but we don't retrieve them */
		initialize_vopstats(&EIO_vfs.vfs_vopstats);
		EIO_vfs.vfs_fstypevsp = NULL;
		EIO_vfs.vfs_vskap = NULL;
		EIO_vfs.vfs_flag |= VFS_STATS;
	}

	xattr_init();
}
/*
 * Allocate a zero-filled vfs structure from vfs_cache.
 *
 * kmflag is handed straight to kmem_cache_alloc().  If the allocation
 * fails (possible when kmflag permits failure), NULL is returned instead
 * of zeroing through a NULL pointer.
 *
 * Only the simplest initialization is done here; everything else gets
 * done in vfs_init().
 */
vfs_t *
vfs_alloc(int kmflag)
{
	vfs_t *vfsp;

	vfsp = kmem_cache_alloc(vfs_cache, kmflag);
	if (vfsp == NULL)
		return (NULL);

	bzero(vfsp, sizeof (vfs_t));
	return (vfsp);
}
/*
 * Tear down and free a vfs structure: release any FEM head, tear down
 * the implementation-private data if present, destroy vfs_reflock and
 * return the structure to vfs_cache.
 *
 * There is deliberately no assertion that vfs_count is zero.  This is
 * reached from vfs_rele() with a count of 0, but also from domount()
 * with a partially initialized vfs and a count of 1.  domount() cannot
 * use VFS_RELE() when VFS_MOUNT() has not returned successfully:
 * VFS_MOUNT() is what fully initializes the vfs and its associated
 * data, and VFS_RELE() would call VFS_FREEVFS(), which may panic the
 * system on structures that were never fully set up.
 */
void
vfs_free(vfs_t *vfsp)
{
	/* If FEM was in use, make sure everything gets cleaned up */
	if (vfsp->vfs_femhead != NULL) {
		ASSERT(vfsp->vfs_femhead->femh_list == NULL);
		mutex_destroy(&vfsp->vfs_femhead->femh_lock);
		kmem_free(vfsp->vfs_femhead, sizeof (*(vfsp->vfs_femhead)));
		vfsp->vfs_femhead = NULL;
	}

	if (vfsp->vfs_implp != NULL)
		vfsimpl_teardown(vfsp);

	sema_destroy(&vfsp->vfs_reflock);
	kmem_cache_free(vfs_cache, vfsp);
}
/*
 * Increments the vfs reference count by one atomically.
 */
void
vfs_hold(vfs_t *vfsp)
{
atomic_add_32(&vfsp->vfs_count, 1);
/* A zero count right after the increment would mean the counter wrapped. */
ASSERT(vfsp->vfs_count != 0);
}
/*
 * Atomically drop one reference on a vfs.
 *
 * When the count reaches zero, the file system specific VFS_FREEVFS()
 * is called, followed by lofi_remove(), release of the zone hold (if
 * vfs_zone is set), vfs_freemnttab() and finally vfs_free().
 */
void
vfs_rele(vfs_t *vfsp)
{
	ASSERT(vfsp->vfs_count != 0);

	/* Other references remain: nothing more to do. */
	if (atomic_add_32_nv(&vfsp->vfs_count, -1) != 0)
		return;

	VFS_FREEVFS(vfsp);
	lofi_remove(vfsp);
	if (vfsp->vfs_zone != NULL)
		zone_rele(vfsp->vfs_zone);
	vfs_freemnttab(vfsp);
	vfs_free(vfsp);
}
/*
* Generic operations vector support.
*
* This is used to build operations vectors for both the vfs and vnode.
* It's normally called only when a file system is loaded.
*
* There are many possible algorithms for this, including the following:
*
* (1) scan the list of known operations; for each, see if the file system
* includes an entry for it, and fill it in as appropriate.
*
* (2) set up defaults for all known operations. scan the list of ops
* supplied by the file system; for each which is both supplied and
* known, fill it in.
*
* (3) sort the lists of known ops & supplied ops; scan the list, filling
* in entries as we go.
*
* we choose (1) for simplicity, and because performance isn't critical here.
* note that (2) could be sped up using a precomputed hash table on known ops.
* (3) could be faster than either, but only if the lists were very large or
* supplied in sorted order.
*
*/
/*
 * Fill in an operations vector.
 *
 *   vector      - base address of the vector being built
 *   unused_ops  - on success, set to the number of supplied operations
 *                 that matched no entry in the translation table
 *   translation - NULL-name-terminated table of the operations known to
 *                 the caller, with their offset into the vector and
 *                 their default and error functions
 *   operations  - NULL-name-terminated list supplied by the file system
 *
 * Returns 0 on success, or EINVAL if a supplied operation has a NULL
 * function.  On EINVAL the vector may be partly filled in and
 * *unused_ops is not written.
 */
int
fs_build_vector(void *vector, int *unused_ops,
    const fs_operation_trans_def_t *translation,
    const fs_operation_def_t *operations)
{
	const fs_operation_trans_def_t *tp;
	const fs_operation_def_t *op;
	int num_ops = 0;
	int used = 0;

	/* Count the supplied operations. */
	for (op = operations; op->name != NULL; op++)
		num_ops++;

	/*
	 * Walk through each operation known to our caller; there is one
	 * entry in the translation table for each.
	 */
	for (tp = translation; tp->name != NULL; tp++) {
		fs_generic_func_p result;
		fs_generic_func_p *location;

		/* Find the first supplied operation with the same name. */
		for (op = operations; op->name != NULL; op++) {
			if (strcmp(op->name, tp->name) == 0)
				break;
		}

		if (op->name == NULL) {
			/* Not supplied at all: use the default function. */
			result = tp->defaultFunc;
		} else {
			used++;
			result = op->func.fs_generic;

			/*
			 * The fs_default and fs_error placeholders select
			 * the function out of the translation table.
			 */
			if (result == fs_default) {
				result = tp->defaultFunc;
			} else if (result == fs_error) {
				result = tp->errorFunc;
			} else if (result == NULL) {
				/* Null values are PROHIBITED */
				return (EINVAL);
			}
		}

		/* Now store the function into the operations vector. */
		location = (fs_generic_func_p *)((char *)vector + tp->offset);
		*location = result;
	}

	*unused_ops = num_ops - used;
	return (0);
}
/*
 * Placeholder function, should never be called.
 *
 * fs_build_vector() compares supplied function pointers against
 * fs_error and substitutes the translation table's error function.
 * An actual call raises a CE_PANIC.
 */
int
fs_error(void)
{
cmn_err(CE_PANIC, "fs_error called");
return (0);
}
/*
 * Placeholder function, should never be called.
 *
 * fs_build_vector() compares supplied function pointers against
 * fs_default and substitutes the translation table's default function.
 * An actual call raises a CE_PANIC.
 */
int
fs_default(void)
{
cmn_err(CE_PANIC, "fs_default called");
return (0);
}
#ifdef __sparc
/*
 * Part of the implementation of booting off a mirrored root
 * involves a change of dev_t for the root device.  To
 * accomplish this, first remove the existing hash table
 * entry for the root device, convert to the new dev_t,
 * then re-insert in the hash table at the head of the list.
 *
 *   vfsp   - vfs whose device is being changed
 *   ndev   - new dev_t, stored in vfs_dev
 *   fstype - passed with ndev to vfs_make_fsid() to rebuild vfs_fsid
 *
 * The whole sequence runs between vfs_list_lock() and vfs_list_unlock().
 */
void
vfs_root_redev(vfs_t *vfsp, dev_t ndev, int fstype)
{
vfs_list_lock();
/* Unhash first: the entry is re-added once vfs_dev and vfs_fsid change. */
vfs_hash_remove(vfsp);
vfsp->vfs_dev = ndev;
vfs_make_fsid(&vfsp->vfs_fsid, ndev, fstype);
vfs_hash_add(vfsp, 1);
vfs_list_unlock();
}
#else /* x86 NEWBOOT */
#if defined(__x86)
extern int hvmboot_rootconf();
#endif /* __x86 */
int
rootconf()
{
int error;
struct vfssw *vsw;
extern void pm_init();
char *fstyp, *fsmod;
getrootfs(&fstyp, &fsmod);
#if defined(__x86)
/*
* hvmboot_rootconf() is defined in the hvm_bootstrap misc module,
* which lives in /platform/i86hvm, and hence is only available when
* booted in an x86 hvm environment. If the hvm_bootstrap misc module
* is not available then the modstub for this function will return 0.
* If the hvm_bootstrap misc module is available it will be loaded
* and hvmboot_rootconf() will be invoked.
*/
if (error = hvmboot_rootconf())
return (error);
#endif /* __x86 */
if (error = clboot_rootconf())
return (error);
if (modload("fs", fsmod) == -1)
panic("Cannot _init %s module", fsmod);
RLOCK_VFSSW();
vsw = vfs_getvfsswbyname(fstyp);
RUNLOCK_VFSSW();
if (vsw == NULL) {
cmn_err(CE_CONT, "Cannot find %s filesystem\n", fstyp);
return (ENXIO);
}
VFS_INIT(rootvfs, &vsw->vsw_vfsops, 0);
VFS_HOLD(rootvfs);
/* always mount readonly first */
rootvfs->vfs_flag |= VFS_RDONLY;
pm_init();
if (netboot)
(void) strplumb();
error = VFS_MOUNTROOT(rootvfs, ROOT_INIT);
vfs_unrefvfssw(vsw);
rootdev = rootvfs->vfs_dev;
if (error)
cmn_err(CE_CONT, "Cannot mount root on %s fstype %s\n",
rootfs.bo_name, fstyp);
else
cmn_err(CE_CONT, "?root on %s fstype %s\n",
rootfs.bo_name, fstyp);
return (error);
}
/*
 * XXX this is called by nfs only and should probably be removed
 *
 * If booted with ASKNAME (RB_ASKNAME set in boothowto), prompt on the
 * console with "<askfor> name: " and read the answer into name, passing
 * namelen to console_gets() as the buffer size.  Otherwise name is left
 * untouched.
 */
void
getfsname(char *askfor, char *name, size_t namelen)
{
	if ((boothowto & RB_ASKNAME) == 0)
		return;

	printf("%s name: ", askfor);
	console_gets(name, namelen);
}
/*
 * Init the root filesystem type (rootfs.bo_fstype) from the "fstype"
 * property.
 *
 * Filesystem types starting with the prefix "nfs" are diskless clients;
 * init the root file name (rootfs.bo_name), too.
 *
 * If we are booting via NFS we currently have these options:
 *	nfs -	dynamically choose NFS V2, V3, or V4 (default)
 *	nfs2 -	force NFS V2
 *	nfs3 -	force NFS V3
 *	nfs4 -	force NFS V4
 * Because we need to maintain backward compatibility with the naming
 * convention that the NFS V2 filesystem name is "nfs" (see vfs_conf.c)
 * we need to map "nfs" => "nfsdyn" and "nfs2" => "nfs".  The dynamic
 * nfs module will map the type back to either "nfs", "nfs3", or "nfs4".
 * This is only for root filesystems, all other uses such as cachefs
 * will expect that "nfs" == NFS V2.
 *
 * On return *fstypp points at rootfs.bo_fstype.  *fsmodp is the name of
 * the module to load: the same string for non-NFS roots, "nfs" otherwise.
 *
 * Property strings are copied with strncpy(), which does not terminate
 * the destination when the source fills it, so each copy is followed by
 * an explicit terminator.  This assumes bo_fstype and bo_name are arrays
 * of BO_MAXFSNAME and BO_MAXOBJNAME bytes, as the copy bounds imply.
 */
static void
getrootfs(char **fstypp, char **fsmodp)
{
	extern char *strplumb_get_netdev_path(void);
	char *propstr = NULL;

	/*
	 * Check fstype property; for diskless it should be one of "nfs",
	 * "nfs2", "nfs3" or "nfs4".
	 */
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
	    DDI_PROP_DONTPASS, "fstype", &propstr)
	    == DDI_SUCCESS) {
		(void) strncpy(rootfs.bo_fstype, propstr, BO_MAXFSNAME);
		rootfs.bo_fstype[BO_MAXFSNAME - 1] = '\0';
		ddi_prop_free(propstr);

	/*
	 * if the boot property 'fstype' is not set, but 'zfs-bootfs' is set,
	 * assume the type of this root filesystem is 'zfs'.
	 */
	} else if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
	    DDI_PROP_DONTPASS, "zfs-bootfs", &propstr)
	    == DDI_SUCCESS) {
		(void) strncpy(rootfs.bo_fstype, "zfs", BO_MAXFSNAME);
		ddi_prop_free(propstr);
	}

	/* Anything not starting with "nfs" is its own module name. */
	if (strncmp(rootfs.bo_fstype, "nfs", 3) != 0) {
		*fstypp = *fsmodp = rootfs.bo_fstype;
		return;
	}

	++netboot;

	if (strcmp(rootfs.bo_fstype, "nfs2") == 0)
		(void) strcpy(rootfs.bo_fstype, "nfs");
	else if (strcmp(rootfs.bo_fstype, "nfs") == 0)
		(void) strcpy(rootfs.bo_fstype, "nfsdyn");

	/*
	 * check if path to network interface is specified in bootpath
	 * or by a hypervisor domain configuration file.
	 * XXPV - enable strplumb_get_netdev_path()
	 */
	if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), DDI_PROP_DONTPASS,
	    "xpv-nfsroot")) {
		(void) strcpy(rootfs.bo_name, "/xpvd/xnf@0");
	} else if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
	    DDI_PROP_DONTPASS, "bootpath", &propstr)
	    == DDI_SUCCESS) {
		(void) strncpy(rootfs.bo_name, propstr, BO_MAXOBJNAME);
		rootfs.bo_name[BO_MAXOBJNAME - 1] = '\0';
		ddi_prop_free(propstr);
	} else {
		/* attempt to determine netdev_path via boot_mac address */
		netdev_path = strplumb_get_netdev_path();
		if (netdev_path == NULL)
			panic("cannot find boot network interface");
		(void) strncpy(rootfs.bo_name, netdev_path, BO_MAXOBJNAME);
		rootfs.bo_name[BO_MAXOBJNAME - 1] = '\0';
	}
	*fstypp = rootfs.bo_fstype;
	*fsmodp = "nfs";
}
#endif
/*
 * VFS feature routines
 *
 * The macros below split a feature value in two.  VFTINDEX() yields the
 * upper 32 bits, which the routines below use as an index into
 * vfs_featureset[].  VFTBITS() yields the lower 32 bits, which they use
 * as the bit mask within that element.
 */
#define VFTINDEX(feature) (((feature) >> 32) & 0xFFFFFFFF)
#define VFTBITS(feature) ((feature) & 0xFFFFFFFFLL)
/*
 * Register a feature in the vfs by setting its bits in the feature set.
 * Does nothing when the vfs has no implementation-private data.
 */
void
vfs_set_feature(vfs_t *vfsp, vfs_feature_t feature)
{
	/* Note that vfs_featureset[] is found in *vfsp->vfs_implp */
	if (vfsp->vfs_implp != NULL) {
		vfsp->vfs_featureset[VFTINDEX(feature)] |=
		    VFTBITS(feature);
	}
}
/*
 * Query a vfs for a feature.
 * Returns 1 if any of the feature's bits are set in the feature set,
 * 0 if none are or if the vfs has no implementation-private data.
 */
int
vfs_has_feature(vfs_t *vfsp, vfs_feature_t feature)
{
	/* Note that vfs_featureset[] is found in *vfsp->vfs_implp */
	if (vfsp->vfs_implp == NULL)
		return (0);

	return ((vfsp->vfs_featureset[VFTINDEX(feature)] &
	    VFTBITS(feature)) != 0);
}
/*
 * Propagate the feature set from one vfs to another.
 *
 * Copies elements 1 through to->vfs_featureset[0] from "from" into "to";
 * element 0 of the destination is used as the loop bound and is not
 * itself overwritten.  Does nothing if either vfs lacks
 * implementation-private data.
 */
void
vfs_propagate_features(vfs_t *from, vfs_t *to)
{
	int idx = 1;

	if (from->vfs_implp == NULL || to->vfs_implp == NULL)
		return;

	while (idx <= to->vfs_featureset[0]) {
		to->vfs_featureset[idx] = from->vfs_featureset[idx];
		idx++;
	}
}
#define LOFICTL_PATH "/devices/pseudo/lofi@0:%d"

/*
 * Return the vnode for the lofi node if there's a lofi mount in place.
 *
 * Returns -1 (with *vpp set to NULL) when vfs_lofi_minor is 0, i.e.
 * there's no lofi node; 0 on success with the vnode stored in *vpp;
 * and the lookupname() error (> 0, *vpp set to NULL) on failure.
 */
int
vfs_get_lofi(vfs_t *vfsp, vnode_t **vpp)
{
	char *path;
	int pathlen;
	int err;

	if (vfsp->vfs_lofi_minor == 0) {
		*vpp = NULL;
		return (-1);
	}

	/* Size the path with a dry run, then format it for real. */
	pathlen = snprintf(NULL, 0, LOFICTL_PATH, vfsp->vfs_lofi_minor);
	path = kmem_alloc(pathlen + 1, KM_SLEEP);
	(void) snprintf(path, pathlen + 1, LOFICTL_PATH,
	    vfsp->vfs_lofi_minor);

	err = lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, vpp);
	if (err != 0)
		*vpp = NULL;

	kmem_free(path, pathlen + 1);
	return (err);
}