mntvnops.c revision a51518b114948c25adf55d3b4393ccc00ba2b5b8
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/atomic.h>
#include <sys/mntio.h>
#include <sys/mnttab.h>
#include <sys/mount.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/fs/mntdata.h>
#include <fs/fs_subr.h>
#include <sys/vmsystm.h>
#include <vm/seg_vn.h>
/* Node id that mntgetattr() reports for every mnttab vnode. */
#define MNTROOTINO 2
/* Allocates the per-open mntnode/vnode pair; defined later in this file. */
static mntnode_t *mntgetnode(vnode_t *);
/* Operations vector that mntgetnode() installs on each new vnode. */
vnodeops_t *mntvnodeops;
/* Operations vector for the dummy vnode; not assigned in this section. */
vnodeops_t *mntdummyvnodeops;
/* Dummy vnode on which mntdummyreadop() issues VOP_READ; defined elsewhere. */
extern struct vnode *mntdummyvp;
/*
* Design of kernel mnttab accounting.
*
* To support whitespace in mount names, we implement an ioctl
* (MNTIOC_GETMNTENT) which allows a programmatic interface to the data in
* /etc/mnttab. The libc functions getmntent() and getextmntent() are built
* atop this interface.
*
* To minimize the amount of memory used in the kernel, we keep all the
* necessary information in the user's address space. Large server
* configurations can have /etc/mnttab files in excess of 64k.
*
* To support both vanilla read() calls as well as ioctl() calls, we have two
* different snapshots of the kernel data structures, mnt_read and mnt_ioctl.
* These snapshots include the base location in user memory, the number of
* mounts in the snapshot, and any metadata associated with it. The metadata is
* used only to support the ioctl() interface, and is a series of extmnttab
* structures. When the user issues an ioctl(), we simply copyout a pointer to
* that structure, and the rest is handled in userland.
*/
/*
* NOTE: The following variable enables the generation of the "dev=xxx"
* in the option string for a mounted file system. Really this should
* be gotten rid of altogether, but for the sake of backwards compatibility
* we had to leave it in. It is defined as a 32-bit device number. This
* means that when 64-bit device numbers are in use, if either the major or
* minor part of the device number will not fit in a 16 bit quantity, the
* "dev=" will be set to NODEV (0x7fffffff). See PSARC 1999/566 and
* 1999/131 for details. The cmpldev() function used to generate the 32-bit
* device number handles this check and assigns the proper value.
*
* Both mntfs_optsize() and mntfs_optprint() test this flag, so the size
* computed for an option string and the string actually printed agree on
* whether "dev=" is present.
*/
int mntfs_enabledev = 1; /* enable old "dev=xxx" option */
/*
 * Return the number of characters (not counting the terminating NUL)
 * that the "dev=<hex>" option occupies for the given vfs.
 */
static int
mntfs_devsize(struct vfs *vfsp)
{
	dev32_t dev32;
	int nchars;

	(void) cmpldev(&dev32, vfsp->vfs_dev);
	nchars = snprintf(NULL, 0, "dev=%x", dev32);

	return (nchars);
}
/*
 * Print the "dev=<hex>" option for the given vfs into buf and return the
 * number of characters snprintf() reports for it.
 */
static int
mntfs_devprint(struct vfs *vfsp, char *buf)
{
	dev32_t dev32;
	int nchars;

	(void) cmpldev(&dev32, vfsp->vfs_dev);
	nchars = snprintf(buf, MAX_MNTOPT_STR, "dev=%x", dev32);

	return (nchars);
}
/*
 * Compute the length (excluding the terminating NUL) of the option string
 * that mntfs_optprint() produces for the given vfs.
 */
static int
mntfs_optsize(struct vfs *vfsp)
{
	int total = 0;
	int idx;

	for (idx = 0; idx < vfsp->vfs_mntopts.mo_count; idx++) {
		mntopt_t *opt = &vfsp->vfs_mntopts.mo_list[idx];

		/* Only options that are set and displayable are counted. */
		if ((opt->mo_flags & MO_NODISPLAY) ||
		    !(opt->mo_flags & MO_SET))
			continue;

		if (total)
			total++;	/* comma before this option */
		total += strlen(opt->mo_name);

		/* An argument costs its length plus one for the '='. */
		if (opt->mo_arg != NULL)
			total += strlen(opt->mo_arg) + 1;
	}

	/* "zone=<zone_name>" is appended for non-global zones. */
	if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) {
		if (total)
			total++;	/* comma */
		total += sizeof ("zone=") - 1;
		total += strlen(vfsp->vfs_zone->zone_name);
	}

	/* Legacy "dev=xxx" option, when enabled. */
	if (mntfs_enabledev) {
		if (total != 0)
			total++;	/* comma */
		total += mntfs_devsize(vfsp);
	}

	/* With no options at all, the string is a lone "-". */
	if (total == 0)
		total = strlen("-");

	return (total);
}
/*
 * Print the comma-separated option string for the given vfs into buf and
 * return the number of characters written (excluding the trailing NUL).
 * The caller must supply a buffer sized according to mntfs_optsize().
 */
static int
mntfs_optprint(struct vfs *vfsp, char *buf)
{
	char *cur = buf;
	int emitted = 0;	/* nonzero once any option has been printed */
	int idx;

	for (idx = 0; idx < vfsp->vfs_mntopts.mo_count; idx++) {
		mntopt_t *opt = &vfsp->vfs_mntopts.mo_list[idx];

		if ((opt->mo_flags & MO_NODISPLAY) ||
		    !(opt->mo_flags & MO_SET))
			continue;

		if (emitted)
			*cur++ = ',';
		emitted = 1;

		cur += snprintf(cur, MAX_MNTOPT_STR, "%s", opt->mo_name);

		/* Append "=<value>" when the option carries an argument. */
		if (opt->mo_arg != NULL) {
			cur += snprintf(cur, MAX_MNTOPT_STR, "=%s",
			    opt->mo_arg);
		}
	}

	if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) {
		if (emitted)
			*cur++ = ',';
		emitted = 1;
		cur += snprintf(cur, MAX_MNTOPT_STR, "zone=%s",
		    vfsp->vfs_zone->zone_name);
	}

	if (mntfs_enabledev) {
		if (emitted)
			*cur++ = ',';
		emitted = 1;
		cur += mntfs_devprint(vfsp, cur);
	}

	/* Nothing printed: emit the "-" placeholder. */
	if (!emitted)
		cur += snprintf(cur, MAX_MNTOPT_STR, "-");

	return (cur - buf);
}
/*
 * Return the number of bytes that one mnttab entry for vfsp occupies when
 * generated from the point of view of the given zone.  Each field is
 * counted together with the single separator byte that follows it.
 */
static size_t
mntfs_vfs_len(vfs_t *vfsp, zone_t *zone)
{
	const char *mntpt = refstr_value(vfsp->vfs_mntpt);
	const char *resource = refstr_value(vfsp->vfs_resource);
	size_t total;

	/* Mount point, or "-" when there is none. */
	if (mntpt == NULL || mntpt[0] == '\0')
		total = strlen("-") + 1;
	else
		total = strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1;

	/* Resource, or "-" when there is none. */
	if (resource == NULL || resource[0] == '\0') {
		total += strlen("-") + 1;
	} else if (resource[0] != '/') {
		total += strlen(resource) + 1;
	} else if (ZONE_PATH_VISIBLE(resource, zone)) {
		total += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1;
	} else {
		/*
		 * Same as the zone's view of the mount point.
		 */
		total += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1;
	}

	/* File system type name. */
	total += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1;

	/* Options; their separator is the tab counted with the time below. */
	total += mntfs_optsize(vfsp);
	total += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime);

	return (total);
}
/*
 * Fill in *rootvfsp with a copy of the vfs that holds the zone's root
 * vnode, but with the mount point and resource both replaced by the zone's
 * root path.  The caller releases the string allocated here by calling
 * refstr_rele() on vfs_mntpt.
 */
static void
mntfs_zonerootvfs(zone_t *zone, vfs_t *rootvfsp)
{
	vfs_t *realvfs = zone->zone_rootvp->v_vfsp;

	*rootvfsp = *realvfs;

	/* One refstr serves as both mount point and resource. */
	rootvfsp->vfs_mntpt = refstr_alloc(zone->zone_rootpath);
	rootvfsp->vfs_resource = rootvfsp->vfs_mntpt;
}
/*
 * Compute the total text size of the mnttab as seen from a zone and store
 * the number of entries in *nent_ptr.  Entries flagged VFS_NOMNTTAB are
 * skipped unless showhidden is set.
 */
static size_t
mntfs_zone_len(uint_t *nent_ptr, zone_t *zone, int showhidden)
{
	struct vfs *head;
	struct vfs *cur;
	size_t total = 0;
	uint_t nent = 0;

	ASSERT(zone->zone_rootpath != NULL);

	/*
	 * A zone's root entry, when it exists, is first on the list.  If the
	 * list is empty or starts with something else, account for a
	 * synthesized root entry.
	 */
	head = zone->zone_vfslist;
	if (head == NULL ||
	    strcmp(refstr_value(head->vfs_mntpt), zone->zone_rootpath) != 0) {
		vfs_t fakeroot;

		/*
		 * The zone's root is not a mount point; report the vfs of
		 * the zone's root vnode instead.
		 */
		ASSERT(zone != global_zone);
		mntfs_zonerootvfs(zone, &fakeroot);
		total += mntfs_vfs_len(&fakeroot, zone);
		refstr_rele(fakeroot.vfs_mntpt);
		nent++;
	}

	if (head != NULL) {
		cur = head;
		do {
			/* Count everything except hidden mounts. */
			if (showhidden || !(cur->vfs_flag & VFS_NOMNTTAB)) {
				nent++;
				total += mntfs_vfs_len(cur, zone);
			}
			cur = cur->vfs_zone_next;
		} while (cur != head);
	}

	*nent_ptr = nent;
	return (total);
}
/*
 * Compute the total text size of the mnttab as seen from the global zone
 * and store the number of entries in *nent_ptr.  Entries flagged
 * VFS_NOMNTTAB are skipped unless showhidden is set.
 */
static size_t
mntfs_global_len(uint_t *nent_ptr, int showhidden)
{
	struct vfs *cur = rootvfs;
	size_t total = 0;
	uint_t nent = 0;

	/* Walk the circular list once, starting and ending at rootvfs. */
	do {
		if (showhidden || !(cur->vfs_flag & VFS_NOMNTTAB)) {
			nent++;
			total += mntfs_vfs_len(cur, global_zone);
		}
		cur = cur->vfs_next;
	} while (cur != rootvfs);

	*nent_ptr = nent;
	return (total);
}
/*
 * Emit one mnttab entry for vfsp at *basep and advance *basep past it.
 *
 * The fields are written in the order resource, mount point, fstype,
 * options, time.  For read() snapshots (forread != 0) fields are separated
 * by tabs and the entry ends with a newline; otherwise every field is
 * NUL-terminated.  When tab is non-NULL, it is filled with pointers to the
 * start of each field plus the device major/minor numbers.
 */
static void
mntfs_vfs_generate(vfs_t *vfsp, zone_t *zone, struct extmnttab *tab,
    char **basep, int forread)
{
	const char *mntpt = refstr_value(vfsp->vfs_mntpt);
	const char *resource = refstr_value(vfsp->vfs_resource);
	const char fieldsep = forread ? '\t' : '\0';
	const char entrysep = forread ? '\n' : '\0';
	char *out = *basep;

	/* Resource. */
	if (tab)
		tab->mnt_special = out;
	if (resource == NULL || resource[0] == '\0') {
		out += snprintf(out, MAXPATHLEN, "-");
	} else if (resource[0] != '/') {
		out += snprintf(out, MAXPATHLEN, "%s", resource);
	} else if (ZONE_PATH_VISIBLE(resource, zone)) {
		out += snprintf(out, MAXPATHLEN, "%s",
		    ZONE_PATH_TRANSLATE(resource, zone));
	} else {
		/*
		 * Use the mount point as the resource.
		 */
		out += snprintf(out, MAXPATHLEN, "%s",
		    ZONE_PATH_TRANSLATE(mntpt, zone));
	}
	*out++ = fieldsep;

	/* Mount point. */
	if (tab)
		tab->mnt_mountp = out;
	if (mntpt == NULL || mntpt[0] == '\0') {
		out += snprintf(out, MAXPATHLEN, "-");
	} else {
		/*
		 * We know the mount point is visible from within the zone,
		 * otherwise it wouldn't be on the zone's vfs list.
		 */
		out += snprintf(out, MAXPATHLEN, "%s",
		    ZONE_PATH_TRANSLATE(mntpt, zone));
	}
	*out++ = fieldsep;

	/* File system type. */
	if (tab)
		tab->mnt_fstype = out;
	out += snprintf(out, MAXPATHLEN, "%s",
	    vfssw[vfsp->vfs_fstype].vsw_name);
	*out++ = fieldsep;

	/* Mount options. */
	if (tab)
		tab->mnt_mntopts = out;
	out += mntfs_optprint(vfsp, out);
	*out++ = fieldsep;

	/* Mount time; this field closes the entry. */
	if (tab)
		tab->mnt_time = out;
	out += snprintf(out, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime);
	*out++ = entrysep;

	if (tab) {
		tab->mnt_major = getmajor(vfsp->vfs_dev);
		tab->mnt_minor = getminor(vfsp->vfs_dev);
	}

	*basep = out;
}
/*
 * Generate the mnttab text (and, when tab is non-NULL, one extmnttab
 * record per entry) for a zone, starting at basep.  The set of entries
 * emitted mirrors what mntfs_zone_len() counts.
 */
static void
mntfs_zone_generate(zone_t *zone, int showhidden, struct extmnttab *tab,
    char *basep, int forread)
{
	vfs_t *head = zone->zone_vfslist;
	vfs_t *cur;
	char *out = basep;

	/*
	 * A zone's root entry, when it exists, is first on the list.  If the
	 * list is empty or starts with something else, synthesize one.
	 */
	if (head == NULL ||
	    strcmp(refstr_value(head->vfs_mntpt), zone->zone_rootpath) != 0) {
		vfs_t fakeroot;

		/*
		 * The zone's root is not a mount point; report the vfs of
		 * the zone's root vnode instead.
		 */
		ASSERT(zone != global_zone);
		mntfs_zonerootvfs(zone, &fakeroot);
		mntfs_vfs_generate(&fakeroot, zone, tab, &out, forread);
		refstr_rele(fakeroot.vfs_mntpt);
		if (tab)
			tab++;
	}

	if (head == NULL)
		return;

	cur = head;
	do {
		/* Emit everything except hidden mounts. */
		if (showhidden || !(cur->vfs_flag & VFS_NOMNTTAB)) {
			mntfs_vfs_generate(cur, zone, tab, &out, forread);
			if (tab)
				tab++;
		}
		cur = cur->vfs_zone_next;
	} while (cur != head);
}
/*
 * Generate the mnttab text (and, when tab is non-NULL, one extmnttab
 * record per entry) for the global zone, starting at basep.  The set of
 * entries emitted mirrors what mntfs_global_len() counts.
 */
static void
mntfs_global_generate(int showhidden, struct extmnttab *tab, char *basep,
    int forread)
{
	vfs_t *cur = rootvfs;
	char *out = basep;

	/* Walk the circular list once, starting and ending at rootvfs. */
	do {
		if (showhidden || !(cur->vfs_flag & VFS_NOMNTTAB)) {
			mntfs_vfs_generate(cur, global_zone, tab, &out,
			    forread);
			if (tab)
				tab++;
		}
		cur = cur->vfs_next;
	} while (cur != rootvfs);
}
/*
 * Create a new mapping in the current process's address space large enough
 * for size bytes (rounded up to a page multiple) and copy the kernel
 * buffer at base into it.  Returns the user address of the mapping, or
 * NULL if no address could be chosen, the mapping failed, or the copyout
 * failed (in which case the mapping is removed again).
 */
static char *
mntfs_mapin(char *base, size_t size)
{
	struct as *as = curproc->p_as;
	size_t maplen = roundup(size, PAGESIZE);
	char *uaddr;
	int mapped = 0;

	/* Address selection and mapping happen under the range lock. */
	as_rangelock(as);
	map_addr(&uaddr, maplen, 0, 1, 0);
	if (uaddr != NULL &&
	    as_map(as, uaddr, maplen, segvn_create, zfod_argsp) == 0)
		mapped = 1;
	as_rangeunlock(as);

	if (!mapped)
		return (NULL);

	if (copyout(base, uaddr, size) != 0) {
		(void) as_unmap(as, uaddr, maplen);
		return (NULL);
	}

	return (uaddr);
}
/*
 * Release the user-space mappings held by a snapshot and reset it to the
 * empty state.
 *
 * The text and metadata pointers are cleared once their mappings are
 * removed.  Without that, a second call on the same snapshot (for example
 * mntclose() after mntfs_snapshot() has already freed a partially built
 * snapshot on its ENOMEM path) would as_unmap() the stale range again,
 * and that range may by then belong to an unrelated mapping in the
 * process.
 */
static void
mntfs_freesnap(mntsnap_t *snap)
{
	struct as *as = curproc->p_as;

	if (snap->mnts_text != NULL) {
		(void) as_unmap(as, snap->mnts_text,
		    roundup(snap->mnts_textsize, PAGESIZE));
		snap->mnts_text = NULL;
	}
	snap->mnts_textsize = snap->mnts_count = 0;

	if (snap->mnts_metadata != NULL) {
		(void) as_unmap(as, snap->mnts_metadata,
		    roundup(snap->mnts_metasize, PAGESIZE));
		snap->mnts_metadata = NULL;
	}
	snap->mnts_metasize = 0;
}
#ifdef _SYSCALL32_IMPL
/*
* Layout of an extmnttab record as handed to ILP32 callers.  The five
* string fields hold 32-bit user addresses into the snapshot text;
* mntfs_snapshot() narrows native records into this form when the
* caller's data model is DATAMODEL_ILP32.
*/
typedef struct extmnttab32 {
uint32_t mnt_special;
uint32_t mnt_mountp;
uint32_t mnt_fstype;
uint32_t mnt_mntopts;
uint32_t mnt_time;
uint_t mnt_major;
uint_t mnt_minor;
} extmnttab32_t;
#endif
/*
* called to generate a dummy read vop call so that
* any module monitoring /etc/mnttab for access gets notified.
*/
static void
mntdummyreadop()
{
struct uio uio;
struct iovec iov;
char tbuf[1];
if (mntdummyvp == NULL)
return;
/*
* Make a VOP_READ call on the dummy vnode so that any
* module interested in mnttab getting modified could
* intercept this vnode and capture the event.
*
* Pass a dummy uio struct. Nobody should reference the buffer.
* We need to pass a valid uio struct pointer to take care of
* any module intercepting this vnode which could attempt to
* look at it. Currently only the file events notification
* module intercepts this vnode.
*/
bzero(&uio, sizeof (uio));
bzero(&iov, sizeof (iov));
iov.iov_base = tbuf;
iov.iov_len = 0;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_loffset = 0;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_resid = 0;
(void) VOP_READ(mntdummyvp, &uio, 0, kcred, NULL);
}
/*
* Snapshot the latest version of the kernel mounted resource information
*
* There are two types of snapshots: one destined for reading, and one destined
* for ioctl(). The difference is that the ioctl() interface is delimited by
* NULs, while the read() interface is delimited by tabs and newlines.
*
* mnp       - node whose mnt_read (forread != 0) or mnt_ioctl snapshot is
*             refreshed
* forread   - nonzero for a read() snapshot, zero for an ioctl() snapshot
* datamodel - caller's data model; selects the extmnttab32 layout for
*             ioctl() metadata when it is DATAMODEL_ILP32
*
* Returns 0 when the existing snapshot is still current or a new one was
* built, and ENOMEM when the text or metadata could not be mapped into the
* process.
*/
/* ARGSUSED */
static int
mntfs_snapshot(mntnode_t *mnp, int forread, int datamodel)
{
size_t size;
timespec_t lastmodt;
mntdata_t *mntdata = MTOD(mnp);
zone_t *zone = mntdata->mnt_zone;
boolean_t global_view = (MTOD(mnp)->mnt_zone == global_zone);
boolean_t showhidden = ((mnp->mnt_flags & MNT_SHOWHIDDEN) != 0);
struct extmnttab *metadata_baseaddr;
char *text_baseaddr;
int i;
mntsnap_t *snap;
if (forread)
snap = &mnp->mnt_read;
else
snap = &mnp->mnt_ioctl;
/* Sizing and generation both happen under the vfs list read lock. */
vfs_list_read_lock();
/*
* Check if the mnttab info has changed since the last snapshot
*/
vfs_mnttab_modtime(&lastmodt);
if (snap->mnts_count &&
lastmodt.tv_sec == snap->mnts_time.tv_sec &&
lastmodt.tv_nsec == snap->mnts_time.tv_nsec) {
vfs_list_unlock();
return (0);
}
/* The old snapshot is stale; drop its user mappings before rebuilding. */
if (snap->mnts_count != 0)
mntfs_freesnap(snap);
if (global_view)
size = mntfs_global_len(&snap->mnts_count, showhidden);
else
size = mntfs_zone_len(&snap->mnts_count, zone, showhidden);
ASSERT(size != 0);
/* Only ioctl() snapshots carry per-entry extmnttab metadata. */
if (!forread)
metadata_baseaddr = kmem_alloc(
snap->mnts_count * sizeof (struct extmnttab), KM_SLEEP);
else
metadata_baseaddr = NULL;
text_baseaddr = kmem_alloc(size, KM_SLEEP);
if (global_view)
mntfs_global_generate(showhidden, metadata_baseaddr,
text_baseaddr, forread);
else
mntfs_zone_generate(zone, showhidden,
metadata_baseaddr, text_baseaddr, forread);
/* Record the modification time this snapshot corresponds to. */
vfs_mnttab_modtime(&snap->mnts_time);
vfs_list_unlock();
/* Copy the text into the process; the kernel staging copy is then freed. */
snap->mnts_text = mntfs_mapin(text_baseaddr, size);
snap->mnts_textsize = size;
kmem_free(text_baseaddr, size);
/*
* The pointers in the metadata refer to addresses in the range
* [base_addr, base_addr + size]. Now that we have mapped the text into
* the user's address space, we have to convert these addresses into the
* new (user) range. We also handle the conversion for 32-bit and
* 64-bit applications here.
*/
if (!forread) {
struct extmnttab *tab;
#ifdef _SYSCALL32_IMPL
struct extmnttab32 *tab32;
if (datamodel == DATAMODEL_ILP32) {
/*
* tab and tab32 alias the same buffer: each native record is
* narrowed in place into its 32-bit form.
* NOTE(review): this relies on an extmnttab32 being no larger
* than an extmnttab, so that writing tab32[i] never overwrites
* a native field that has yet to be read -- confirm against the
* struct extmnttab definition.
*/
tab = (struct extmnttab *)metadata_baseaddr;
tab32 = (struct extmnttab32 *)metadata_baseaddr;
for (i = 0; i < snap->mnts_count; i++) {
tab32[i].mnt_special =
(uintptr_t)snap->mnts_text +
(tab[i].mnt_special - text_baseaddr);
tab32[i].mnt_mountp =
(uintptr_t)snap->mnts_text +
(tab[i].mnt_mountp - text_baseaddr);
tab32[i].mnt_fstype =
(uintptr_t)snap->mnts_text +
(tab[i].mnt_fstype - text_baseaddr);
tab32[i].mnt_mntopts =
(uintptr_t)snap->mnts_text +
(tab[i].mnt_mntopts - text_baseaddr);
tab32[i].mnt_time = (uintptr_t)snap->mnts_text +
(tab[i].mnt_time - text_baseaddr);
tab32[i].mnt_major = tab[i].mnt_major;
tab32[i].mnt_minor = tab[i].mnt_minor;
}
snap->mnts_metasize =
snap->mnts_count * sizeof (struct extmnttab32);
snap->mnts_metadata = mntfs_mapin(
(char *)metadata_baseaddr,
snap->mnts_metasize);
} else {
#endif
/* Native data model: rebase each field pointer onto the user mapping. */
tab = (struct extmnttab *)metadata_baseaddr;
for (i = 0; i < snap->mnts_count; i++) {
tab[i].mnt_special = snap->mnts_text +
(tab[i].mnt_special - text_baseaddr);
tab[i].mnt_mountp = snap->mnts_text +
(tab[i].mnt_mountp - text_baseaddr);
tab[i].mnt_fstype = snap->mnts_text +
(tab[i].mnt_fstype - text_baseaddr);
tab[i].mnt_mntopts = snap->mnts_text +
(tab[i].mnt_mntopts - text_baseaddr);
tab[i].mnt_time = snap->mnts_text +
(tab[i].mnt_time - text_baseaddr);
}
snap->mnts_metasize =
snap->mnts_count * sizeof (struct extmnttab);
snap->mnts_metadata = mntfs_mapin(
(char *)metadata_baseaddr, snap->mnts_metasize);
#ifdef _SYSCALL32_IMPL
}
#endif
/*
* The staging buffer was allocated with the native record size, so it
* is freed with that size whichever layout was mapped out.
*/
kmem_free(metadata_baseaddr,
snap->mnts_count * sizeof (struct extmnttab));
}
/* Remember the latest text size; mntgetattr() falls back to it. */
mntdata->mnt_size = size;
/* Either mapin may have failed; if so, discard the partial snapshot. */
if (snap->mnts_text == NULL ||
(!forread && snap->mnts_metadata == NULL)) {
mntfs_freesnap(snap);
return (ENOMEM);
}
mntdummyreadop();
return (0);
}
/*
 * Public function to convert vfs_mntopts into a string.
 *
 * A buffer of sufficient size is allocated, which is returned via bufp,
 * and whose length (including the terminating NUL) is returned via lenp.
 * The allocation uses KM_NOSLEEP and so can fail; in that case *bufp is
 * set to NULL and *lenp to 0, so that both outputs are always written.
 */
void
mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp)
{
	size_t len;
	char *buf;

	/* Size and print under one lock hold so the two agree. */
	vfs_list_read_lock();
	len = mntfs_optsize(vfsp) + 1;
	buf = kmem_alloc(len, KM_NOSLEEP);
	if (buf != NULL) {
		buf[len - 1] = '\0';
		(void) mntfs_optprint(vfsp, buf);
		/* The printed string must have ended exactly at the NUL. */
		ASSERT(buf[len - 1] == '\0');
	} else {
		len = 0;
	}
	vfs_list_unlock();

	*bufp = buf;
	*lenp = len;
}
/*
 * Open entry point.  Opens for writing are refused with EPERM.  Every
 * successful open replaces *vpp with a freshly allocated mnt vnode, giving
 * each open its own handle on which to hang snapshots, and bumps the
 * per-mount open count.
 */
/* ARGSUSED */
static int
mntopen(vnode_t **vpp, int flag, cred_t *cr)
{
	vnode_t *origvp = *vpp;
	mntnode_t *newnode;

	if ((flag & FWRITE) != 0)
		return (EPERM);

	newnode = mntgetnode(origvp);
	*vpp = MTOV(newnode);
	atomic_add_32(&MTOD(newnode)->mnt_nopen, 1);

	/* The caller's reference on the original vnode is dropped. */
	VN_RELE(origvp);

	return (0);
}
/*
 * Close entry point.  Locks and shares held by the current process are
 * always cleaned up.  When count is at most 1 and the vnode's reference
 * count is 1, both snapshots are freed and the per-mount open count is
 * decremented.
 */
/* ARGSUSED */
static int
mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
{
	mntnode_t *mnp = VTOM(vp);

	/* Clean up any locks or shares held by the current process */
	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	cleanshares(vp, ttoproc(curthread)->p_pid);

	if (count <= 1 && vp->v_count == 1) {
		mntfs_freesnap(&mnp->mnt_read);
		mntfs_freesnap(&mnp->mnt_ioctl);
		atomic_add_32(&MTOD(mnp)->mnt_nopen, -1);
	}

	return (0);
}
/*
 * Read entry point.  A read at offset 0, or with no read snapshot yet,
 * first refreshes the read snapshot.  The request is then clipped to the
 * snapshot text and copied to the caller.  Returns EFAULT when the offset
 * is negative, when the clipped length exceeds the text size, or when
 * copying from the snapshot fails; otherwise returns the result of
 * uiomove().
 */
/* ARGSUSED */
static int
mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct)
{
	mntnode_t *mnp = VTOM(vp);
	mntsnap_t *snap = &mnp->mnt_read;
	off_t off = uio->uio_offset;
	size_t len = uio->uio_resid;
	char *kbuf;
	int error;

	if (off == (off_t)0 || snap->mnts_count == 0) {
		int datamodel;

		/*
		 * Kernel callers reading mnttab are assumed to want
		 * extmnttab entries rather than extmnttab32 entries,
		 * whether the kernel is LP64 or ILP32, so the data model
		 * passed to mntfs_snapshot() is forced to DATAMODEL_LP64
		 * for them.
		 */
		datamodel = (uio->uio_segflg == UIO_SYSSPACE) ?
		    DATAMODEL_LP64 : get_udatamodel();

		error = mntfs_snapshot(mnp, 1, datamodel);
		if (error != 0)
			return (error);
	}

	/* Clip the request to the end of the snapshot text. */
	if ((size_t)(off + len) > snap->mnts_textsize)
		len = snap->mnts_textsize - off;

	if (off < 0 || len > snap->mnts_textsize)
		return (EFAULT);

	if (len == 0)
		return (0);

	/*
	 * The mnttab image lives in the user's address space, so it is
	 * copied into a kernel buffer first and from there out to the
	 * destination described by the uio.
	 */
	kbuf = kmem_alloc(len, KM_SLEEP);
	if (copyin(snap->mnts_text + off, kbuf, len) != 0)
		error = EFAULT;
	else
		error = uiomove(kbuf, len, UIO_READ, uio);
	kmem_free(kbuf, len);

	mntdummyreadop();
	return (error);
}
/*
 * Getattr entry point.  Starts from the attributes of the underlying
 * mount-point vnode and overrides most of them so that the node looks like
 * a read-only regular file.  Every attribute is filled in regardless of
 * which ones were asked for.
 */
static int
mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
{
	extern timespec_t vfs_mnttab_ctime;
	mntnode_t *mnp = VTOM(vp);
	mntdata_t *mntdata = MTOD(mnp);
	mntsnap_t *snap;
	int error;

	/* Prefer the read snapshot when one exists. */
	if (mnp->mnt_read.mnts_count)
		snap = &mnp->mnt_read;
	else
		snap = &mnp->mnt_ioctl;

	/* Attributes are those of the underlying file, with modifications. */
	error = VOP_GETATTR(mnp->mnt_mountvp, vap, flags, cr);
	if (error != 0)
		return (error);

	/* Always present as a regular file with a read-only mode. */
	vap->va_type = VREG;
	vap->va_mode &= 07444;
	vap->va_fsid = vp->v_vfsp->vfs_dev;
	vap->va_blksize = DEV_BSIZE;
	vap->va_rdev = 0;
	vap->va_seq = 0;

	/* One link for existing, plus one per open vnode. */
	vap->va_nlink = mntdata->mnt_nopen + 1;

	/*
	 * Use this node's snapshot size when it has one; otherwise fall
	 * back to the size of the latest snapshot recorded for the mount.
	 */
	if (snap->mnts_textsize)
		vap->va_size = snap->mnts_textsize;
	else
		vap->va_size = mntdata->mnt_size;

	/* Times come from the vfs mnttab timestamps. */
	vap->va_ctime = vfs_mnttab_ctime;
	vfs_list_read_lock();
	vfs_mnttab_modtime(&vap->va_mtime);
	vap->va_atime = vap->va_mtime;
	vfs_list_unlock();

	/* The node id is always MNTROOTINO. */
	vap->va_nodeid = (ino64_t)MNTROOTINO;
	vap->va_nblocks = btod(vap->va_size);

	return (0);
}
/*
 * Access entry point.  Write and execute access are refused with EROFS;
 * any other request is decided by the underlying mount-point vnode.
 */
static int
mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr)
{
	vnode_t *mountvp = VTOM(vp)->mnt_mountvp;

	if ((mode & (VWRITE|VEXEC)) != 0)
		return (EROFS);

	return (VOP_ACCESS(mountvp, mode, flags, cr));
}
/*
 * Allocate a new mntnode together with its vnode and fill in most of the
 * fields.  The new node shares the mount-point vnode and vfs of dp.
 */
static mntnode_t *
mntgetnode(vnode_t *dp)
{
	mntnode_t *node = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP);
	vnode_t *newvp;

	node->mnt_vnode = vn_alloc(KM_SLEEP);
	node->mnt_mountvp = VTOM(dp)->mnt_mountvp;

	newvp = MTOV(node);
	newvp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
	vn_setops(newvp, mntvnodeops);
	newvp->v_vfsp = dp->v_vfsp;
	newvp->v_type = VREG;
	newvp->v_data = (caddr_t)node;

	return (node);
}
/*
 * Release a node obtained from mntgetnode(): invalidate and free its
 * vnode, then free the mntnode itself.
 */
static void
mntfreenode(mntnode_t *mnp)
{
	vnode_t *nodevp = MTOV(mnp);

	vn_invalid(nodevp);
	vn_free(nodevp);

	kmem_free(mnp, sizeof (mntnode_t));
}
/*
 * Fsync entry point.  There is nothing to flush, so this always reports
 * success.
 */
/* ARGSUSED */
static int
mntfsync(vnode_t *vp, int syncflag, cred_t *cr)
{
	int error = 0;

	return (error);
}
/*
 * Inactive entry point: free the mntnode that backs this vnode.
 */
/* ARGSUSED */
static void
mntinactive(vnode_t *vp, cred_t *cr)
{
	mntfreenode(VTOM(vp));
}
/*
 * Seek entry point.  Seeking to offset 0 also rewinds the node's entry
 * index (mnt_offset); every seek is reported as successful.
 */
/* ARGSUSED */
static int
mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp)
{
	if (*noffp != 0)
		return (0);

	VTOM(vp)->mnt_offset = 0;
	return (0);
}
/*
 * Return the answer requested to poll().
 *
 * POLLIN and POLLRDNORM are always ready.  POLLRDBAND is ready when
 * vfs_mnttab_poll(), given the time of the more recent of this open's two
 * snapshots, hands back no pollhead.
 */
/* ARGSUSED */
static int
mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp)
{
	mntnode_t *mnp = VTOM(vp);
	mntsnap_t *rdsnap = &mnp->mnt_read;
	mntsnap_t *iosnap = &mnp->mnt_ioctl;
	mntsnap_t *latest;

	/* Pick whichever snapshot carries the later timestamp. */
	if (iosnap->mnts_time.tv_sec > rdsnap->mnts_time.tv_sec ||
	    (iosnap->mnts_time.tv_sec == rdsnap->mnts_time.tv_sec &&
	    iosnap->mnts_time.tv_nsec > rdsnap->mnts_time.tv_nsec))
		latest = iosnap;
	else
		latest = rdsnap;

	*revp = 0;
	*phpp = (pollhead_t *)NULL;

	if (ev & POLLIN)
		*revp |= POLLIN;

	if (ev & POLLRDNORM)
		*revp |= POLLRDNORM;

	if (ev & POLLRDBAND) {
		vfs_mnttab_poll(&latest->mnts_time, phpp);
		if (*phpp == (pollhead_t *)NULL)
			*revp |= POLLRDBAND;
	}

	/*
	 * If someone is polling only unsupported events (e.g. POLLOUT,
	 * POLLPRI, etc.), return POLLERR so that we never hand back zero
	 * revents together with a NULL pollhead pointer.
	 */
	if (*revp == 0 && *phpp == NULL && !any)
		*revp = POLLERR;

	return (0);
}
/*
 * Ioctl entry point.
 *
 *   MNTIOC_NMNTS       store the number of entries in the ioctl snapshot
 *                      at arg
 *   MNTIOC_GETDEVLIST  copy out a (major, minor) pair for every entry
 *   MNTIOC_SETTAG /
 *   MNTIOC_CLRTAG      set or clear a tag on the mount described by the
 *                      mnttagdesc at arg
 *   MNTIOC_SHOWHIDDEN  make later snapshots include hidden mounts
 *   MNTIOC_GETMNTENT   store at arg the user address of the next extmnttab
 *                      record; sets *rvalp to 1 at end of table
 *
 * Returns 0 on success or an errno value; unknown commands get EINVAL.
 */
/* ARGSUSED */
static int
mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag,
    cred_t *cr, int *rvalp)
{
	uint_t *up = (uint_t *)arg;
	mntnode_t *mnp = VTOM(vp);
	mntsnap_t *snap = &mnp->mnt_ioctl;
	int datamodel = flag & DATAMODEL_MASK;
	int error = 0;

	switch (cmd) {

	case MNTIOC_NMNTS: {		/* get no. of mounted resources */
		if (snap->mnts_count == 0) {
			if ((error =
			    mntfs_snapshot(mnp, 0, datamodel)) != 0)
				return (error);
		}
		if (suword32(up, snap->mnts_count) != 0)
			error = EFAULT;
		break;
	}

	case MNTIOC_GETDEVLIST: {	/* get mounted device major/minor nos */
		uint_t *devlist;
		int i;
		size_t len;

		if (snap->mnts_count == 0) {
			if ((error =
			    mntfs_snapshot(mnp, 0, datamodel)) != 0)
				return (error);
		}

		len = 2 * snap->mnts_count * sizeof (uint_t);
		devlist = kmem_alloc(len, KM_SLEEP);

		/*
		 * The extmnttab records live in the metadata mapping;
		 * mnts_text holds only the strings they point at.
		 */
		for (i = 0; i < snap->mnts_count; i++) {
#ifdef _SYSCALL32_IMPL
			if (datamodel == DATAMODEL_ILP32) {
				struct extmnttab32 tab;

				if ((error = xcopyin(snap->mnts_metadata +
				    i * sizeof (struct extmnttab32), &tab,
				    sizeof (tab))) != 0)
					break;

				devlist[i*2] = tab.mnt_major;
				devlist[i*2+1] = tab.mnt_minor;
			} else {
#endif
				struct extmnttab tab;

				if ((error = xcopyin(snap->mnts_metadata +
				    i * sizeof (struct extmnttab), &tab,
				    sizeof (tab))) != 0)
					break;

				devlist[i*2] = tab.mnt_major;
				devlist[i*2+1] = tab.mnt_minor;
#ifdef _SYSCALL32_IMPL
			}
#endif
		}

		if (error == 0)
			error = xcopyout(devlist, up, len);
		kmem_free(devlist, len);
		break;
	}

	case MNTIOC_SETTAG:		/* set tag on mounted file system */
	case MNTIOC_CLRTAG:		/* clear tag on mounted file system */
	{
		struct mnttagdesc *dp = (struct mnttagdesc *)arg;
		STRUCT_DECL(mnttagdesc, tagdesc);
		char *cptr;
		uint32_t major, minor;
		char tagbuf[MAX_MNTOPT_TAG];
		char *pbuf;
		size_t len;
		uint_t start = 0;
		mntdata_t *mntdata = MTOD(mnp);
		zone_t *zone = mntdata->mnt_zone;

		STRUCT_INIT(tagdesc, datamodel);
		if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) {
			error = EFAULT;
			break;
		}

		pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		/*
		 * In a non-global zone the supplied mount point is
		 * appended to the zone's root path.
		 */
		if (zone != global_zone) {
			(void) strcpy(pbuf, zone->zone_rootpath);
			/* truncate "/" and nul */
			start = zone->zone_rootpathlen - 2;
			ASSERT(pbuf[start] == '/');
		}

		cptr = STRUCT_FGETP(tagdesc, mtd_mntpt);
		error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len);
		if (error) {
			kmem_free(pbuf, MAXPATHLEN);
			break;
		}

		/* A path supplied from within a zone must be absolute. */
		if (start != 0 && pbuf[start] != '/') {
			kmem_free(pbuf, MAXPATHLEN);
			error = EINVAL;
			break;
		}

		cptr = STRUCT_FGETP(tagdesc, mtd_tag);
		if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) {
			kmem_free(pbuf, MAXPATHLEN);
			break;
		}

		major = STRUCT_FGET(tagdesc, mtd_major);
		minor = STRUCT_FGET(tagdesc, mtd_minor);
		if (cmd == MNTIOC_SETTAG)
			error = vfs_settag(major, minor, pbuf, tagbuf, cr);
		else
			error = vfs_clrtag(major, minor, pbuf, tagbuf, cr);
		kmem_free(pbuf, MAXPATHLEN);
		break;
	}

	case MNTIOC_SHOWHIDDEN:
	{
		mutex_enter(&vp->v_lock);
		mnp->mnt_flags |= MNT_SHOWHIDDEN;
		mutex_exit(&vp->v_lock);
		break;
	}

	case MNTIOC_GETMNTENT:
	{
		size_t idx;
		uintptr_t addr;

		/* Index 0, or an empty snapshot, forces a snapshot check. */
		idx = mnp->mnt_offset;
		if (snap->mnts_count == 0 || idx == 0) {
			if ((error =
			    mntfs_snapshot(mnp, 0, datamodel)) != 0)
				return (error);
		}

		/*
		 * If the next index is beyond the end of the current mnttab,
		 * return EOF
		 */
		if (idx >= snap->mnts_count) {
			*rvalp = 1;
			return (0);
		}

		/*
		 * A failed store is reported as EFAULT, as MNTIOC_NMNTS
		 * does, rather than passing the raw return value of the
		 * store primitive back as the ioctl error.
		 */
#ifdef _SYSCALL32_IMPL
		if (datamodel == DATAMODEL_ILP32) {
			addr = (uintptr_t)(snap->mnts_metadata + idx *
			    sizeof (struct extmnttab32));
			if (suword32((void *)arg, addr) != 0)
				error = EFAULT;
		} else {
#endif
			addr = (uintptr_t)(snap->mnts_metadata + idx *
			    sizeof (struct extmnttab));
			if (sulword((void *)arg, addr) != 0)
				error = EFAULT;
#ifdef _SYSCALL32_IMPL
		}
#endif

		if (error != 0)
			return (error);

		mnp->mnt_offset++;
		break;
	}

	default:
		error = EINVAL;
		break;
	}

	return (error);
}
/*
 * Read operation of the dummy vnode.  It transfers nothing and always
 * succeeds; it exists as a target for the VOP_READ issued by
 * mntdummyreadop().
 */
/* ARGSUSED */
static int
mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
    caller_context_t *ct)
{
	int error = 0;

	return (error);
}
/*
 * Write operation of the dummy vnode.  It transfers nothing and always
 * succeeds.
 */
/* ARGSUSED */
static int
mntdummywrite(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
    caller_context_t *ct)
{
	int error = 0;

	return (error);
}
/*
* /mntfs vnode operations vector
*
* Maps each supported vnode operation name to its mntfs implementation in
* this file.  DISPOSE and SHRLOCK are routed to fs_error.  The table ends
* with a NULL, NULL pair.
*/
const fs_operation_def_t mnt_vnodeops_template[] = {
VOPNAME_OPEN, { .vop_open = mntopen },
VOPNAME_CLOSE, { .vop_close = mntclose },
VOPNAME_READ, { .vop_read = mntread },
VOPNAME_IOCTL, { .vop_ioctl = mntioctl },
VOPNAME_GETATTR, { .vop_getattr = mntgetattr },
VOPNAME_ACCESS, { .vop_access = mntaccess },
VOPNAME_FSYNC, { .vop_fsync = mntfsync },
VOPNAME_INACTIVE, { .vop_inactive = mntinactive },
VOPNAME_SEEK, { .vop_seek = mntseek },
VOPNAME_POLL, { .vop_poll = mntpoll },
VOPNAME_DISPOSE, { .error = fs_error },
VOPNAME_SHRLOCK, { .error = fs_error },
NULL, NULL
};
/*
* Operations template for the dummy vnode: read and write are the
* do-nothing mntdummyread()/mntdummywrite(), and vnevent is handled by
* fs_vnevent_support.  The table ends with a NULL, NULL pair.
*/
const fs_operation_def_t mnt_dummyvnodeops_template[] = {
VOPNAME_READ, { .vop_read = mntdummyread },
VOPNAME_WRITE, { .vop_write = mntdummywrite },
VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
NULL, NULL
};