devfs_vfsops.c revision da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* This is the device filesystem.
*
* It is a combination of a namer to drive autoconfiguration,
* plus the access methods for the device drivers of the system.
*
* The prototype is fairly dependent on specfs for the latter part
* of its implementation, though a final version would integrate the two.
*/
#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/time.h>
#include <sys/pathname.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/statvfs.h>
#include <sys/mount.h>
#include <sys/debug.h>
#include <sys/modctl.h>
#include <fs/fs_subr.h>
#include <sys/fs/dv_node.h>
#include <sys/fs/snode.h>
#include <sys/sunndi.h>
#include <sys/policy.h>
#include <sys/sunmdi.h>
/*
 * devfs vfs operations.
 *
 * Forward declarations of the file-local entry points defined below.
 * devfsinit() is the init routine named in devfs_vfssw; the remaining
 * functions are the ones devfsinit() places in the vfs ops template it
 * hands to vfs_setfsops().
 */
static int devfs_mount(struct vfs *, struct vnode *, struct mounta *,
struct cred *);
static int devfs_unmount(struct vfs *, int, struct cred *);
static int devfs_root(struct vfs *, struct vnode **);
static int devfs_statvfs(struct vfs *, struct statvfs64 *);
static int devfs_mountroot(struct vfs *, enum whymountroot);
static int devfsinit(int, char *);
/*
 * Filesystem type definition for devfs.  It is referenced from modlfs
 * below, and its name member is used by devfsinit() when logging.
 */
static vfsdef_t devfs_vfssw = {
VFSDEF_VERSION,
"devfs", /* type name string */
devfsinit, /* init routine */
0, /* flags */
NULL /* mount options table prototype */
};
static kmutex_t devfs_lock; /* protects global data */
static int devfstype; /* fstype index, recorded by devfsinit() */
static dev_t devfsdev; /* the fictitious 'device' we live on */
/* linked list of instances; NULL until devfs_mount() has run */
static struct devfs_data *devfs_mntinfo;
/*
 * Module linkage information
 *
 * modlfs ties the devfs_vfssw type definition to mod_fsops; modlinkage
 * wraps modlfs and is what _init() and _info() pass to mod_install()
 * and mod_info().
 */
static struct modlfs modlfs = {
&mod_fsops, "devices filesystem %I%", &devfs_vfssw
};
static struct modlinkage modlinkage = {
MODREV_1, (void *)&modlfs, NULL
};
/*
 * Module load entry point.
 *
 * Initializes the global lock and the dv_node cache, then registers the
 * module with mod_install().  When registration fails both pieces of
 * setup are undone and the mod_install() error is returned; otherwise
 * the result is 0.
 */
int
_init(void)
{
	int err;

	mutex_init(&devfs_lock, "devfs lock", MUTEX_DEFAULT, NULL);
	dv_node_cache_init();

	err = mod_install(&modlinkage);
	if (err == 0) {
		dcmn_err(("devfs loaded\n"));
		return (0);
	}

	/* registration failed: tear down in reverse order of setup */
	dv_node_cache_fini();
	mutex_destroy(&devfs_lock);
	return (err);
}
/*
 * Module unload entry point.
 *
 * Unconditionally returns EBUSY and performs no teardown, so an unload
 * request for this module is always refused.
 */
int
_fini(void)
{
return (EBUSY);
}
/*
 * Module information entry point.
 *
 * Passes modinfop and this module's linkage structure to mod_info() and
 * returns whatever mod_info() returns.
 */
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}
/*ARGSUSED1*/
static int
devfsinit(int fstype, char *name)
{
static const fs_operation_def_t devfs_vfsops_template[] = {
VFSNAME_MOUNT, { .vfs_mount = devfs_mount },
VFSNAME_UNMOUNT, { .vfs_unmount = devfs_unmount },
VFSNAME_ROOT, { .vfs_root = devfs_root },
VFSNAME_STATVFS, { .vfs_statvfs = devfs_statvfs },
VFSNAME_SYNC, { .vfs_sync = fs_sync },
VFSNAME_MOUNTROOT, { .vfs_mountroot = devfs_mountroot },
NULL, NULL
};
int error;
int dev;
extern major_t getudev(void); /* gack - what a function */
devfstype = fstype;
/*
* Associate VFS ops vector with this fstype
*/
error = vfs_setfsops(fstype, devfs_vfsops_template, NULL);
if (error != 0) {
cmn_err(CE_WARN, "devfsinit: bad vfs ops template");
return (error);
}
error = vn_make_ops("dev fs", dv_vnodeops_template, &dv_vnodeops);
if (error != 0) {
(void) vfs_freevfsops_by_type(fstype);
cmn_err(CE_WARN, "devfsinit: bad vnode ops template");
return (error);
}
/*
* Invent a dev_t (sigh).
*/
if ((dev = getudev()) == (major_t)-1) {
cmn_err(CE_NOTE, "%s: can't get unique dev", devfs_vfssw.name);
dev = 0;
}
devfsdev = makedevice(dev, 0);
return (0);
}
/*
* The name of the mount point and the name of the attribute
* filesystem are passed down from userland for now.
*/
static int
devfs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
struct cred *cr)
{
struct devfs_data *devfs_data;
struct vnode *avp;
struct dv_node *dv;
struct vattr va;
dcmn_err(("devfs_mount\n"));
if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
return (EPERM);
/*
* check that the mount point is sane
*/
if (mvp->v_type != VDIR)
return (ENOTDIR);
ASSERT(uap->flags & MS_SYSSPACE);
/*
* Devfs can only be mounted from kernel during boot.
* avp is the existing /devices, the same as the mount point.
*/
avp = mvp;
/*
* Create and initialize the vfs-private data.
* This includes a hand-crafted root vnode (we build
* this here mostly so that traverse() doesn't sleep
* in VFS_ROOT()).
*/
mutex_enter(&devfs_lock);
ASSERT(devfs_mntinfo == NULL);
dv = dv_mkroot(vfsp, devfsdev);
dv->dv_attrvp = avp; /* attribute root vp */
ASSERT(dv == dv->dv_dotdot);
devfs_data = kmem_zalloc(sizeof (struct devfs_data), KM_SLEEP);
devfs_data->devfs_vfsp = vfsp;
devfs_data->devfs_root = dv;
vfsp->vfs_data = (caddr_t)devfs_data;
vfsp->vfs_fstype = devfstype;
vfsp->vfs_dev = devfsdev;
vfsp->vfs_bsize = DEV_BSIZE;
vfsp->vfs_mtime = ddi_get_time();
vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devfstype);
/* We're there. */
devfs_mntinfo = devfs_data;
mutex_exit(&devfs_lock);
va.va_mask = AT_ATIME|AT_MTIME;
gethrestime(&va.va_atime);
gethrestime(&va.va_mtime);
(void) VOP_SETATTR(DVTOV(dv), &va, 0, cr, NULL);
return (0);
}
/*
 * We never unmount devfs in a real production system: every unmount
 * request is refused with EBUSY and none of the arguments is examined.
 */
/*ARGSUSED*/
static int
devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr)
{
return (EBUSY);
}
/*
 * Return the root vnode of the given devfs vfs.
 *
 * The vnode stored through vpp carries a hold taken here.  Always
 * returns 0.
 */
static int
devfs_root(struct vfs *vfsp, struct vnode **vpp)
{
	struct vnode *rootvp;

	dcmn_err(("devfs_root\n"));

	rootvp = DVTOV(VFSTODVFS(vfsp)->devfs_root);
	VN_HOLD(rootvp);
	*vpp = rootvp;

	return (0);
}
/*
 * Return 'generic superblock' information to userland.
 *
 * There is not much that we can usefully admit to here: block and
 * free-file counts are reported as zero, and the file count is taken
 * from the dv_node cache statistics.  Always returns 0.
 */
static int
devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
{
	extern kmem_cache_t *dv_node_cache;
	dev32_t fsid32;

	dcmn_err(("devfs_statvfs\n"));

	bzero(sbp, sizeof (*sbp));

	sbp->f_bsize = vfsp->vfs_bsize;
	sbp->f_frsize = sbp->f_bsize;

	/* no illusions that blocks are relevant to devfs */
	sbp->f_blocks = 0;
	sbp->f_bfree = 0;
	sbp->f_bavail = 0;

	/*
	 * We could compute the number of devfsnodes here .. but since
	 * it's dynamic anyway, it's not clear how useful this is.
	 */
	sbp->f_files = kmem_cache_stat(dv_node_cache, "alloc");

	/* no illusions that free/avail files is relevant to devfs */
	sbp->f_ffree = 0;
	sbp->f_favail = 0;

	/* report the filesystem id as the compressed 32-bit device number */
	(void) cmpldev(&fsid32, vfsp->vfs_dev);
	sbp->f_fsid = fsid32;

	(void) strcpy(sbp->f_basetype, vfssw[devfstype].vsw_name);
	(void) strcpy(sbp->f_fstr, "devices");
	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
	sbp->f_namemax = MAXNAMELEN - 1;

	return (0);
}
/*
 * devfs is always mounted after root is mounted, so this should never
 * be invoked.  If it is called anyway, it ignores its arguments and
 * fails with EINVAL.
 */
/*ARGSUSED*/
static int
devfs_mountroot(struct vfs *vfsp, enum whymountroot why)
{
dcmn_err(("devfs_mountroot\n"));
return (EINVAL);
}
/*
 * Map a devinfo node to the dv_node of its /devices directory.
 *
 * Returns NULL when devfs has not been mounted yet or when the lookup
 * of the node's path fails.  On success the returned dv_node's vnode is
 * held (devfs_lookupname() hands back a held vnode) and the caller must
 * VN_RELE() it.
 */
struct dv_node *
devfs_dip_to_dvnode(dev_info_t *dip)
{
	struct dv_node *found = NULL;
	struct vnode *vp;
	char *pathbuf;

	ASSERT(dip != NULL);

	/* no-op if devfs not mounted yet */
	if (devfs_mntinfo == NULL)
		return (NULL);

	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, pathbuf);

	/*
	 * The lookup below only looks up cached dv_nodes
	 * because devfs_clean_key is set in thread specific data.
	 */
	if (devfs_lookupname(pathbuf, NULLVPP, &vp) == 0) {
		found = VTODV(vp);
	} else {
		dcmn_err(("directory %s not found\n", pathbuf));
	}

	kmem_free(pathbuf, MAXPATHLEN);
	return (found);
}
/*
 * mdi_walk_vhcis() callback used by devfs_clean().
 *
 * If a DV_CLEAN_FORCE devfs_clean is issued with a dip that is not the
 * root and not a vHCI, we also need to clean any vHCI branches because
 * they may contain pHCI nodes.  A detach_node() of a pHCI will fail if
 * its mdi_devi_offline() fails, and the mdi_devi_offline() of the last
 * pHCI will fail unless an ndi_devi_offline() of the Client nodes under
 * the vHCI is successful - which requires a clean vHCI branch to remove
 * the devi_refs associated with devfs vnodes.
 *
 * args carries the devfs_clean() flags packed into a pointer.  Always
 * returns DDI_WALK_CONTINUE, whether or not anything was cleaned.
 */
static int
devfs_clean_vhci(dev_info_t *dip, void *args)
{
	uint_t cleanflags = (uint_t)(uintptr_t)args;
	struct dv_node *vhci_dv;

	(void) tsd_set(devfs_clean_key, (void *)1);

	vhci_dv = devfs_dip_to_dvnode(dip);
	if (vhci_dv != NULL) {
		(void) dv_cleandir(vhci_dv, NULL, cleanflags);
		VN_RELE(DVTOV(vhci_dv));
	}

	(void) tsd_set(devfs_clean_key, NULL);

	return (DDI_WALK_CONTINUE);
}
/*
 * devfs_clean()
 *
 * Destroy unreferenced dv_nodes and detach devices.
 *
 * devfs_clean will try its best to clean up unused nodes.  It is
 * no longer valid to assume that just because devfs_clean fails,
 * the device is not removable.  This is because device contracts
 * can result in userland processes releasing a device during the
 * device offline process in the kernel.  Thus it is no longer
 * correct to fail an offline just because devfs_clean finds
 * referenced dv_nodes.  To enforce this, devfs_clean() always
 * returns success, i.e. 0.
 *
 * devfs_clean() may return before removing all possible nodes if
 * we cannot acquire locks in areas of the code where potential for
 * deadlock exists (see comments in dv_find() and dv_cleandir() for
 * examples of this).
 *
 * devfs caches unreferenced dv_nodes to speed up the performance
 * of ls, find, etc.  devfs_clean() is invoked to clean up cached
 * dv_nodes to reclaim memory as well as to facilitate device
 * removal (dv_nodes reference devinfo nodes, which prevents driver
 * detach).
 *
 * If a shell parks in a /devices directory, the dv_node will be
 * held, preventing the corresponding device from being detached.
 * This would be a denial of service against DR.  To prevent this,
 * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag.
 * The dv_cleandir() implementation does the right thing to ensure
 * successful DR.
 */
int
devfs_clean(dev_info_t *dip, char *devnm, uint_t flags)
{
	struct dv_node *dir_dv;

	dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x",
	    (void *)dip, flags));

	/* avoid recursion back into the device tree */
	(void) tsd_set(devfs_clean_key, (void *)1);
	dir_dv = devfs_dip_to_dvnode(dip);
	if (dir_dv != NULL) {
		(void) dv_cleandir(dir_dv, devnm, flags);
	}
	(void) tsd_set(devfs_clean_key, NULL);

	/* nothing was looked up, so there is nothing to release or walk */
	if (dir_dv == NULL)
		return (0);

	VN_RELE(DVTOV(dir_dv));

	/*
	 * If we are doing a DV_CLEAN_FORCE, and we did not start at the
	 * root, and we did not start at a vHCI node then clean vHCI
	 * branches too.  Failure to clean vHCI branch does not cause EBUSY.
	 *
	 * Also, to accommodate nexus callers that clean 'self' to DR 'child'
	 * (like pcihp) we clean vHCIs even when dv_cleandir() of dip branch
	 * above fails - this prevents a busy DR 'child' sibling from causing
	 * the DR of 'child' to fail because a vHCI branch was not cleaned.
	 */
	if ((flags & DV_CLEAN_FORCE) == 0)
		return (0);
	if (dip == ddi_root_node())
		return (0);
	if (mdi_component_is_vhci(dip, NULL) == MDI_SUCCESS)
		return (0);

	/*
	 * NOTE: for backport the following is recommended
	 *	(void) devfs_clean_vhci(scsi_vhci_dip,
	 *	    (void *)(uintptr_t)flags);
	 */
	mdi_walk_vhcis(devfs_clean_vhci, (void *)(uintptr_t)flags);

	return (0);
}
/*
 * Look up a devfs relative pathname, returning held vnodes for the final
 * component and the containing directory (if requested).
 *
 * An empty path or a path consisting only of slashes resolves to
 * devicesdir itself; in that case *dirvpp (if requested) is set to NULL.
 *
 * Returns 0 on success, otherwise the error from pn_get() or
 * lookuppnvp().
 *
 * NOTE: We can't use lookupname because this would use the current
 *	 process's credentials (CRED) in the call to lookuppnvp instead
 *	 of kcred.  It also does not give you the flexibility to
 *	 specify the directory to start the resolution in (devicesdir).
 */
int
devfs_lookupname(
	char	*pathname,		/* user pathname */
	vnode_t	**dirvpp,		/* ret for ptr to parent dir vnode */
	vnode_t	**compvpp)		/* ret for ptr to component vnode */
{
	struct pathname lookpn;
	int error;

	ASSERT(devicesdir);		/* devfs must be initialized */
	ASSERT(pathname);		/* must have some path */

	if ((error = pn_get(pathname, UIO_SYSSPACE, &lookpn)) != 0)
		return (error);

	/* make the path relative to /devices. */
	pn_skipslash(&lookpn);

	if (pn_pathleft(&lookpn) != 0) {
		/*
		 * Use devfs lookup to resolve pathname to the vnode for
		 * the device via relative lookup in devfs.  Extra holds for
		 * using devicesdir as directory we are searching and for
		 * being our root without being == rootdir.
		 */
		VN_HOLD(devicesdir);
		VN_HOLD(devicesdir);
		error = lookuppnvp(&lookpn, NULL, FOLLOW, dirvpp, compvpp,
		    devicesdir, devicesdir, kcred);
	} else {
		/* all we had was "\0" or "/" (which skipslash skipped) */
		if (dirvpp != NULL) {
			*dirvpp = NULL;
		}
		if (compvpp != NULL) {
			VN_HOLD(devicesdir);
			*compvpp = devicesdir;
		}
	}

	pn_free(&lookpn);

	return (error);
}
/*
* Given a devfs path (without the /devices prefix), walk
* the dv_node sub-tree rooted at the path.
*/
int
devfs_walk(
char *path,
void (*callback)(struct dv_node *, void *),
void *arg)
{
char *dirpath, *devnm;
struct vnode *dirvp;
ASSERT(path && callback);
if (*path != '/' || devfs_mntinfo == NULL)
return (ENXIO);
dcmn_err(("devfs_walk: path = %s", path));
dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
(void) snprintf(dirpath, MAXPATHLEN, "/devices%s", path);
devnm = strrchr(dirpath, '/');
ASSERT(devnm);
*devnm++ = '\0';
if (lookupname(dirpath, UIO_SYSSPACE, 0, NULL, &dirvp)) {
dcmn_err(("directory %s not found\n", dirpath));
kmem_free(dirpath, MAXPATHLEN);
return (ENXIO);
}
/*
* if path == "/", visit the root dv_node
*/
if (*devnm == '\0') {
callback(VTODV(dirvp), arg);
devnm = NULL;
}
dv_walk(VTODV(dirvp), devnm, callback, arg);
VN_RELE(dirvp);
kmem_free(dirpath, MAXPATHLEN);
return (0);
}
/*
 * Fetch the device policy attached to the dv_node behind vp.
 *
 * On success stores the policy in *dpp, with a hold taken via dphold(),
 * and returns 0.  Returns -1, leaving *dpp untouched, when devfs is not
 * mounted yet, when vp's real vnode cannot be obtained or is not a devfs
 * vnode, or when the dv_node has no policy set.
 */
int
devfs_devpolicy(vnode_t *vp, devplcy_t **dpp)
{
	struct vnode *realvp;
	struct dv_node *dv;
	int rval = -1;

	/* fail if devfs not mounted yet */
	if (devfs_mntinfo == NULL)
		return (-1);

	if (VOP_REALVP(vp, &realvp, NULL) != 0)
		return (-1);

	/* only vnodes that use the devfs vnode ops have a dv_node */
	if (!vn_matchops(realvp, dv_vnodeops))
		return (-1);

	dv = VTODV(realvp);

	/* dv_priv is read and held under the dv_contents reader lock */
	rw_enter(&dv->dv_contents, RW_READER);
	if (dv->dv_priv != NULL) {
		dphold(dv->dv_priv);
		*dpp = dv->dv_priv;
		rval = 0;
	}
	rw_exit(&dv->dv_contents);

	return (rval);
}