ufs_filio.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/signal.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/mman.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/vmmeter.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/filio.h>
#include <sys/dnlc.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_lockfs.h>
#include <sys/fs/ufs_fs.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_fsdir.h>
#include <sys/fs/ufs_quota.h>
#include <sys/fs/ufs_trans.h>
#include <sys/fs/ufs_log.h>
#include <sys/dirent.h> /* must be AFTER <sys/fs/fsdir.h>! */
#include <sys/errno.h>
#include <sys/sysinfo.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
#include <vm/rm.h>
#include <sys/swap.h>
#include <sys/model.h>
#include <sys/policy.h>
#include "fs/fs_subr.h"
/*
 * ufs_fioio
 *	Open a file by inode number and generation number and hand back a
 *	read-only file descriptor for it.  This is the ufs equivalent of
 *	NFS_CNVT and is tailored to metamucil's needs.  It may change at
 *	any time.
 *
 *	vp	any file on the file system that holds the target inode
 *	fiou	user address of a struct fioio; fio_ino and fio_gen are
 *		read from it, and the structure (with fio_fd filled in) is
 *		copied back out on success
 *	flag	flag from VOP_IOCTL(); its data model bits select the
 *		layout of the user's fioio struct
 *	cr	caller's credentials
 *
 * Returns 0 on success, otherwise:
 *	EPERM	secpolicy_fs_config() refused the caller
 *	EFAULT	the fioio struct could not be copied in or out
 *	ESRCH	fio_ino is outside the file system's inode number range
 *	ESTALE	fio_gen does not match the inode's generation number
 *	ENOENT	the inode is free (i_mode == 0)
 *	or the error returned by ufs_iget(), falloc(), VOP_ACCESS() or
 *	VOP_OPEN().
 */
/* ARGSUSED */
int
ufs_fioio(
	struct vnode	*vp,		/* any file on the fs */
	struct fioio	*fiou,		/* fioio struct in userland */
	int		flag,		/* flag from VOP_IOCTL() */
	struct cred	*cr)		/* credentials from ufs_ioctl */
{
	int		error = 0;
	struct vnode	*vpio = NULL;	/* vnode for inode open */
	struct inode	*ipio = NULL;	/* inode for inode open */
	struct file	*fpio = NULL;	/* file for inode open */
	struct inode	*ip;		/* inode for file system */
	struct fs	*fs;		/* fs for file system */
	STRUCT_DECL(fioio, fio);	/* copy of user's fioio struct */

	/*
	 * must be privileged
	 */
	if (secpolicy_fs_config(cr, vp->v_vfsp) != 0)
		return (EPERM);

	STRUCT_INIT(fio, flag & DATAMODEL_MASK);

	/*
	 * get user's copy of fioio struct
	 */
	if (copyin(fiou, STRUCT_BUF(fio), STRUCT_SIZE(fio)))
		return (EFAULT);

	ip = VTOI(vp);
	fs = ip->i_fs;

	/*
	 * check the inode number against the fs's inode number bounds
	 */
	if (STRUCT_FGET(fio, fio_ino) < UFSROOTINO)
		return (ESRCH);
	if (STRUCT_FGET(fio, fio_ino) >= fs->fs_ncg * fs->fs_ipg)
		return (ESRCH);

	/*
	 * get the inode; vfs_dqrwlock is held as reader across ufs_iget()
	 */
	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
	error = ufs_iget(ip->i_vfs, STRUCT_FGET(fio, fio_ino), &ipio, cr);
	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);
	if (error)
		return (error);

	/*
	 * check the generation number
	 */
	rw_enter(&ipio->i_contents, RW_READER);
	if (ipio->i_gen != STRUCT_FGET(fio, fio_gen)) {
		error = ESTALE;
		rw_exit(&ipio->i_contents);
		goto errout;
	}

	/*
	 * check if the inode is free
	 */
	if (ipio->i_mode == 0) {
		error = ENOENT;
		rw_exit(&ipio->i_contents);
		goto errout;
	}
	rw_exit(&ipio->i_contents);

	/*
	 * Adapted from copen: get a file struct
	 * Large Files: We open this file descriptor with FOFFMAX flag
	 * set so that it will be like a large file open.
	 *
	 * The falloc() error must be captured in "error"; otherwise the
	 * errout path would return 0 and the caller would believe that
	 * fio_fd had been set up even though no descriptor was allocated.
	 */
	if ((error = falloc(NULL, (FREAD|FOFFMAX), &fpio,
	    STRUCT_FADDR(fio, fio_fd))) != 0)
		goto errout;

	/*
	 * Adapted from vn_open: check access and then open the file
	 */
	vpio = ITOV(ipio);
	if ((error = VOP_ACCESS(vpio, VREAD, 0, cr)) != 0)
		goto errout;

	if ((error = VOP_OPEN(&vpio, FREAD, cr)) != 0)
		goto errout;

	/*
	 * Adapted from copen: initialize the file struct
	 */
	fpio->f_vnode = vpio;

	/*
	 * return the fd
	 *
	 * NOTE(review): a copyout failure here takes the errout path after
	 * VOP_OPEN() succeeded, without a matching close -- confirm that
	 * this is harmless for ufs.
	 */
	if (copyout(STRUCT_BUF(fio), fiou, STRUCT_SIZE(fio))) {
		error = EFAULT;
		goto errout;
	}

	/*
	 * Publish the file in the fd slot.  f_tlock is dropped here with no
	 * matching mutex_enter in this function, so falloc() presumably
	 * returned with it held.
	 */
	setf(STRUCT_FGET(fio, fio_fd), fpio);
	mutex_exit(&fpio->f_tlock);
	return (0);

errout:
	/*
	 * free the file struct and fd
	 */
	if (fpio) {
		setf(STRUCT_FGET(fio, fio_fd), NULL);
		unfalloc(fpio);
	}

	/*
	 * release the hold on the inode
	 */
	if (ipio)
		VN_RELE(ITOV(ipio));
	return (error);
}
/*
* ufs_fiosatime
* set access time w/o altering change time. This ioctl is tailored
* to metamucil's needs and may change at any time.
*/
int
ufs_fiosatime(
struct vnode *vp, /* file's vnode */
struct timeval *tvu, /* struct timeval in userland */
int flag, /* flag from VOP_IOCTL() */
struct cred *cr) /* credentials from ufs_ioctl */
{
struct inode *ip; /* inode for vp */
struct timeval32 tv; /* copy of user's timeval */
int now = 0;
/*
* must have sufficient privileges
*/
if (secpolicy_fs_config(cr, vp->v_vfsp) != 0)
return (EPERM);
/*
* get user's copy of timeval struct and check values
* if input is NULL, will set time to now
*/
if (tvu == NULL) {
now = 1;
} else {
if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) {
if (copyin(tvu, &tv, sizeof (tv)))
return (EFAULT);
} else {
struct timeval tv64;
if (copyin(tvu, &tv64, sizeof (tv64)))
return (EFAULT);
if (TIMEVAL_OVERFLOW(&tv64))
return (EOVERFLOW);
TIMEVAL_TO_TIMEVAL32(&tv, &tv64);
}
if (tv.tv_usec < 0 || tv.tv_usec >= 1000000)
return (EINVAL);
}
/*
* update access time
*/
ip = VTOI(vp);
rw_enter(&ip->i_contents, RW_WRITER);
ITIMES_NOLOCK(ip);
if (now) {
mutex_enter(&ufs_iuniqtime_lock);
ip->i_atime = iuniqtime;
mutex_exit(&ufs_iuniqtime_lock);
} else {
ip->i_atime = tv;
}
ip->i_flag |= IMODACC;
rw_exit(&ip->i_contents);
return (0);
}
/*
 * ufs_fiogdio
 *	Report the file system's delayed-io state (vfs_dio) to the caller.
 *	This ioctl is tailored to metamucil's needs and may change at any
 *	time.
 *
 *	vp	vnode of any file on the file system
 *	diop	user address that receives the 32-bit dio state
 *
 * Returns 0 on success, EIO if the file system has been forcibly
 * unmounted, or EFAULT if the state cannot be stored at diop.
 */
/* ARGSUSED */
int
ufs_fiogdio(
	struct vnode	*vp,		/* file's vnode */
	uint_t		*diop,		/* dio state returned here */
	int		flag,		/* flag from ufs_ioctl */
	struct cred	*cr)		/* credentials from ufs_ioctl */
{
	struct inode	*ip = VTOI(vp);
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;

	/* a NULL ufsvfs means the fs was forcibly unmounted */
	if (ufsvfsp == NULL)
		return (EIO);

	return (suword32(diop, ufsvfsp->vfs_dio) ? EFAULT : 0);
}
/*
 * ufs_fiosdio
 *	Set the file system's delayed-io state.  This ioctl is tailored
 *	to metamucil's needs and may change at any time.
 *
 *	vp	vnode of any file on the file system
 *	diop	user address of the new dio flag (must be 0 or 1)
 *	flag	flag from ufs_ioctl (unused)
 *	cr	caller's credentials
 *
 * Returns 0 on success (including the case of a logging file system,
 * where the request is ignored), EPERM if the caller lacks privilege,
 * EFAULT if the flag cannot be copied in, EINVAL if it is greater than
 * 1, EIO if the file system is forcibly unmounted or hard locked, EBUSY
 * if it is error locked, or the error from ufs_quiesce()/ufs_flush().
 */
int
ufs_fiosdio(
	struct vnode	*vp,		/* file's vnode */
	uint_t		*diop,		/* dio flag */
	int		flag,		/* flag from ufs_ioctl */
	struct cred	*cr)		/* credentials from ufs_ioctl */
{
	uint_t		newdio;		/* copy of user's dio */
	struct inode	*ip = VTOI(vp);	/* inode for vp */
	struct ufsvfs	*ufsvfsp;
	struct ulockfs	*ulp;
	struct fs	*fs;
	int		error = 0;

#ifdef lint
	flag = flag;
#endif

	/*
	 * validate the caller and the request
	 */
	if (secpolicy_fs_config(cr, vp->v_vfsp) != 0)
		return (EPERM);

	if (copyin(diop, &newdio, sizeof (newdio)))
		return (EFAULT);

	if (newdio > 1)
		return (EINVAL);

	/*
	 * a NULL ufsvfs means the fs was forcibly unmounted
	 */
	ufsvfsp = ip->i_ufsvfs;
	if (ufsvfsp == NULL)
		return (EIO);

	ulp = &ufsvfsp->vfs_ulockfs;

	/*
	 * dio is ignored on a logging file system
	 */
	if (TRANS_ISTRANS(ufsvfsp))
		return (0);

	/*
	 * ul_lock is held to keep a lockfs request from racing with us
	 */
	vfs_lock_wait(vp->v_vfsp);
	mutex_enter(&ulp->ul_lock);

	if (ULOCKFS_IS_HLOCK(ulp)) {
		error = EIO;
	} else if (ULOCKFS_IS_ELOCK(ulp)) {
		error = EBUSY;
	} else if ((error = ufs_quiesce(ulp)) == 0 &&
	    (error = ufs_flush(vp->v_vfsp)) == 0) {
		/*
		 * Outstanding accesses have drained and the fs has been
		 * flushed w/invalidate; now switch the dio state.
		 */
		mutex_enter(&ufsvfsp->vfs_lock);
		ufsvfsp->vfs_dio = newdio;

		/*
		 * enable/disable clean flag processing
		 */
		fs = ip->i_fs;
		if (fs->fs_ronly == 0 &&
		    fs->fs_clean != FSBAD &&
		    fs->fs_clean != FSLOG) {
			fs->fs_clean = newdio ? FSSUSPEND : FSACTIVE;
			ufs_sbwrite(ufsvfsp);
		}
		mutex_exit(&ufsvfsp->vfs_lock);
	}

	/*
	 * The broadcast is issued on every path that took ul_lock; it is
	 * needed because of the ufs_quiesce() call above.
	 */
	cv_broadcast(&ulp->ul_cv);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vp->v_vfsp);
	return (error);
}
/*
 * ufs_fioffs_start_roll
 *	Helper for ufs_fioffs(), called with the ulockfs ul_lock held after
 *	the file system has been quiesced.  If the file system is logging
 *	and its logmap was marked as not rollable, make it rollable, start
 *	the roll thread and, when a reclaim is pending on a writable file
 *	system, start the reclaim thread and push the superblock.  A failed
 *	superblock write is only reported through cmn_err().
 */
static void
ufs_fioffs_start_roll(struct vnode *vp, struct ufsvfs *ufsvfsp)
{
	ml_unit_t	*logp = ufsvfsp->vfs_log;
	struct fs	*fsp;
	refstr_t	*mntpt;
	int		sberr;

	/* nothing to do unless logging with rolling disabled */
	if (logp == NULL)
		return;
	fsp = ufsvfsp->vfs_fs;
	if (!(logp->un_flags & LDL_NOROLL))
		return;

	/* the log is safe to write to at this point */
	logp->un_flags &= ~LDL_NOROLL;
	logmap_start_roll(logp);

	/* the reclaim thread is only wanted on a writable fs that needs it */
	if (fsp->fs_ronly ||
	    !(fsp->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)))
		return;

	fsp->fs_reclaim &= ~FS_RECLAIM;
	fsp->fs_reclaim |= FS_RECLAIMING;
	ufs_thread_start(&ufsvfsp->vfs_reclaim, ufs_thread_reclaim,
	    vp->v_vfsp);

	if (fsp->fs_ronly)
		return;

	/* record the new reclaim state in the on-disk superblock */
	TRANS_SBWRITE(ufsvfsp, TOP_SBUPDATE_UPDATE);
	sberr = geterror(ufsvfsp->vfs_bufp);
	if (sberr == 0)
		return;

	mntpt = vfs_getmntpoint(vp->v_vfsp);
	cmn_err(CE_NOTE,
	    "Filesystem Flush "
	    "Failed to update "
	    "Reclaim Status for "
	    " %s, Write failed to "
	    "update superblock, "
	    "error %d",
	    refstr_value(mntpt),
	    sberr);
	refstr_rele(mntpt);
}

/*
 * ufs_fioffs - ioctl handler for flushing file system
 *
 *	vp	vnode of any file on the file system
 *	vap	must be NULL - reserved
 *	cr	caller's credentials
 *
 * Returns 0 on success, EIO if the file system has been forcibly
 * unmounted or is hard locked, EBUSY if it is error locked, or the
 * error from ufs_quiesce() or ufs_flush().
 */
/* ARGSUSED */
int
ufs_fioffs(
	struct vnode	*vp,
	char		*vap,		/* must be NULL - reserved */
	struct cred	*cr)		/* credentials from ufs_ioctl */
{
	struct ufsvfs	*ufsvfsp = VTOI(vp)->i_ufsvfs;
	struct ulockfs	*ulp;
	int		error;

	/* a NULL ufsvfs means the fs was forcibly unmounted */
	if (ufsvfsp == NULL)
		return (EIO);

	ulp = &ufsvfsp->vfs_ulockfs;

	/*
	 * The delete thread is suspended first; this must be done outside
	 * the lockfs locking protocol.
	 */
	ufs_thread_suspend(&ufsvfsp->vfs_delete);

	vfs_lock_wait(vp->v_vfsp);

	/* ul_lock keeps a lockfs request from racing with us */
	mutex_enter(&ulp->ul_lock);

	if (ULOCKFS_IS_HLOCK(ulp)) {
		error = EIO;
	} else if (ULOCKFS_IS_ELOCK(ulp)) {
		error = EBUSY;
	} else if ((error = ufs_quiesce(ulp)) == 0) {
		/*
		 * Outstanding accesses have finished.  Re-enable log
		 * rolling if necessary, then synchronously flush dirty
		 * data and metadata.
		 */
		ufs_fioffs_start_roll(vp, ufsvfsp);
		error = ufs_flush(vp->v_vfsp);
	}

	cv_broadcast(&ulp->ul_cv);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vp->v_vfsp);

	/*
	 * let the delete thread run again
	 */
	ufs_thread_continue(&ufsvfsp->vfs_delete);
	return (error);
}
/*
 * ufs_fioisbusy
 *	Tell the caller whether anyone else is using this vnode.
 *	Contract-private interface for Legato's NetWorker product.
 *
 *	vp	vnode to examine; the caller holds one reference
 *	isbusy	user address that receives 0 (idle) or 1 (busy)
 *
 * Returns 0 on success or EFAULT if the result cannot be stored.
 */
/* ARGSUSED */
int
ufs_fioisbusy(struct vnode *vp, int *isbusy, struct cred *cr)
{
	int	busy;

	/*
	 * Besides the caller's own reference there may be one held by
	 * the dnlc; purge it so it is not mistaken for a real user.
	 */
	if (vp->v_count > 1)
		dnlc_purge_vp(vp);

	/*
	 * With the dnlc purged and our own hold in place, a count other
	 * than 1 means busy (and it had BETTER not be zero!).  A file can
	 * also be mmap'ed without an extra reference, so the map count is
	 * checked as well.
	 */
	ASSERT(vp->v_count > 0);
	busy = (vp->v_count != 1) || (VTOI(vp)->i_mapcnt != 0);

	return (suword32(isbusy, busy) ? EFAULT : 0);
}
/*
 * ufs_fiodirectio
 *	Turn direct i/o mode on or off for a file by setting or clearing
 *	IDIRECTIO in the inode's flags.
 *
 *	vp	vnode of the file
 *	cmd	DIRECTIO_ON or DIRECTIO_OFF
 *
 * Returns 0 on success or EINVAL for any other cmd.
 */
/* ARGSUSED */
int
ufs_fiodirectio(struct vnode *vp, int cmd, struct cred *cr)
{
	struct inode	*ip = VTOI(vp);
	int		error = 0;

	/*
	 * The flag is changed under i_tlock while i_contents is held as
	 * reader.
	 */
	rw_enter(&ip->i_contents, RW_READER);
	mutex_enter(&ip->i_tlock);

	switch (cmd) {
	case DIRECTIO_ON:
		ip->i_flag |= IDIRECTIO;	/* enable direct mode */
		break;
	case DIRECTIO_OFF:
		ip->i_flag &= ~IDIRECTIO;	/* disable direct mode */
		break;
	default:
		error = EINVAL;
		break;
	}

	mutex_exit(&ip->i_tlock);
	rw_exit(&ip->i_contents);
	return (error);
}
/*
* ufs_fiotune
* Allow some tunables to be set on a mounted fs
*/
int
ufs_fiotune(struct vnode *vp, struct fiotune *uftp, struct cred *cr)
{
struct fiotune ftp;
struct fs *fs;
struct ufsvfs *ufsvfsp;
/*
* must have sufficient privileges
*/
if (secpolicy_fs_config(cr, vp->v_vfsp) != 0)
return (EPERM);
/*
* get user's copy
*/
if (copyin(uftp, &ftp, sizeof (ftp)))
return (EFAULT);
/*
* some minimal sanity checks
*/
if ((ftp.maxcontig <= 0) ||
(ftp.rotdelay != 0) ||
(ftp.maxbpg <= 0) ||
(ftp.minfree < 0) ||
(ftp.minfree > 99) ||
((ftp.optim != FS_OPTTIME) && (ftp.optim != FS_OPTSPACE)))
return (EINVAL);
/*
* update superblock but don't write it! If it gets out, fine.
*/
fs = VTOI(vp)->i_fs;
fs->fs_maxcontig = ftp.maxcontig;
fs->fs_rotdelay = ftp.rotdelay;
fs->fs_maxbpg = ftp.maxbpg;
fs->fs_minfree = ftp.minfree;
fs->fs_optim = ftp.optim;
/*
* Adjust cluster based on the new maxcontig. The cluster size
* can be any positive value. The check for this is done above.
*/
ufsvfsp = VTOI(vp)->i_ufsvfs;
ufsvfsp->vfs_ioclustsz = fs->fs_bsize * fs->fs_maxcontig;
/*
* Adjust minfrags from minfree
*/
ufsvfsp->vfs_minfrags = (int)((int64_t)fs->fs_dsize *
fs->fs_minfree / 100);
/*
* Write the superblock
*/
if (fs->fs_ronly == 0) {
TRANS_BEGIN_ASYNC(ufsvfsp, TOP_SBUPDATE_UPDATE,
TOP_SBWRITE_SIZE);
TRANS_SBWRITE(ufsvfsp, TOP_SBUPDATE_UPDATE);
TRANS_END_ASYNC(ufsvfsp, TOP_SBUPDATE_UPDATE, TOP_SBWRITE_SIZE);
}
return (0);
}
/*
 * ufs_fio_holey
 *	Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
 *	data (cmd == _FIO_SEEK_DATA).
 *
 *	vp	vnode of the file to search
 *	cmd	_FIO_SEEK_HOLE to look for a hole; any other value looks
 *		for data
 *	off	in/out: starting offset on entry, offset found on return
 *
 * Returns 0 on success, ENXIO if the starting offset is at or beyond
 * the end of file or no data is found before the end of file, or
 * another error passed back from bmap_find().  The end of the file
 * counts as a (virtual) hole.
 */
int
ufs_fio_holey(vnode_t *vp, int cmd, offset_t *off)
{
	inode_t		*ip = VTOI(vp);
	boolean_t	want_hole;
	u_offset_t	found = (u_offset_t)*off;	/* new offset */
	u_offset_t	fsize;
	int		error;

	ASSERT(*off >= 0);

	want_hole = (cmd == _FIO_SEEK_HOLE) ? B_TRUE : B_FALSE;

	rw_enter(&ip->i_contents, RW_READER);
	fsize = (offset_t)ip->i_size;
	if (*off >= fsize) {
		rw_exit(&ip->i_contents);
		return (ENXIO);
	}

	/*
	 * Usual case: the file has no holes.  The first (virtual) hole is
	 * then the end of the file, and *off already points at data, so
	 * bmap_find() need not walk every block looking for a hole that
	 * does not exist.
	 */
	if (!bmap_has_holes(ip)) {
		rw_exit(&ip->i_contents);
		if (want_hole)
			*off = fsize;
		return (0);
	}

	/*
	 * Calling bmap_read() one block at a time on a 1TB file takes
	 * forever, so a special function searches for holes or blocks.
	 */
	error = bmap_find(ip, want_hole, &found);
	rw_exit(&ip->i_contents);

	/*
	 * ENXIO means the search ran off the end of the file: that is the
	 * virtual hole for a hole search and a failure for a data search.
	 */
	if (error == ENXIO) {
		if (!want_hole)
			return (ENXIO);
		*off = fsize;
		return (0);
	}

	/* never move the caller's offset backwards */
	if (found >= *off)
		*off = found;
	return (error);
}