/* ufs_thread.c, revision 7c478bd95313f5f23a4c958a745db2134aa03244 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Portions of this source code were derived from Berkeley 4.3 BSD
* under license from the Regents of the University of California.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/user.h>
#include <sys/callb.h>
#include <sys/cpuvar.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_log.h>
#include <sys/fs/ufs_trans.h>
#include <sys/fs/ufs_acl.h>
#include <sys/fs/ufs_bio.h>
#include <sys/fs/ufs_fsdir.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
extern pri_t minclsyspri;
extern int hash2ints();
extern struct kmem_cache *inode_cache; /* cache of free inodes */
extern int ufs_idle_waiters;
extern struct instats ins;
static void ufs_attr_purge(struct inode *);
/*
* initialize a thread's queue struct
*/
void
ufs_thread_init(struct ufs_q *uq, int lowat)
{
bzero((caddr_t)uq, sizeof (*uq));
cv_init(&uq->uq_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&uq->uq_mutex, NULL, MUTEX_DEFAULT, NULL);
uq->uq_lowat = lowat;
uq->uq_hiwat = 2 * lowat;
uq->uq_threadp = NULL;
}
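/*
* A minimal lifecycle sketch for these ufs_q interfaces (illustrative
* only; ufs_thread_delete() and ufs_delete_drain() below are real
* consumers, and the lowat value shown is made up):
*
*	ufs_thread_init(&ufsvfsp->vfs_delete, lowat);
*	ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp);
*	...
*	ufs_thread_exit(&ufsvfsp->vfs_delete);
*/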
/*
* start a thread for a queue (assumes success)
*/
void
ufs_thread_start(struct ufs_q *uq, void (*func)(), struct vfs *vfsp)
{
mutex_enter(&uq->uq_mutex);
if (uq->uq_threadp == NULL) {
uq->uq_threadp = thread_create(NULL, 0, func, vfsp, 0, &p0,
TS_RUN, minclsyspri);
uq->uq_flags = 0;
}
mutex_exit(&uq->uq_mutex);
}
/*
* wait for the thread to exit
*/
void
ufs_thread_exit(struct ufs_q *uq)
{
kt_did_t ufs_thread_did = 0;
mutex_enter(&uq->uq_mutex);
uq->uq_flags &= ~(UQ_SUSPEND | UQ_SUSPENDED);
if (uq->uq_threadp != NULL) {
ufs_thread_did = uq->uq_threadp->t_did;
uq->uq_flags |= (UQ_EXIT|UQ_WAIT);
cv_broadcast(&uq->uq_cv);
}
mutex_exit(&uq->uq_mutex);
/*
* It's safe to call thread_join() with an already-gone
* t_did, but we have to obtain it before the kernel
* thread structure is freed. We do so above under the
* protection of the uq_mutex when we're sure the thread
* still exists and it's safe to dereference it.
* We also have to check that ufs_thread_did != 0
* before calling thread_join(), since thread 0 in the system
* gets a t_did of 0.
*/
if (ufs_thread_did)
thread_join(ufs_thread_did);
}
/*
* wait for a thread to suspend itself on the caller's behalf
* the caller is responsible for continuing the thread
*/
void
ufs_thread_suspend(struct ufs_q *uq)
{
mutex_enter(&uq->uq_mutex);
if (uq->uq_threadp != NULL) {
/*
* wait while another thread is suspending this thread.
* no need to do a cv_broadcast(), as whoever suspended
* the thread must continue at some point.
*/
while ((uq->uq_flags & UQ_SUSPEND) &&
(uq->uq_threadp != NULL)) {
uq->uq_flags |= UQ_WAIT;
cv_wait(&uq->uq_cv, &uq->uq_mutex);
}
/*
* wait for the thread to suspend itself
*/
uq->uq_flags |= UQ_SUSPEND;
while (((uq->uq_flags & UQ_SUSPENDED) == 0) &&
(uq->uq_threadp != NULL)) {
uq->uq_flags |= UQ_WAIT;
cv_broadcast(&uq->uq_cv);
cv_wait(&uq->uq_cv, &uq->uq_mutex);
}
}
mutex_exit(&uq->uq_mutex);
}
/*
* allow a thread to continue from a ufs_thread_suspend().
* The caller must be the same thread that called
* ufs_thread_suspend().
*/
void
ufs_thread_continue(struct ufs_q *uq)
{
mutex_enter(&uq->uq_mutex);
uq->uq_flags &= ~(UQ_SUSPEND | UQ_SUSPENDED);
cv_broadcast(&uq->uq_cv);
mutex_exit(&uq->uq_mutex);
}
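/*
* A sketch of the intended suspend/continue pairing: a caller that
* needs the queue quiescent (as ufs_delete_adjust_stats() below
* assumes) brackets its work like this:
*
*	ufs_thread_suspend(uq);
*	...inspect or adjust state while the thread is parked...
*	ufs_thread_continue(uq);
*/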
/*
* some common code for managing a thread's execution
* uq is locked at entry and return
* may sleep
* may exit
*/
/*
* Kind of a hack passing in the callb_cpr_t * here.
* It should really be part of the ufs_q structure.
* I did not put it in there because we are already in beta
* and I was concerned that changing ufs_inode.h to include
* callb.h might break something.
*/
int
ufs_thread_run(struct ufs_q *uq, callb_cpr_t *cprinfop)
{
again:
ASSERT(uq->uq_ne >= 0);
if (uq->uq_flags & UQ_SUSPEND) {
uq->uq_flags |= UQ_SUSPENDED;
} else if (uq->uq_flags & UQ_EXIT) {
/*
* exiting; drain the queue first (may loop indefinitely)
*/
if (uq->uq_ne)
return (uq->uq_ne);
uq->uq_threadp = NULL;
if (uq->uq_flags & UQ_WAIT)
cv_broadcast(&uq->uq_cv);
uq->uq_flags &= ~(UQ_EXIT | UQ_WAIT);
CALLB_CPR_EXIT(cprinfop);
thread_exit();
} else if (uq->uq_ne >= uq->uq_lowat) {
/*
* process a block of entries; drain the queue down to
* half the low water mark
*/
return (uq->uq_ne - (uq->uq_lowat >> 1));
}
if (uq->uq_flags & UQ_WAIT) {
uq->uq_flags &= ~UQ_WAIT;
cv_broadcast(&uq->uq_cv);
}
CALLB_CPR_SAFE_BEGIN(cprinfop);
cv_wait(&uq->uq_cv, &uq->uq_mutex);
CALLB_CPR_SAFE_END(cprinfop, &uq->uq_mutex);
goto again;
}
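/*
* Sketch of the worker loop this helper supports (compare
* ufs_thread_delete() and ufs_thread_hlock(); this is not an
* additional thread, just the expected calling pattern):
*
*	CALLB_CPR_INIT(&cprinfo, &uq->uq_mutex, callb_generic_cpr, "nm");
*	mutex_enter(&uq->uq_mutex);
*	for (;;) {
*		ne = ufs_thread_run(uq, &cprinfo);
*		while (ne--) {
*			...drop uq_mutex, process one entry, retake it...
*		}
*	}
*/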
/*
* DELETE INODE
* The following routines implement the protocol for freeing the resources
* held by an idle and deleted inode.
*/
void
ufs_delete(struct ufsvfs *ufsvfsp, struct inode *ip, int dolockfs)
{
ushort_t mode;
struct vnode *vp = ITOV(ip);
struct ulockfs *ulp;
int trans_size;
int dorwlock = ((ip->i_mode & IFMT) == IFREG);
int issync;
int err;
struct inode *dp;
/*
* not on a trans device or not part of a transaction
*/
ASSERT(!TRANS_ISTRANS(ufsvfsp) ||
((curthread->t_flag & T_DONTBLOCK) == 0));
/*
* Ignore if deletes are not allowed (wlock/hlock)
*/
if (ULOCKFS_IS_NOIDEL(ITOUL(ip))) {
VN_RELE(vp);
return;
}
if ((vp->v_count > 1) || (ip->i_mode == 0)) {
VN_RELE(vp);
return;
}
/*
* If we are called as part of setting a fs lock, then only
* do part of the lockfs protocol. In other words, don't hang.
*/
if (dolockfs) {
if (ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_DELETE_MASK))
return;
} else {
/*
* check for recursive VOP call
*/
if (curthread->t_flag & T_DONTBLOCK) {
ulp = NULL;
} else {
ulp = &ufsvfsp->vfs_ulockfs;
curthread->t_flag |= T_DONTBLOCK;
}
}
/*
* Hold rwlock to synchronize with (nfs) writes
*/
if (dorwlock)
rw_enter(&ip->i_rwlock, RW_WRITER);
/*
* Delete the attribute directory.
*/
if (ip->i_oeftflag != 0) {
TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_REMOVE,
trans_size = (int)TOP_REMOVE_SIZE(ip));
rw_enter(&ip->i_contents, RW_WRITER);
err = ufs_iget(ip->i_vfs, ip->i_oeftflag,
&dp, CRED());
if (err == 0) {
rw_enter(&dp->i_rwlock, RW_WRITER);
rw_enter(&dp->i_contents, RW_WRITER);
dp->i_flag |= IUPD|ICHG;
dp->i_seq++;
TRANS_INODE(dp->i_ufsvfs, dp);
dp->i_nlink -= 2;
ufs_setreclaim(dp);
/*
* Should get rid of any negative cache entries that
* might be lingering, as well as ``.'' and
* ``..''. If we don't, the VN_RELE() below
* won't actually put dp on the delete queue
* and it'll hang out until someone forces it
* (lockfs -f, umount, ...). The only reliable
* way of doing this at the moment is to call
* dnlc_purge_vp(ITOV(dp)), which is unacceptably
* slow, so we'll just note the problem in this
* comment for now.
*/
dnlc_remove(ITOV(dp), ".");
dnlc_remove(ITOV(dp), "..");
ITIMES_NOLOCK(dp);
if (!TRANS_ISTRANS(ufsvfsp)) {
ufs_iupdat(dp, I_SYNC);
}
rw_exit(&dp->i_contents);
rw_exit(&dp->i_rwlock);
VN_RELE(ITOV(dp));
}
/*
* Clear out attribute pointer
*/
ip->i_oeftflag = 0;
rw_exit(&ip->i_contents);
TRANS_END_CSYNC(ufsvfsp, err, issync,
TOP_REMOVE, trans_size);
dnlc_remove(ITOV(ip), XATTR_DIR_NAME);
}
if ((ip->i_mode & IFMT) == IFATTRDIR) {
ufs_attr_purge(ip);
}
(void) TRANS_ITRUNC(ip, (u_offset_t)0, I_FREE, CRED());
/*
* the inode's space has been freed; now free the inode
*/
if (ulp) {
trans_size = TOP_IFREE_SIZE(ip);
TRANS_BEGIN_ASYNC(ufsvfsp, TOP_IFREE, trans_size);
}
rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
rw_enter(&ip->i_contents, RW_WRITER);
TRANS_INODE(ufsvfsp, ip);
mode = ip->i_mode;
ip->i_mode = 0;
ip->i_rdev = 0;
ip->i_ordev = 0;
ip->i_flag |= IMOD;
if (ip->i_ufs_acl) {
(void) ufs_si_free(ip->i_ufs_acl, vp->v_vfsp, CRED());
ip->i_ufs_acl = NULL;
ip->i_shadow = 0;
}
/*
* This inode is torn down but still retains its identity
* (inode number). It could get recycled soon so it's best
* to clean up the vnode just in case.
*/
mutex_enter(&vp->v_lock);
vn_recycle(vp);
mutex_exit(&vp->v_lock);
/*
* free the inode
*/
ufs_ifree(ip, ip->i_number, mode);
/*
* release quota resources; can't fail
*/
(void) chkiq((struct ufsvfs *)vp->v_vfsp->vfs_data,
/* change */ -1, ip, (uid_t)ip->i_uid, 0, CRED(),
(char **)NULL, (size_t *)NULL);
dqrele(ip->i_dquot);
ip->i_dquot = NULL;
ip->i_flag &= ~(IDEL | IDIRECTIO);
ip->i_cflags = 0;
if (!TRANS_ISTRANS(ufsvfsp)) {
ufs_iupdat(ip, I_SYNC);
}
rw_exit(&ip->i_contents);
rw_exit(&ufsvfsp->vfs_dqrwlock);
if (dorwlock)
rw_exit(&ip->i_rwlock);
VN_RELE(vp);
/*
* End of transaction
*/
if (ulp) {
TRANS_END_ASYNC(ufsvfsp, TOP_IFREE, trans_size);
if (dolockfs)
ufs_lockfs_end(ulp);
else
curthread->t_flag &= ~T_DONTBLOCK;
}
}
/*
* thread that frees up deleted inodes
*/
void
ufs_thread_delete(struct vfs *vfsp)
{
struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
struct ufs_q *uq = &ufsvfsp->vfs_delete;
struct inode *ip;
long ne;
callb_cpr_t cprinfo;
CALLB_CPR_INIT(&cprinfo, &uq->uq_mutex, callb_generic_cpr,
"ufsdelete");
mutex_enter(&uq->uq_mutex);
again:
/*
* sleep until there is work to do
*/
ne = ufs_thread_run(uq, &cprinfo);
/*
* process up to ne entries
*/
while (ne-- && (ip = uq->uq_ihead)) {
/*
* process first entry on queue. Assumed conditions are:
* ip is held (v_count >= 1)
* ip is referenced (i_flag & IREF)
* ip is free (i_nlink <= 0)
*/
if ((uq->uq_ihead = ip->i_freef) == ip)
uq->uq_ihead = NULL;
ip->i_freef->i_freeb = ip->i_freeb;
ip->i_freeb->i_freef = ip->i_freef;
ip->i_freef = ip;
ip->i_freeb = ip;
uq->uq_ne--;
mutex_exit(&uq->uq_mutex);
ufs_delete(ufsvfsp, ip, 1);
mutex_enter(&uq->uq_mutex);
}
goto again;
}
/*
* drain ne entries off the delete queue. As new queue entries may
* be added while we're working, ne is interpreted as follows:
*
* ne > 0 => remove up to ne entries
* ne == 0 => remove all entries currently on the queue
* ne == -1 => remove entries until the queue is empty
*/
void
ufs_delete_drain(struct vfs *vfsp, int ne, int dolockfs)
{
struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
struct ufs_q *uq;
struct inode *ip;
int drain_cnt = 0;
int done;
/*
* if forcibly unmounted, ignore
*/
if (ufsvfsp == NULL)
return;
uq = &ufsvfsp->vfs_delete;
mutex_enter(&uq->uq_mutex);
if (ne == 0)
drain_cnt = uq->uq_ne;
else if (ne > 0)
drain_cnt = ne;
/*
* process up to ne entries
*/
done = 0;
while (!done && (ip = uq->uq_ihead)) {
if (ne != -1)
drain_cnt--;
if (ne != -1 && drain_cnt == 0)
done = 1;
if ((uq->uq_ihead = ip->i_freef) == ip)
uq->uq_ihead = NULL;
ip->i_freef->i_freeb = ip->i_freeb;
ip->i_freeb->i_freef = ip->i_freef;
ip->i_freef = ip;
ip->i_freeb = ip;
uq->uq_ne--;
mutex_exit(&uq->uq_mutex);
ufs_delete(ufsvfsp, ip, dolockfs);
mutex_enter(&uq->uq_mutex);
}
mutex_exit(&uq->uq_mutex);
}
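/*
* Illustrative calls covering the three ne modes documented above
* (ufs_delete_drain_wait() below is a real ne == 0 caller):
*
*	ufs_delete_drain(vfsp, 10, 1);	remove up to 10 entries
*	ufs_delete_drain(vfsp, 0, 1);	remove what's queued right now
*	ufs_delete_drain(vfsp, -1, 0);	loop until the queue is empty
*/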
void
ufs_sync_with_thread(struct ufs_q *uq)
{
mutex_enter(&uq->uq_mutex);
uq->uq_flags |= UQ_WAIT;
/*
* Someone other than the thread we're interested in might
* send a signal, so make sure the thread's given an
* acknowledgement.
*/
while ((uq->uq_threadp != NULL) && (uq->uq_flags & UQ_WAIT)) {
cv_broadcast(&uq->uq_cv);
cv_wait(&uq->uq_cv, &uq->uq_mutex);
}
mutex_exit(&uq->uq_mutex);
}
/*
* Get rid of everything that's currently in the delete queue,
* plus whatever the delete thread is working on at the moment.
*
* This ability is required for providing true POSIX semantics
* regarding close(2), unlink(2), etc., even when logging is enabled.
* The standard requires that the released space be immediately
* observable (statvfs(2)) and allocatable (e.g., write(2)).
*/
void
ufs_delete_drain_wait(struct ufsvfs *ufsvfsp, int dolockfs)
{
struct ufs_q *uq = &ufsvfsp->vfs_delete;
int error;
(void) ufs_delete_drain(ufsvfsp->vfs_vfs, 0, dolockfs);
ufs_sync_with_thread(uq);
/*
* Commit any outstanding transactions to make sure
* any canceled freed blocks are available for allocation.
*/
curthread->t_flag |= T_DONTBLOCK;
TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UPDATE, TOP_COMMIT_SIZE, error);
if (!error) {
TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UPDATE,
TOP_COMMIT_SIZE);
}
curthread->t_flag &= ~T_DONTBLOCK;
}
/*
* Adjust the resource usage in a struct statvfs based on
* what's in the delete queue. Assumes that the delete
* thread has been suspended.
*
* We do not consider the impact of ACLs or extended attributes
* that may be deleted as a side-effect of deleting a file.
* Those are metadata, and their sizes aren't reflected in the
* sizes returned by stat(), so this is not a problem.
*/
void
ufs_delete_adjust_stats(struct ufsvfs *ufsvfsp, struct statvfs64 *sp)
{
struct inode *ip;
struct fs *fs = ufsvfsp->vfs_fs;
struct ufs_q *uq = &ufsvfsp->vfs_delete;
/*
* To be self-consistent with the existing contents of
* *sp, we have to keep the queue stable during our
* traversal. Mainly, this keeps anyone from doing a
* ufs_delete_drain() on top of us.
*/
mutex_enter(&uq->uq_mutex);
ip = uq->uq_ihead;
if (ip != NULL) {
do {
sp->f_bfree += dbtofsb(fs, ip->i_blocks);
sp->f_ffree += 1;
ip = ip->i_freef;
} while (ip != uq->uq_ihead);
}
mutex_exit(&uq->uq_mutex);
}
/*
* IDLE INODE
* The following routines implement the protocol for maintaining an
* LRU list of idle inodes and for moving the idle inodes to the
* reuse list when the number of allocated inodes exceeds the user
* tunable high-water mark (ufs_ninode).
*/
/*
* clean an idle inode and move it to the reuse list
*/
static void
ufs_idle_free(struct inode *ip)
{
int pages;
int hno;
kmutex_t *ihm;
struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
struct vnode *vp = ITOV(ip);
/*
* inode is held
*/
/*
* remember `pages' for stats below
*/
pages = (ip->i_mode && vn_has_cached_data(vp) && vp->v_type != VCHR);
/*
* start the dirty pages to disk and then invalidate them
* unless the inode is invalid (ISTALE)
*/
if ((ip->i_flag & ISTALE) == 0) {
(void) TRANS_SYNCIP(ip, B_ASYNC, I_ASYNC, TOP_SYNCIP_FREE);
(void) TRANS_SYNCIP(ip,
(TRANS_ISERROR(ufsvfsp)) ?
B_INVAL | B_FORCE : B_INVAL,
I_ASYNC, TOP_SYNCIP_FREE);
}
/*
* wait for any current ufs_iget to finish and block future ufs_igets
*/
ASSERT(ip->i_number != 0);
hno = INOHASH(ip->i_number);
ihm = &ih_lock[hno];
mutex_enter(ihm);
/*
* It must be guaranteed that v_count >= 2, otherwise
* something must be wrong with this vnode already.
* That is why we use v_count-- instead of VN_RELE().
* Acquire the vnode lock in case another thread is in
* VN_RELE().
*/
mutex_enter(&vp->v_lock);
if (vp->v_count < 2)
cmn_err(CE_PANIC,
"ufs_idle_free: vnode ref count is less than 2");
vp->v_count--;
if ((vp->v_type != VCHR && vn_has_cached_data(vp)) ||
vp->v_count != 1 ||
ip->i_flag & (IMOD|IMODACC|IACC|ICHG|IUPD|IATTCHG)) {
/*
* Another thread has referenced this inode while
* we are trying to free it. Call VN_RELE() to
* release our reference.
*/
mutex_exit(&vp->v_lock);
mutex_exit(ihm);
VN_RELE(vp);
} else {
/*
* The inode is currently unreferenced and cannot
* acquire further references because it has no pages
* and the hash is locked. Inodes acquire references
* via the hash list or via their pages.
*/
mutex_exit(&vp->v_lock);
/*
* remove it from the cache
*/
remque(ip);
mutex_exit(ihm);
/*
* Stale inodes have no valid ufsvfs
*/
if ((ip->i_flag & ISTALE) == 0 && ip->i_dquot) {
TRANS_DQRELE(ufsvfsp, ip->i_dquot);
ip->i_dquot = NULL;
}
ufs_si_del(ip);
if (pages) {
CPU_STATS_ADDQ(CPU, sys, ufsipage, 1);
} else {
CPU_STATS_ADDQ(CPU, sys, ufsinopage, 1);
}
ASSERT((vp->v_type == VCHR) || !vn_has_cached_data(vp));
ufs_free_inode(ip);
}
}
/*
* this thread processes the global idle queue
*/
iqhead_t *ufs_junk_iq;
iqhead_t *ufs_useful_iq;
int ufs_njunk_iq = 0;
int ufs_nuseful_iq = 0;
int ufs_niqhash;
int ufs_iqhashmask;
struct ufs_q ufs_idle_q;
void
ufs_thread_idle(void)
{
callb_cpr_t cprinfo;
int i;
int ne;
ufs_niqhash = (ufs_idle_q.uq_lowat >> 1) / IQHASHQLEN;
ufs_niqhash = 1 << highbit(ufs_niqhash); /* round up to power of 2 */
ufs_iqhashmask = ufs_niqhash - 1;
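/*
* Because ufs_niqhash is a power of two, ufs_iqhashmask can serve as
* a bit mask; advancing a rotor then presumably reduces to an AND,
* e.g. (i + 1) & ufs_iqhashmask, rather than a modulo (see the
* IQNEXT() uses in ufs_idle_some() below).
*/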
ufs_junk_iq = kmem_alloc(ufs_niqhash * sizeof (*ufs_junk_iq),
KM_SLEEP);
ufs_useful_iq = kmem_alloc(ufs_niqhash * sizeof (*ufs_useful_iq),
KM_SLEEP);
/* Initialize hash queue headers */
for (i = 0; i < ufs_niqhash; i++) {
ufs_junk_iq[i].i_freef = (inode_t *)&ufs_junk_iq[i];
ufs_junk_iq[i].i_freeb = (inode_t *)&ufs_junk_iq[i];
ufs_useful_iq[i].i_freef = (inode_t *)&ufs_useful_iq[i];
ufs_useful_iq[i].i_freeb = (inode_t *)&ufs_useful_iq[i];
}
CALLB_CPR_INIT(&cprinfo, &ufs_idle_q.uq_mutex, callb_generic_cpr,
"ufsidle");
again:
/*
* Whenever the idle thread is awakened, it repeatedly gives
* back half of the idle queue until the idle queue falls
* below lowat.
*/
mutex_enter(&ufs_idle_q.uq_mutex);
if (ufs_idle_q.uq_ne < ufs_idle_q.uq_lowat) {
CALLB_CPR_SAFE_BEGIN(&cprinfo);
cv_wait(&ufs_idle_q.uq_cv, &ufs_idle_q.uq_mutex);
CALLB_CPR_SAFE_END(&cprinfo, &ufs_idle_q.uq_mutex);
}
mutex_exit(&ufs_idle_q.uq_mutex);
/*
* Give back 1/2 of the idle queue
*/
ne = ufs_idle_q.uq_ne >> 1;
ins.in_tidles.value.ul += ne;
ufs_idle_some(ne);
goto again;
}
/*
* Reclaim callback for ufs inode cache.
* Invoked by the kernel memory allocator when memory gets tight.
*/
/*ARGSUSED*/
void
ufs_inode_cache_reclaim(void *cdrarg)
{
/*
* If we are low on memory and the idle queue is over its
* halfway mark, then free 50% of the idle q
*
* We don't free all of the idle inodes because the inodes
* for popular NFS files may have been kicked from the dnlc.
* The inodes for these files will end up on the idle queue
* after every NFS access.
*
* If we repeatedly push them from the idle queue then
* NFS users may be unhappy as an extra buf cache operation
* is incurred for every NFS operation to these files.
*
* It's not common, but I have seen it happen.
*
*/
if (ufs_idle_q.uq_ne < (ufs_idle_q.uq_lowat >> 1))
return;
mutex_enter(&ufs_idle_q.uq_mutex);
cv_broadcast(&ufs_idle_q.uq_cv);
mutex_exit(&ufs_idle_q.uq_mutex);
}
/*
* Free up some idle inodes
*/
void
ufs_idle_some(int ne)
{
int i;
struct inode *ip;
struct vnode *vp;
static int junk_rotor = 0;
static int useful_rotor = 0;
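/*
* The junk queues are scanned first: junk inodes are presumed the
* least worth caching, so useful idle inodes are given back only
* once the junk queues are empty. The static rotors let each call
* resume where the previous one stopped, spreading the reclaim
* across the hash buckets instead of always draining bucket 0.
*/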
for (i = 0; i < ne; ++i) {
mutex_enter(&ufs_idle_q.uq_mutex);
if (ufs_njunk_iq) {
while (ufs_junk_iq[junk_rotor].i_freef ==
(inode_t *)&ufs_junk_iq[junk_rotor]) {
junk_rotor = IQNEXT(junk_rotor);
}
ip = ufs_junk_iq[junk_rotor].i_freef;
ASSERT(ip->i_flag & IJUNKIQ);
} else if (ufs_nuseful_iq) {
while (ufs_useful_iq[useful_rotor].i_freef ==
(inode_t *)&ufs_useful_iq[useful_rotor]) {
useful_rotor = IQNEXT(useful_rotor);
}
ip = ufs_useful_iq[useful_rotor].i_freef;
ASSERT(!(ip->i_flag & IJUNKIQ));
} else {
mutex_exit(&ufs_idle_q.uq_mutex);
return;
}
/*
* emulate ufs_iget
*/
vp = ITOV(ip);
VN_HOLD(vp);
mutex_exit(&ufs_idle_q.uq_mutex);
rw_enter(&ip->i_contents, RW_WRITER);
/*
* VN_RELE should not be called if
* ufs_rmidle returns true, as it will
* effectively be done in ufs_idle_free.
*/
if (ufs_rmidle(ip)) {
rw_exit(&ip->i_contents);
ufs_idle_free(ip);
} else {
rw_exit(&ip->i_contents);
VN_RELE(vp);
}
}
}
/*
* drain entries for vfsp from the idle queue
* vfsp == NULL means drain the entire thing
*/
void
ufs_idle_drain(struct vfs *vfsp)
{
struct inode *ip, *nip;
struct inode *ianchor = NULL;
int i;
mutex_enter(&ufs_idle_q.uq_mutex);
if (ufs_njunk_iq) {
/* for each hash q */
for (i = 0; i < ufs_niqhash; i++) {
/* search down the hash q */
for (ip = ufs_junk_iq[i].i_freef;
ip != (inode_t *)&ufs_junk_iq[i];
ip = ip->i_freef) {
if (ip->i_vfs == vfsp || vfsp == NULL) {
/* found a matching entry */
VN_HOLD(ITOV(ip));
mutex_exit(&ufs_idle_q.uq_mutex);
rw_enter(&ip->i_contents, RW_WRITER);
/*
* See comments in ufs_idle_some()
* as we will call ufs_idle_free()
* after scanning both queues.
*/
if (ufs_rmidle(ip)) {
rw_exit(&ip->i_contents);
ip->i_freef = ianchor;
ianchor = ip;
} else {
rw_exit(&ip->i_contents);
VN_RELE(ITOV(ip));
}
/* restart this hash q */
ip = (inode_t *)&ufs_junk_iq[i];
mutex_enter(&ufs_idle_q.uq_mutex);
}
}
}
}
if (ufs_nuseful_iq) {
/* for each hash q */
for (i = 0; i < ufs_niqhash; i++) {
/* search down the hash q */
for (ip = ufs_useful_iq[i].i_freef;
ip != (inode_t *)&ufs_useful_iq[i];
ip = ip->i_freef) {
if (ip->i_vfs == vfsp || vfsp == NULL) {
/* found a matching entry */
VN_HOLD(ITOV(ip));
mutex_exit(&ufs_idle_q.uq_mutex);
rw_enter(&ip->i_contents, RW_WRITER);
/*
* See comments in ufs_idle_some()
* as we will call ufs_idle_free()
* after scanning both queues.
*/
if (ufs_rmidle(ip)) {
rw_exit(&ip->i_contents);
ip->i_freef = ianchor;
ianchor = ip;
} else {
rw_exit(&ip->i_contents);
VN_RELE(ITOV(ip));
}
/* restart this hash q */
ip = (inode_t *)&ufs_useful_iq[i];
mutex_enter(&ufs_idle_q.uq_mutex);
}
}
}
}
mutex_exit(&ufs_idle_q.uq_mutex);
/* no more matching entries, release those we have found (if any) */
for (ip = ianchor; ip; ip = nip) {
nip = ip->i_freef;
ip->i_freef = ip;
ufs_idle_free(ip);
}
}
/*
* RECLAIM DELETED INODES
* The following thread scans the file system once looking for deleted files
*/
void
ufs_thread_reclaim(struct vfs *vfsp)
{
struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
struct ufs_q *uq = &ufsvfsp->vfs_reclaim;
struct fs *fs = ufsvfsp->vfs_fs;
struct buf *bp = 0;
int err = 0;
daddr_t bno;
ino_t ino;
struct dinode *dp;
struct inode *ip;
callb_cpr_t cprinfo;
CALLB_CPR_INIT(&cprinfo, &uq->uq_mutex, callb_generic_cpr,
"ufsreclaim");
/*
* mount decided that we don't need a reclaim thread
*/
if ((fs->fs_reclaim & FS_RECLAIMING) == 0)
err++;
/*
* don't reclaim if readonly
*/
if (fs->fs_ronly)
err++;
for (ino = 0; ino < (fs->fs_ncg * fs->fs_ipg) && !err; ++ino) {
/*
* Check whether we are the target of another
* thread having called ufs_thread_exit() or
* ufs_thread_suspend().
*/
mutex_enter(&uq->uq_mutex);
again:
if (uq->uq_flags & UQ_EXIT) {
err++;
mutex_exit(&uq->uq_mutex);
break;
} else if (uq->uq_flags & UQ_SUSPEND) {
uq->uq_flags |= UQ_SUSPENDED;
/*
* Release the buf before we cv_wait()
* otherwise we may deadlock with the
* thread that called ufs_thread_suspend().
*/
if (bp) {
brelse(bp);
bp = 0;
}
if (uq->uq_flags & UQ_WAIT) {
uq->uq_flags &= ~UQ_WAIT;
cv_broadcast(&uq->uq_cv);
}
CALLB_CPR_SAFE_BEGIN(&cprinfo);
cv_wait(&uq->uq_cv, &uq->uq_mutex);
CALLB_CPR_SAFE_END(&cprinfo, &uq->uq_mutex);
goto again;
}
mutex_exit(&uq->uq_mutex);
/*
* if we don't already have the buf, get it
*/
bno = fsbtodb(fs, itod(fs, ino));
if ((bp == 0) || (bp->b_blkno != bno)) {
if (bp)
brelse(bp);
bp = UFS_BREAD(ufsvfsp,
ufsvfsp->vfs_dev, bno, fs->fs_bsize);
bp->b_flags |= B_AGE;
}
if (bp->b_flags & B_ERROR) {
err++;
continue;
}
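/*
* On-disk inode addressing (standard UFS macros): itod() gives the
* filesystem block holding inode `ino', fsbtodb() turned that into
* the device block read above, and itoo() below indexes the dinode
* within the block's array of dinodes.
*/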
/*
* nlink <= 0 and mode != 0 means deleted
*/
dp = (struct dinode *)bp->b_un.b_addr + itoo(fs, ino);
if ((dp->di_nlink <= 0) && (dp->di_mode != 0)) {
/*
* can't hold the buf (deadlock)
*/
brelse(bp);
bp = 0;
rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
/*
* iget/iput sequence will put inode on ifree
* thread queue if it is idle. This is a nop
* for busy (open, deleted) inodes
*/
if (ufs_iget(vfsp, ino, &ip, CRED()))
err++;
else
VN_RELE(ITOV(ip));
rw_exit(&ufsvfsp->vfs_dqrwlock);
}
}
if (bp)
brelse(bp);
if (!err) {
/*
* reset the reclaiming-bit
*/
mutex_enter(&ufsvfsp->vfs_lock);
fs->fs_reclaim &= ~FS_RECLAIMING;
mutex_exit(&ufsvfsp->vfs_lock);
TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_RECLAIM);
}
/*
* exit the reclaim thread
*/
mutex_enter(&uq->uq_mutex);
uq->uq_threadp = NULL;
uq->uq_flags &= ~UQ_WAIT;
cv_broadcast(&uq->uq_cv);
CALLB_CPR_EXIT(&cprinfo);
thread_exit();
}
/*
* HLOCK FILE SYSTEM
* hlock the file systems whose logs have device errors
*/
struct ufs_q ufs_hlock;
/*ARGSUSED*/
void
ufs_thread_hlock(void *ignore)
{
int retry;
callb_cpr_t cprinfo;
CALLB_CPR_INIT(&cprinfo, &ufs_hlock.uq_mutex, callb_generic_cpr,
"ufshlock");
for (;;) {
/*
* sleep until there is work to do
*/
mutex_enter(&ufs_hlock.uq_mutex);
(void) ufs_thread_run(&ufs_hlock, &cprinfo);
ufs_hlock.uq_ne = 0;
mutex_exit(&ufs_hlock.uq_mutex);
/*
* hlock the errored file systems
* retry after a bit if another app is doing lockfs stuff
*/
do {
retry = ufs_trans_hlock();
if (retry) {
mutex_enter(&ufs_hlock.uq_mutex);
CALLB_CPR_SAFE_BEGIN(&cprinfo);
(void) cv_timedwait(&ufs_hlock.uq_cv,
&ufs_hlock.uq_mutex,
lbolt + hz);
CALLB_CPR_SAFE_END(&cprinfo,
&ufs_hlock.uq_mutex);
mutex_exit(&ufs_hlock.uq_mutex);
}
} while (retry);
}
}
static void
ufs_attr_purge(struct inode *dp)
{
int err;
int error;
off_t dirsize; /* size of the directory */
off_t offset; /* offset in the directory */
int entryoffsetinblk; /* offset of ep in fbp's buffer */
struct inode *tp;
struct fbuf *fbp; /* pointer to directory block */
struct direct *ep; /* directory entry */
int trans_size;
int issync;
struct ufsvfs *ufsvfsp = dp->i_ufsvfs;
rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
fbp = NULL;
dirsize = roundup(dp->i_size, DIRBLKSIZ);
offset = 0;
entryoffsetinblk = 0;
/*
* Purge directory cache
*/
dnlc_dir_purge(&dp->i_danchor);
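/*
* The walk below advances d_reclen bytes per entry and reads one
* filesystem block at a time via blkatoff(); i_size is rounded up
* because UFS directory space is allocated in DIRBLKSIZ units.
*/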
while (offset < dirsize) {
/*
* If offset is on a block boundary,
* read the next directory block.
* Release previous if it exists.
*/
if (blkoff(dp->i_fs, offset) == 0) {
if (fbp != NULL) {
fbrelse(fbp, S_OTHER);
}
err = blkatoff(dp, offset, (char **)0, &fbp);
if (err) {
goto out;
}
entryoffsetinblk = 0;
}
ep = (struct direct *)(fbp->fb_addr + entryoffsetinblk);
if (ep->d_ino == 0 || (ep->d_name[0] == '.' &&
ep->d_name[1] == '\0') ||
(ep->d_name[0] == '.' && ep->d_name[1] == '.' &&
ep->d_name[2] == '\0')) {
entryoffsetinblk += ep->d_reclen;
} else {
if ((err = ufs_iget(dp->i_vfs, ep->d_ino,
&tp, CRED())) != 0) {
goto out;
}
TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_REMOVE,
trans_size = (int)TOP_REMOVE_SIZE(tp));
/*
* Delete inode.
*/
dnlc_remove(ITOV(dp), ep->d_name);
rw_enter(&tp->i_contents, RW_WRITER);
tp->i_flag |= ICHG;
tp->i_seq++;
TRANS_INODE(tp->i_ufsvfs, tp);
tp->i_nlink--;
ufs_setreclaim(tp);
ITIMES_NOLOCK(tp);
rw_exit(&tp->i_contents);
VN_RELE(ITOV(tp));
entryoffsetinblk += ep->d_reclen;
TRANS_END_CSYNC(ufsvfsp, error,
issync, TOP_REMOVE, trans_size);
}
offset += ep->d_reclen;
}
if (fbp) {
fbrelse(fbp, S_OTHER);
}
out:
rw_exit(&ufsvfsp->vfs_dqrwlock);
}