/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/uio.h>
#include <sys/tiuser.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/ioctl.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/modctl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/fcntl.h>
#include <sys/fbuf.h>
#include <sys/dnlc.h>
#include <sys/callb.h>
#include <sys/kobj.h>
#include <sys/rwlock.h>
#include <sys/vmsystm.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
#include <vm/rm.h>
#include <sys/fs/cachefs_fs.h>
#include <sys/fs/cachefs_log.h>
#include <sys/fs/cachefs_dir.h>
extern struct seg *segkmap;
caddr_t segmap_getmap();
int segmap_release();
extern struct cnode *cachefs_freeback;
extern struct cnode *cachefs_freefront;
extern cachefscache_t *cachefs_cachelist;
#ifdef CFSDEBUG
int cachefsdebug = 0;
#endif
int cachefs_max_threads = CFS_MAX_THREADS;
ino64_t cachefs_check_fileno = 0;
struct kmem_cache *cachefs_cache_kmcache = NULL;
struct kmem_cache *cachefs_req_cache = NULL;
static int
cachefs_async_populate_reg(struct cachefs_populate_req *, cred_t *,
vnode_t *, vnode_t *);
/*
* Cache routines
*/
/*
* ------------------------------------------------------------------
*
* cachefs_cache_create
*
* Description:
* Creates a cachefscache_t object and initializes it to
 *	be in NOCACHE and NOFILL mode.
* Arguments:
* Returns:
* Returns a pointer to the created object or NULL if
* threads could not be created.
* Preconditions:
*/
cachefscache_t *
cachefs_cache_create(void)
{
cachefscache_t *cachep;
struct cachefs_req *rp;
/* allocate zeroed memory for the object */
cachep = kmem_cache_alloc(cachefs_cache_kmcache, KM_SLEEP);
bzero(cachep, sizeof (*cachep));
cv_init(&cachep->c_cwcv, NULL, CV_DEFAULT, NULL);
cv_init(&cachep->c_cwhaltcv, NULL, CV_DEFAULT, NULL);
mutex_init(&cachep->c_contentslock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&cachep->c_fslistlock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&cachep->c_log_mutex, NULL, MUTEX_DEFAULT, NULL);
/* set up the work queue and get the sync thread created */
cachefs_workq_init(&cachep->c_workq);
cachep->c_workq.wq_keepone = 1;
cachep->c_workq.wq_cachep = cachep;
rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
rp->cfs_cmd = CFS_NOOP;
rp->cfs_cr = kcred;
rp->cfs_req_u.cu_fs_sync.cf_cachep = cachep;
crhold(rp->cfs_cr);
cachefs_addqueue(rp, &cachep->c_workq);
cachep->c_flags |= CACHE_NOCACHE | CACHE_NOFILL | CACHE_ALLOC_PENDING;
return (cachep);
}
/*
* ------------------------------------------------------------------
*
* cachefs_cache_destroy
*
* Description:
* Destroys the cachefscache_t object.
* Arguments:
* cachep the cachefscache_t object to destroy
* Returns:
* Preconditions:
* precond(cachep)
*/
void
cachefs_cache_destroy(cachefscache_t *cachep)
{
clock_t tend;
int error = 0;
#ifdef CFSRLDEBUG
uint_t index;
#endif /* CFSRLDEBUG */
/* stop async threads */
while (cachep->c_workq.wq_thread_count > 0)
(void) cachefs_async_halt(&cachep->c_workq, 1);
/* kill off the cachep worker thread */
mutex_enter(&cachep->c_contentslock);
while (cachep->c_flags & CACHE_CACHEW_THREADRUN) {
cachep->c_flags |= CACHE_CACHEW_THREADEXIT;
cv_signal(&cachep->c_cwcv);
tend = lbolt + (60 * hz);
(void) cv_timedwait(&cachep->c_cwhaltcv,
&cachep->c_contentslock, tend);
}
if ((cachep->c_flags & CACHE_ALLOC_PENDING) == 0) {
cachep->c_usage.cu_flags &= ~CUSAGE_ACTIVE;
(void) cachefs_cache_rssync(cachep);
}
mutex_exit(&cachep->c_contentslock);
/* if there is a cache */
if ((cachep->c_flags & CACHE_NOCACHE) == 0) {
if ((cachep->c_flags & CACHE_NOFILL) == 0) {
#ifdef CFSRLDEBUG
/* blow away dangling rl debugging info */
mutex_enter(&cachep->c_contentslock);
for (index = 0;
index <= cachep->c_rlinfo.rl_entries;
index++) {
rl_entry_t *rlent;
				error = cachefs_rl_entry_get(cachep, index,
				    &rlent);
/*
* Since we are destroying the cache,
* better to ignore and proceed
*/
if (error)
break;
cachefs_rl_debug_destroy(rlent);
}
mutex_exit(&cachep->c_contentslock);
#endif /* CFSRLDEBUG */
/* sync the cache */
if (!error)
cachefs_cache_sync(cachep);
} else {
/* get rid of any unused fscache objects */
mutex_enter(&cachep->c_fslistlock);
fscache_list_gc(cachep);
mutex_exit(&cachep->c_fslistlock);
}
ASSERT(cachep->c_fslist == NULL);
VN_RELE(cachep->c_resfilevp);
VN_RELE(cachep->c_dirvp);
VN_RELE(cachep->c_lockvp);
VN_RELE(cachep->c_lostfoundvp);
}
if (cachep->c_log_ctl != NULL)
cachefs_kmem_free(cachep->c_log_ctl,
sizeof (cachefs_log_control_t));
if (cachep->c_log != NULL)
cachefs_log_destroy_cookie(cachep->c_log);
cv_destroy(&cachep->c_cwcv);
cv_destroy(&cachep->c_cwhaltcv);
mutex_destroy(&cachep->c_contentslock);
mutex_destroy(&cachep->c_fslistlock);
mutex_destroy(&cachep->c_log_mutex);
kmem_cache_free(cachefs_cache_kmcache, cachep);
}
/*
* ------------------------------------------------------------------
*
 * cachefs_cache_activate_ro
*
* Description:
* Activates the cachefscache_t object for a read-only file system.
* Arguments:
* cachep the cachefscache_t object to activate
* cdvp the vnode of the cache directory
* Returns:
* Returns 0 for success, !0 if there is a problem with the cache.
* Preconditions:
* precond(cachep)
* precond(cdvp)
* precond(cachep->c_flags & CACHE_NOCACHE)
*/
int
cachefs_cache_activate_ro(cachefscache_t *cachep, vnode_t *cdvp)
{
cachefs_log_control_t *lc;
vnode_t *labelvp = NULL;
vnode_t *rifvp = NULL;
vnode_t *lockvp = NULL;
vnode_t *statevp = NULL;
vnode_t *lostfoundvp = NULL;
struct vattr *attrp = NULL;
int error;
ASSERT(cachep->c_flags & CACHE_NOCACHE);
mutex_enter(&cachep->c_contentslock);
attrp = cachefs_kmem_alloc(sizeof (struct vattr), KM_SLEEP);
/* get the mode bits of the cache directory */
attrp->va_mask = AT_ALL;
error = VOP_GETATTR(cdvp, attrp, 0, kcred);
if (error)
goto out;
/* ensure the mode bits are 000 to keep out casual users */
if (attrp->va_mode & S_IAMB) {
cmn_err(CE_WARN, "cachefs: Cache Directory Mode must be 000\n");
error = EPERM;
goto out;
}
/* Get the lock file */
error = VOP_LOOKUP(cdvp, CACHEFS_LOCK_FILE, &lockvp, NULL, 0, NULL,
kcred);
if (error) {
cmn_err(CE_WARN, "cachefs: activate_a: cache corruption"
" run fsck.\n");
goto out;
}
/* Get the label file */
error = VOP_LOOKUP(cdvp, CACHELABEL_NAME, &labelvp, NULL, 0, NULL,
kcred);
if (error) {
cmn_err(CE_WARN, "cachefs: activate_b: cache corruption"
" run fsck.\n");
goto out;
}
/* read in the label */
error = vn_rdwr(UIO_READ, labelvp, (caddr_t)&cachep->c_label,
sizeof (struct cache_label), 0LL, UIO_SYSSPACE,
0, (rlim64_t)0, kcred, NULL);
if (error) {
cmn_err(CE_WARN, "cachefs: activate_c: cache corruption"
" run fsck.\n");
goto out;
}
/* Verify that we can handle the version this cache was created under */
if (cachep->c_label.cl_cfsversion != CFSVERSION) {
cmn_err(CE_WARN, "cachefs: Invalid Cache Version, run fsck\n");
error = EINVAL;
goto out;
}
/* Open the resource file */
error = VOP_LOOKUP(cdvp, RESOURCE_NAME, &rifvp, NULL, 0, NULL, kcred);
if (error) {
cmn_err(CE_WARN, "cachefs: activate_d: cache corruption"
" run fsck.\n");
goto out;
}
/* Read the usage struct for this cache */
error = vn_rdwr(UIO_READ, rifvp, (caddr_t)&cachep->c_usage,
sizeof (struct cache_usage), 0LL, UIO_SYSSPACE, 0,
(rlim64_t)0, kcred, NULL);
if (error) {
cmn_err(CE_WARN, "cachefs: activate_e: cache corruption"
" run fsck.\n");
goto out;
}
if (cachep->c_usage.cu_flags & CUSAGE_ACTIVE) {
cmn_err(CE_WARN, "cachefs: cache not clean. Run fsck\n");
/* ENOSPC is what UFS uses for clean flag check */
error = ENOSPC;
goto out;
}
/* Read the rlinfo for this cache */
error = vn_rdwr(UIO_READ, rifvp, (caddr_t)&cachep->c_rlinfo,
sizeof (cachefs_rl_info_t), (offset_t)sizeof (struct cache_usage),
UIO_SYSSPACE, 0, 0, kcred, NULL);
if (error) {
cmn_err(CE_WARN, "cachefs: activate_f: cache corruption"
" run fsck.\n");
goto out;
}
/* Open the lost+found directory */
error = VOP_LOOKUP(cdvp, CACHEFS_LOSTFOUND_NAME, &lostfoundvp,
NULL, 0, NULL, kcred);
if (error) {
cmn_err(CE_WARN, "cachefs: activate_g: cache corruption"
" run fsck.\n");
goto out;
}
VN_HOLD(rifvp);
VN_HOLD(cdvp);
VN_HOLD(lockvp);
VN_HOLD(lostfoundvp);
cachep->c_resfilevp = rifvp;
cachep->c_dirvp = cdvp;
cachep->c_lockvp = lockvp;
cachep->c_lostfoundvp = lostfoundvp;
/* get the cachep worker thread created */
cachep->c_flags |= CACHE_CACHEW_THREADRUN;
(void) thread_create(NULL, 0, cachefs_cachep_worker_thread,
cachep, 0, &p0, TS_RUN, minclsyspri);
/* allocate the `logging control' field */
mutex_enter(&cachep->c_log_mutex);
cachep->c_log_ctl =
cachefs_kmem_zalloc(sizeof (cachefs_log_control_t), KM_SLEEP);
lc = (cachefs_log_control_t *)cachep->c_log_ctl;
/* if the LOG_STATUS_NAME file exists, read it in and set up logging */
error = VOP_LOOKUP(cachep->c_dirvp, LOG_STATUS_NAME, &statevp,
NULL, 0, NULL, kcred);
if (error == 0) {
int vnrw_error;
vnrw_error = vn_rdwr(UIO_READ, statevp, (caddr_t)lc,
sizeof (*lc), 0LL, UIO_SYSSPACE, 0, (rlim64_t)RLIM_INFINITY,
kcred, NULL);
VN_RELE(statevp);
if (vnrw_error == 0) {
if ((cachep->c_log = cachefs_log_create_cookie(lc))
== NULL)
cachefs_log_error(cachep, ENOMEM, 0);
else if ((lc->lc_magic != CACHEFS_LOG_MAGIC) ||
(lc->lc_path[0] != '/') ||
(cachefs_log_logfile_open(cachep,
lc->lc_path) != 0))
cachefs_log_error(cachep, EINVAL, 0);
}
} else {
error = 0;
}
lc->lc_magic = CACHEFS_LOG_MAGIC;
lc->lc_cachep = (uint64_t)(uintptr_t)cachep;
mutex_exit(&cachep->c_log_mutex);
out:
if (error == 0) {
cachep->c_flags &= ~(CACHE_NOCACHE | CACHE_ALLOC_PENDING);
}
if (attrp)
cachefs_kmem_free(attrp, sizeof (struct vattr));
if (labelvp != NULL)
VN_RELE(labelvp);
if (rifvp != NULL)
VN_RELE(rifvp);
if (lockvp)
VN_RELE(lockvp);
if (lostfoundvp)
VN_RELE(lostfoundvp);
mutex_exit(&cachep->c_contentslock);
return (error);
}
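/*
 * Transitions a cache from NOFILL to NOCACHE mode.  Halts async
 * work, disables caching on all cnodes, releases the filegrp,
 * fscache, and cache resources acquired in cachefs_cache_activate_ro,
 * and marks the cache NOCACHE.  Returns 0 on success or if the cache
 * is already in NOCACHE mode; EINVAL if the cache is not in NOFILL
 * mode.
 */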
int
cachefs_stop_cache(cnode_t *cp)
{
fscache_t *fscp = C_TO_FSCACHE(cp);
cachefscache_t *cachep = fscp->fs_cache;
filegrp_t *fgp;
int i;
clock_t tend;
int error = 0;
/* XXX verify lock-ordering for this function */
mutex_enter(&cachep->c_contentslock);
/*
* no work if we're already in nocache mode. hopefully this
* will be the usual case.
*/
if (cachep->c_flags & CACHE_NOCACHE) {
mutex_exit(&cachep->c_contentslock);
return (0);
}
if ((cachep->c_flags & CACHE_NOFILL) == 0) {
mutex_exit(&cachep->c_contentslock);
return (EINVAL);
}
mutex_exit(&cachep->c_contentslock);
/* We are already not caching if nfsv4 */
if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
return (0);
}
#ifdef CFSDEBUG
mutex_enter(&cachep->c_fslistlock);
ASSERT(fscp == cachep->c_fslist);
ASSERT(fscp->fs_next == NULL);
mutex_exit(&cachep->c_fslistlock);
printf("cachefs_stop_cache: resetting CACHE_NOCACHE\n");
#endif
/* XXX should i worry about disconnected during boot? */
error = cachefs_cd_access(fscp, 1, 1);
if (error)
goto out;
error = cachefs_async_halt(&fscp->fs_workq, 1);
ASSERT(error == 0);
error = cachefs_async_halt(&cachep->c_workq, 1);
ASSERT(error == 0);
/* sigh -- best to keep going if async_halt failed. */
error = 0;
/* XXX current order: cnode, fgp, fscp, cache. okay? */
cachefs_cnode_traverse(fscp, cachefs_cnode_disable_caching);
for (i = 0; i < CFS_FS_FGP_BUCKET_SIZE; i++) {
for (fgp = fscp->fs_filegrp[i]; fgp != NULL;
fgp = fgp->fg_next) {
mutex_enter(&fgp->fg_mutex);
ASSERT((fgp->fg_flags &
(CFS_FG_WRITE | CFS_FG_UPDATED)) == 0);
fgp->fg_flags |=
CFS_FG_ALLOC_FILE |
CFS_FG_ALLOC_ATTR;
fgp->fg_flags &= ~CFS_FG_READ;
if (fgp->fg_dirvp) {
fgp->fg_flags |= CFS_FG_ALLOC_FILE;
VN_RELE(fgp->fg_dirvp);
fgp->fg_dirvp = NULL;
}
if (fgp->fg_attrvp) {
fgp->fg_flags |= CFS_FG_ALLOC_ATTR;
VN_RELE(fgp->fg_attrvp);
fgp->fg_attrvp = NULL;
}
mutex_exit(&fgp->fg_mutex);
}
}
mutex_enter(&fscp->fs_fslock);
ASSERT((fscp->fs_flags & (CFS_FS_WRITE)) == 0);
fscp->fs_flags &= ~(CFS_FS_READ | CFS_FS_DIRTYINFO);
if (fscp->fs_fscdirvp) {
VN_RELE(fscp->fs_fscdirvp);
fscp->fs_fscdirvp = NULL;
}
if (fscp->fs_fsattrdir) {
VN_RELE(fscp->fs_fsattrdir);
fscp->fs_fsattrdir = NULL;
}
if (fscp->fs_infovp) {
VN_RELE(fscp->fs_infovp);
fscp->fs_infovp = NULL;
}
/* XXX dlog stuff? */
mutex_exit(&fscp->fs_fslock);
/*
* release resources grabbed in cachefs_cache_activate_ro
*/
mutex_enter(&cachep->c_contentslock);
/* kill off the cachep worker thread */
while (cachep->c_flags & CACHE_CACHEW_THREADRUN) {
cachep->c_flags |= CACHE_CACHEW_THREADEXIT;
cv_signal(&cachep->c_cwcv);
tend = lbolt + (60 * hz);
(void) cv_timedwait(&cachep->c_cwhaltcv,
&cachep->c_contentslock, tend);
}
if (cachep->c_resfilevp) {
VN_RELE(cachep->c_resfilevp);
cachep->c_resfilevp = NULL;
}
if (cachep->c_dirvp) {
VN_RELE(cachep->c_dirvp);
cachep->c_dirvp = NULL;
}
if (cachep->c_lockvp) {
VN_RELE(cachep->c_lockvp);
cachep->c_lockvp = NULL;
}
if (cachep->c_lostfoundvp) {
VN_RELE(cachep->c_lostfoundvp);
cachep->c_lostfoundvp = NULL;
}
mutex_enter(&cachep->c_log_mutex);
if (cachep->c_log_ctl) {
cachefs_kmem_free(cachep->c_log_ctl,
sizeof (cachefs_log_control_t));
cachep->c_log_ctl = NULL;
}
if (cachep->c_log) {
cachefs_log_destroy_cookie(cachep->c_log);
cachep->c_log = NULL;
}
mutex_exit(&cachep->c_log_mutex);
/* XXX do what mountroot_init does when ! foundcache */
cachep->c_flags |= CACHE_NOCACHE;
mutex_exit(&cachep->c_contentslock);
/* XXX should i release this here? */
cachefs_cd_release(fscp);
out:
return (error);
}
/*
* ------------------------------------------------------------------
*
 * cachefs_cache_activate_rw
*
* Description:
* Activates the cachefscache_t object for a read-write file system.
* Arguments:
* cachep the cachefscache_t object to activate
* Returns:
* Preconditions:
* precond(cachep)
* precond((cachep->c_flags & CACHE_NOCACHE) == 0)
* precond(cachep->c_flags & CACHE_NOFILL)
*/
void
cachefs_cache_activate_rw(cachefscache_t *cachep)
{
cachefs_rl_listhead_t *lhp;
ASSERT((cachep->c_flags & CACHE_NOCACHE) == 0);
ASSERT(cachep->c_flags & CACHE_NOFILL);
mutex_enter(&cachep->c_contentslock);
cachep->c_flags &= ~CACHE_NOFILL;
/* move the active list to the rl list */
cachefs_rl_cleanup(cachep);
lhp = &cachep->c_rlinfo.rl_items[
CACHEFS_RL_INDEX(CACHEFS_RL_PACKED_PENDING)];
if (lhp->rli_itemcnt != 0)
cachep->c_flags |= CACHE_PACKED_PENDING;
cachefs_cache_dirty(cachep, 0);
mutex_exit(&cachep->c_contentslock);
}
/*
* ------------------------------------------------------------------
*
* cachefs_cache_dirty
*
* Description:
* Marks the cache as dirty (active).
* Arguments:
* cachep the cachefscache_t to mark as dirty
* lockit 1 means grab contents lock, 0 means caller grabbed it
* Returns:
* Preconditions:
* precond(cachep)
* precond(cache is in rw mode)
*/
void
cachefs_cache_dirty(struct cachefscache *cachep, int lockit)
{
int error;
ASSERT((cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL)) == 0);
if (lockit) {
mutex_enter(&cachep->c_contentslock);
} else {
ASSERT(MUTEX_HELD(&cachep->c_contentslock));
}
if (cachep->c_flags & CACHE_DIRTY) {
ASSERT(cachep->c_usage.cu_flags & CUSAGE_ACTIVE);
} else {
/*
* turn on the "cache active" (dirty) flag and write it
* synchronously to disk
*/
cachep->c_flags |= CACHE_DIRTY;
cachep->c_usage.cu_flags |= CUSAGE_ACTIVE;
if (error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
(caddr_t)&cachep->c_usage, sizeof (struct cache_usage),
0LL, UIO_SYSSPACE, FSYNC, (rlim64_t)RLIM_INFINITY,
kcred, NULL)) {
cmn_err(CE_WARN,
"cachefs: clean flag write error: %d\n", error);
}
}
if (lockit)
mutex_exit(&cachep->c_contentslock);
}
/*
* ------------------------------------------------------------------
*
* cachefs_cache_rssync
*
* Description:
* Syncs out the resource file for the cachefscache_t object.
* Arguments:
* cachep the cachefscache_t object to operate on
* Returns:
* Returns 0 for success, !0 on an error writing data.
* Preconditions:
* precond(cachep)
* precond(cache is in rw mode)
*/
int
cachefs_cache_rssync(struct cachefscache *cachep)
{
int error;
ASSERT((cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL |
CACHE_ALLOC_PENDING)) == 0);
if (cachep->c_rl_entries != NULL) {
error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
(caddr_t)cachep->c_rl_entries, MAXBSIZE,
(offset_t)((cachep->c_rl_window + 1) * MAXBSIZE),
UIO_SYSSPACE, FSYNC, RLIM_INFINITY, kcred, NULL);
if (error)
cmn_err(CE_WARN, "cachefs: Can't Write rl entries Info\n");
cachefs_kmem_free(cachep->c_rl_entries, MAXBSIZE);
cachep->c_rl_entries = NULL;
}
/* write the usage struct for this cache */
error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
(caddr_t)&cachep->c_usage, sizeof (struct cache_usage),
0LL, UIO_SYSSPACE, 0, (rlim64_t)RLIM_INFINITY, kcred, NULL);
if (error) {
cmn_err(CE_WARN, "cachefs: Can't Write Cache Usage Info\n");
}
/* write the rlinfo for this cache */
error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
(caddr_t)&cachep->c_rlinfo, sizeof (cachefs_rl_info_t),
(offset_t)sizeof (struct cache_usage), UIO_SYSSPACE,
0, (rlim64_t)RLIM_INFINITY, kcred, NULL);
if (error) {
cmn_err(CE_WARN, "cachefs: Can't Write Cache RL Info\n");
}
error = VOP_FSYNC(cachep->c_resfilevp, FSYNC, kcred);
return (error);
}
/*
* ------------------------------------------------------------------
*
* cachefs_cache_sync
*
* Description:
* Sync a cache which includes all of its fscaches.
* Arguments:
* cachep the cachefscache_t object to sync
* Returns:
* Preconditions:
* precond(cachep)
* precond(cache is in rw mode)
*/
void
cachefs_cache_sync(struct cachefscache *cachep)
{
struct fscache *fscp;
struct fscache **syncfsc;
int nfscs, fscidx;
int try;
int done;
if (cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL))
return;
done = 0;
for (try = 0; (try < 2) && !done; try++) {
nfscs = 0;
/*
* here we turn off the cache-wide DIRTY flag. If it's still
* off when the sync completes we can write the clean flag to
* disk telling fsck it has no work to do.
*/
#ifdef CFSCLEANFLAG
mutex_enter(&cachep->c_contentslock);
cachep->c_flags &= ~CACHE_DIRTY;
mutex_exit(&cachep->c_contentslock);
#endif /* CFSCLEANFLAG */
cachefs_log_process_queue(cachep, 1);
mutex_enter(&cachep->c_fslistlock);
syncfsc = cachefs_kmem_alloc(
cachep->c_refcnt * sizeof (struct fscache *), KM_SLEEP);
for (fscp = cachep->c_fslist; fscp; fscp = fscp->fs_next) {
fscache_hold(fscp);
ASSERT(nfscs < cachep->c_refcnt);
syncfsc[nfscs++] = fscp;
}
ASSERT(nfscs == cachep->c_refcnt);
mutex_exit(&cachep->c_fslistlock);
for (fscidx = 0; fscidx < nfscs; fscidx++) {
fscp = syncfsc[fscidx];
fscache_sync(fscp);
fscache_rele(fscp);
}
/* get rid of any unused fscache objects */
mutex_enter(&cachep->c_fslistlock);
fscache_list_gc(cachep);
mutex_exit(&cachep->c_fslistlock);
/*
* here we check the cache-wide DIRTY flag.
* If it's off,
* we can write the clean flag to disk.
*/
#ifdef CFSCLEANFLAG
mutex_enter(&cachep->c_contentslock);
if ((cachep->c_flags & CACHE_DIRTY) == 0) {
if (cachep->c_usage.cu_flags & CUSAGE_ACTIVE) {
cachep->c_usage.cu_flags &= ~CUSAGE_ACTIVE;
if (cachefs_cache_rssync(cachep) == 0) {
done = 1;
} else {
cachep->c_usage.cu_flags |=
CUSAGE_ACTIVE;
}
} else {
done = 1;
}
}
mutex_exit(&cachep->c_contentslock);
#else /* CFSCLEANFLAG */
mutex_enter(&cachep->c_contentslock);
(void) cachefs_cache_rssync(cachep);
mutex_exit(&cachep->c_contentslock);
done = 1;
#endif /* CFSCLEANFLAG */
cachefs_kmem_free(syncfsc, nfscs * sizeof (struct fscache *));
}
}
/*
* ------------------------------------------------------------------
*
* cachefs_cache_unique
*
 * Description:
 *	Generates a unique number by combining the persistent
 *	cu_unique counter with the in-memory c_unique counter;
 *	the resource file is synced when either counter wraps.
 * Arguments:
 *	cachep	the cachefscache_t object to operate on
 * Returns:
 *	Returns a unique number, or 0 if the resource file
 *	could not be synced.
* Preconditions:
* precond(cachep)
*/
uint_t
cachefs_cache_unique(cachefscache_t *cachep)
{
uint_t unique = 0;
int error = 0;
mutex_enter(&cachep->c_contentslock);
if (cachep->c_usage.cu_flags & CUSAGE_NEED_ADJUST ||
++(cachep->c_unique) == 0) {
cachep->c_usage.cu_unique++;
if (cachep->c_unique == 0)
cachep->c_unique = 1;
		cachep->c_usage.cu_flags &= ~CUSAGE_NEED_ADJUST;
error = cachefs_cache_rssync(cachep);
}
if (error == 0)
unique = (cachep->c_usage.cu_unique << 16) + cachep->c_unique;
mutex_exit(&cachep->c_contentslock);
return (unique);
}
/*
 * Called from c_getfrontfile. Shouldn't be called from anywhere else!
*/
static int
cachefs_createfrontfile(cnode_t *cp, struct filegrp *fgp)
{
char name[CFS_FRONTFILE_NAME_SIZE];
struct vattr *attrp = NULL;
int error = 0;
int mode;
int alloc = 0;
int freefile = 0;
int ffrele = 0;
int rlfree = 0;
rl_entry_t rl_ent;
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_FRONT)
printf("c_createfrontfile: ENTER cp %p fgp %p\n",
(void *)cp, (void *)fgp);
#endif
ASSERT(cp->c_frontvp == NULL);
ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);
/* quit if we cannot write to the filegrp */
if ((fgp->fg_flags & CFS_FG_WRITE) == 0) {
error = ENOENT;
goto out;
}
/* find or create the filegrp attrcache file if necessary */
if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
error = filegrp_allocattr(fgp);
if (error)
goto out;
}
make_ascii_name(&cp->c_id, name);
/* set up attributes for the front file we want to create */
attrp = cachefs_kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
alloc++;
attrp->va_mode = S_IFREG | 0666;
mode = 0666;
attrp->va_uid = 0;
attrp->va_gid = 0;
attrp->va_type = VREG;
attrp->va_size = 0;
attrp->va_mask = AT_SIZE | AT_TYPE | AT_MODE | AT_UID | AT_GID;
/* get a file from the resource counts */
error = cachefs_allocfile(fgp->fg_fscp->fs_cache);
if (error) {
error = EINVAL;
goto out;
}
freefile++;
/* create the metadata slot if necessary */
if (cp->c_flags & CN_ALLOC_PENDING) {
error = filegrp_create_metadata(fgp, &cp->c_metadata,
&cp->c_id);
if (error) {
error = EINVAL;
goto out;
}
cp->c_flags &= ~CN_ALLOC_PENDING;
cp->c_flags |= CN_UPDATED;
}
/* get an rl entry if necessary */
if (cp->c_metadata.md_rlno == 0) {
rl_ent.rl_fileno = cp->c_id.cid_fileno;
rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
rl_ent.rl_fsid = fgp->fg_fscp->fs_cfsid;
rl_ent.rl_attrc = 0;
error = cachefs_rl_alloc(fgp->fg_fscp->fs_cache, &rl_ent,
&cp->c_metadata.md_rlno);
if (error)
goto out;
cachefs_rlent_moveto(fgp->fg_fscp->fs_cache,
CACHEFS_RL_ACTIVE, cp->c_metadata.md_rlno,
cp->c_metadata.md_frontblks);
cp->c_metadata.md_rltype = CACHEFS_RL_ACTIVE;
rlfree++;
cp->c_flags |= CN_UPDATED; /* XXX sam: do we need this? */
/* increment number of front files */
error = filegrp_ffhold(fgp);
if (error) {
error = EINVAL;
goto out;
}
ffrele++;
}
if (cp->c_flags & CN_ASYNC_POP_WORKING) {
/* lookup the already created front file */
error = VOP_LOOKUP(fgp->fg_dirvp, name, &cp->c_frontvp,
NULL, 0, NULL, kcred);
} else {
/* create the front file */
error = VOP_CREATE(fgp->fg_dirvp, name, attrp, EXCL, mode,
&cp->c_frontvp, kcred, 0);
}
if (error) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_FRONT)
printf("c_createfrontfile: Can't create cached object"
" error %u, fileno %llx\n", error,
(u_longlong_t)cp->c_id.cid_fileno);
#endif
goto out;
}
/* get a copy of the fid of the front file */
cp->c_metadata.md_fid.fid_len = MAXFIDSZ;
error = VOP_FID(cp->c_frontvp, &cp->c_metadata.md_fid);
if (error) {
/*
* If we get back ENOSPC then the fid we passed in was too
* small. For now we don't do anything and map to EINVAL.
*/
if (error == ENOSPC) {
error = EINVAL;
}
goto out;
}
dnlc_purge_vp(cp->c_frontvp);
cp->c_metadata.md_flags |= MD_FILE;
cp->c_flags |= CN_UPDATED | CN_NEED_FRONT_SYNC;
out:
if (error) {
if (cp->c_frontvp) {
VN_RELE(cp->c_frontvp);
(void) VOP_REMOVE(fgp->fg_dirvp, name, kcred);
cp->c_frontvp = NULL;
}
if (ffrele)
filegrp_ffrele(fgp);
if (freefile)
cachefs_freefile(fgp->fg_fscp->fs_cache);
if (rlfree) {
#ifdef CFSDEBUG
cachefs_rlent_verify(fgp->fg_fscp->fs_cache,
CACHEFS_RL_ACTIVE, cp->c_metadata.md_rlno);
#endif /* CFSDEBUG */
cachefs_rlent_moveto(fgp->fg_fscp->fs_cache,
CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
cp->c_metadata.md_rlno = 0;
cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
}
cachefs_nocache(cp);
}
if (alloc)
cachefs_kmem_free(attrp, sizeof (struct vattr));
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_FRONT)
printf("c_createfrontfile: EXIT error = %d name %s\n", error,
name);
#endif
return (error);
}
/*
* Releases resources associated with the front file.
* Only call this routine if a ffhold has been done.
 * It's okay to call this routine even if the front file does not exist.
*/
void
cachefs_removefrontfile(cachefs_metadata_t *mdp, cfs_cid_t *cidp,
filegrp_t *fgp)
{
int error, enoent;
char name[CFS_FRONTFILE_NAME_SIZE + 2];
ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);
enoent = 0;
if (mdp->md_flags & MD_FILE) {
if (fgp->fg_dirvp == NULL) {
cmn_err(CE_WARN, "cachefs: remove error, run fsck\n");
return;
}
make_ascii_name(cidp, name);
error = VOP_REMOVE(fgp->fg_dirvp, name, kcred);
if (error == ENOENT)
enoent = 1;
if ((error) && (error != ENOENT)) {
cmn_err(CE_WARN, "UFS remove error %s %d, run fsck\n",
name, error);
}
if (mdp->md_flags & MD_ACLDIR) {
(void) strcat(name, ".d");
error = VOP_RMDIR(fgp->fg_dirvp, name, fgp->fg_dirvp,
kcred);
if ((error) && (error != ENOENT)) {
cmn_err(CE_WARN, "frontfs rmdir error %s %d"
"; run fsck\n", name, error);
}
}
mdp->md_flags &= ~(MD_FILE | MD_POPULATED | MD_ACL | MD_ACLDIR);
bzero(&mdp->md_allocinfo, mdp->md_allocents *
sizeof (struct cachefs_allocmap));
cachefs_freefile(fgp->fg_fscp->fs_cache);
}
/*
	 * Clear the packed bit; fastsymlinks and special files
* do not have a front file.
*/
mdp->md_flags &= ~MD_PACKED;
/* XXX either rename routine or move this to caller */
if (enoent == 0)
filegrp_ffrele(fgp);
if (mdp->md_frontblks) {
cachefs_freeblocks(fgp->fg_fscp->fs_cache, mdp->md_frontblks,
mdp->md_rltype);
mdp->md_frontblks = 0;
}
}
/*
* This is the interface to the rest of CFS. This takes a cnode, and returns
 * the frontvp (stuffs it in the cnode). This creates an attrcache slot
 * and a frontfile if necessary.
*/
int
cachefs_getfrontfile(cnode_t *cp)
{
struct filegrp *fgp = cp->c_filegrp;
int error;
struct vattr va;
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_SUBR)
printf("c_getfrontfile: ENTER cp %p\n", (void *)cp);
#endif
ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);
ASSERT(MUTEX_HELD(&cp->c_statelock));
/*
* Now we check to see if there is a front file for this entry.
* If there is, we get the vnode for it and stick it in the cnode.
* Otherwise, we create a front file, get the vnode for it and stick
* it in the cnode.
*/
if (cp->c_flags & CN_STALE) {
cp->c_flags |= CN_NOCACHE;
error = ESTALE;
goto out;
}
/*
* If the cnode is being populated, and we're not the populating
* thread, then block until the pop thread completes. If we are the
* pop thread, then we may come in here, but not to nuke the directory
* cnode at a critical juncture. If we return from a cv_wait and the
* cnode is now stale, don't bother trying to get the front file.
*/
while ((cp->c_flags & CN_ASYNC_POP_WORKING) &&
(cp->c_popthrp != curthread)) {
cv_wait(&cp->c_popcv, &cp->c_statelock);
if (cp->c_flags & CN_STALE) {
cp->c_flags |= CN_NOCACHE;
error = ESTALE;
goto out;
}
}
if ((cp->c_metadata.md_flags & MD_FILE) == 0) {
#ifdef CFSDEBUG
if (cp->c_frontvp != NULL)
CFS_DEBUG(CFSDEBUG_FRONT)
printf(
"c_getfrontfile: !MD_FILE and frontvp not null cp %p\n",
(void *)cp);
#endif
if (CTOV(cp)->v_type == VDIR)
ASSERT((cp->c_metadata.md_flags & MD_POPULATED) == 0);
error = cachefs_createfrontfile(cp, fgp);
if (error)
goto out;
} else {
/*
* A front file exists, all we need to do is to grab the fid,
* do a VFS_VGET() on the fid, stuff the vnode in the cnode,
* and return.
*/
if (fgp->fg_dirvp == NULL) {
cmn_err(CE_WARN, "cachefs: gff0: corrupted file system"
" run fsck\n");
cachefs_inval_object(cp);
cp->c_flags |= CN_NOCACHE;
error = ESTALE;
goto out;
}
error = VFS_VGET(fgp->fg_dirvp->v_vfsp, &cp->c_frontvp,
&cp->c_metadata.md_fid);
if (error || (cp->c_frontvp == NULL)) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_FRONT)
printf("cachefs: "
"gff1: front file system error %d\n",
error);
#endif /* CFSDEBUG */
cachefs_inval_object(cp);
cp->c_flags |= CN_NOCACHE;
error = ESTALE;
goto out;
}
/* don't need to check timestamps if need_front_sync is set */
if (cp->c_flags & CN_NEED_FRONT_SYNC) {
error = 0;
goto out;
}
/* don't need to check empty directories */
if (CTOV(cp)->v_type == VDIR &&
((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
error = 0;
goto out;
}
/* get modify time of the front file */
va.va_mask = AT_MTIME;
error = VOP_GETATTR(cp->c_frontvp, &va, 0, kcred);
if (error) {
cmn_err(CE_WARN, "cachefs: gff2: front file"
" system error %d", error);
cachefs_inval_object(cp);
error = (cp->c_flags & CN_NOCACHE) ? ESTALE : 0;
goto out;
}
/* compare with modify time stored in metadata */
if (bcmp(&va.va_mtime, &cp->c_metadata.md_timestamp,
sizeof (timestruc_t)) != 0) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_GENERAL | CFSDEBUG_INVALIDATE) {
long sec, nsec;
sec = cp->c_metadata.md_timestamp.tv_sec;
nsec = cp->c_metadata.md_timestamp.tv_nsec;
printf("c_getfrontfile: timestamps don't"
" match fileno %lld va %lx %lx"
" meta %lx %lx\n",
(u_longlong_t)cp->c_id.cid_fileno,
va.va_mtime.tv_sec,
va.va_mtime.tv_nsec, sec, nsec);
}
#endif
cachefs_inval_object(cp);
error = (cp->c_flags & CN_NOCACHE) ? ESTALE : 0;
}
}
out:
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_FRONT)
printf("c_getfrontfile: EXIT error = %d\n", error);
#endif
return (error);
}
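/*
 * Invalidates the cached object.  Truncates the front file to zero
 * size and resets the metadata so the object will be repopulated the
 * next time it is used.  Leaves the cnode in NOCACHE mode if the
 * cache cannot be modified or the front file cannot be accessed.
 */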
void
cachefs_inval_object(cnode_t *cp)
{
cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
struct filegrp *fgp = cp->c_filegrp;
int error;
ASSERT(CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)) == 0);
ASSERT(MUTEX_HELD(&cp->c_statelock));
ASSERT((cp->c_flags & CN_ASYNC_POP_WORKING) == 0 ||
cp->c_popthrp == curthread);
#if 0
CFS_DEBUG(CFSDEBUG_SUBR)
printf("c_inval_object: ENTER cp %p\n", (void *)cp);
if (cp->c_flags & (CN_ASYNC_POPULATE | CN_ASYNC_POP_WORKING))
debug_enter("inval object during async pop");
#endif
cp->c_flags |= CN_NOCACHE;
/* if we cannot modify the cache */
if (C_TO_FSCACHE(cp)->fs_cache->c_flags &
(CACHE_NOFILL | CACHE_NOCACHE)) {
goto out;
}
/* if there is a front file */
if (cp->c_metadata.md_flags & MD_FILE) {
if (fgp->fg_dirvp == NULL)
goto out;
/* get the front file vp if necessary */
if (cp->c_frontvp == NULL) {
error = VFS_VGET(fgp->fg_dirvp->v_vfsp, &cp->c_frontvp,
&cp->c_metadata.md_fid);
if (error || (cp->c_frontvp == NULL)) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_FRONT)
printf("cachefs: "
"io: front file error %d\n", error);
#endif /* CFSDEBUG */
goto out;
}
}
/* truncate the file to zero size */
error = cachefs_frontfile_size(cp, 0);
if (error)
goto out;
cp->c_flags &= ~CN_NOCACHE;
/* if a directory, v_type is zero if called from initcnode */
if (cp->c_attr.va_type == VDIR) {
if (cp->c_usage < CFS_DIRCACHE_COST) {
cp->c_invals++;
if (cp->c_invals > CFS_DIRCACHE_INVAL) {
cp->c_invals = 0;
}
} else
cp->c_invals = 0;
cp->c_usage = 0;
}
} else {
cp->c_flags &= ~CN_NOCACHE;
}
out:
if ((cp->c_metadata.md_flags & MD_PACKED) &&
(cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) &&
((cachep->c_flags & CACHE_NOFILL) == 0)) {
ASSERT(cp->c_metadata.md_rlno != 0);
if (cp->c_metadata.md_rltype != CACHEFS_RL_PACKED_PENDING) {
cachefs_rlent_moveto(cachep,
CACHEFS_RL_PACKED_PENDING,
cp->c_metadata.md_rlno,
cp->c_metadata.md_frontblks);
cp->c_metadata.md_rltype = CACHEFS_RL_PACKED_PENDING;
/* unconditionally set CN_UPDATED below */
}
}
cachefs_purgeacl(cp);
if (cp->c_flags & CN_ASYNC_POP_WORKING)
cp->c_flags |= CN_NOCACHE;
cp->c_metadata.md_flags &= ~(MD_POPULATED | MD_INVALREADDIR |
MD_FASTSYMLNK);
cp->c_flags &= ~CN_NEED_FRONT_SYNC;
cp->c_flags |= CN_UPDATED;
/*
* If the object invalidated is a directory, the dnlc should be purged
* to elide all references to this (directory) vnode.
*/
if (CTOV(cp)->v_type == VDIR)
dnlc_purge_vp(CTOV(cp));
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_SUBR)
printf("c_inval_object: EXIT\n");
#endif
}
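/*
 * Converts a cid into the ascii name of its front file.  The name is
 * the 16-digit hex representation of the fileno, most significant
 * nibble first, prefixed with 'L' if the cid refers to a local file.
 */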
void
make_ascii_name(cfs_cid_t *cidp, char *strp)
{
int i = sizeof (uint_t) * 4;
u_longlong_t index;
ino64_t name;
if (cidp->cid_flags & CFS_CID_LOCAL)
*strp++ = 'L';
name = (ino64_t)cidp->cid_fileno;
do {
index = (((u_longlong_t)name) & 0xf000000000000000) >> 60;
index &= (u_longlong_t)0xf;
ASSERT(index < (u_longlong_t)16);
*strp++ = "0123456789abcdef"[index];
name <<= 4;
} while (--i);
*strp = '\0';
}
void
cachefs_nocache(cnode_t *cp)
{
fscache_t *fscp = C_TO_FSCACHE(cp);
cachefscache_t *cachep = fscp->fs_cache;
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_SUBR)
printf("c_nocache: ENTER cp %p\n", (void *)cp);
#endif
ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
ASSERT(MUTEX_HELD(&cp->c_statelock));
if ((cp->c_flags & CN_NOCACHE) == 0) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_INVALIDATE)
printf("cachefs_nocache: invalidating %llu\n",
(u_longlong_t)cp->c_id.cid_fileno);
#endif
/*
* Here we are waiting until inactive time to do
* the inval_object. In case we don't get to inactive
* (because of a crash, say) we set up a timestamp mismatch
* such that getfrontfile will blow the front file away
* next time we try to use it.
*/
cp->c_metadata.md_timestamp.tv_sec = 0;
cp->c_metadata.md_timestamp.tv_nsec = 0;
cp->c_metadata.md_flags &= ~(MD_POPULATED | MD_INVALREADDIR |
MD_FASTSYMLNK);
cp->c_flags &= ~CN_NEED_FRONT_SYNC;
cachefs_purgeacl(cp);
/*
* It is possible we can nocache while disconnected.
* A directory could be nocached by running out of space.
* A regular file should only be nocached if an I/O error
* occurs to the front fs.
* We count on the item staying on the modified list
		 * so we do not lose the cid to fid mapping for directories.
*/
if ((cp->c_metadata.md_flags & MD_PACKED) &&
(cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) &&
((cachep->c_flags & CACHE_NOFILL) == 0)) {
ASSERT(cp->c_metadata.md_rlno != 0);
if (cp->c_metadata.md_rltype !=
CACHEFS_RL_PACKED_PENDING) {
cachefs_rlent_moveto(cachep,
CACHEFS_RL_PACKED_PENDING,
cp->c_metadata.md_rlno,
cp->c_metadata.md_frontblks);
cp->c_metadata.md_rltype =
CACHEFS_RL_PACKED_PENDING;
/* unconditionally set CN_UPDATED below */
}
}
if (CTOV(cp)->v_type == VDIR)
dnlc_purge_vp(CTOV(cp));
cp->c_flags |= (CN_NOCACHE | CN_UPDATED);
}
if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_NOCACHE))
cachefs_log_nocache(cachep, 0, fscp->fs_cfsvfsp,
&cp->c_metadata.md_cookie, cp->c_id.cid_fileno);
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_SUBR)
printf("c_nocache: EXIT cp %p\n", (void *)cp);
#endif
}
/*
* Checks to see if the page is in the disk cache, by checking the allocmap.
*/
int
cachefs_check_allocmap(cnode_t *cp, u_offset_t off)
{
int i;
size_t dbl_size_to_look = cp->c_attr.va_size - off;
uint_t size_to_look;
if (dbl_size_to_look > (u_offset_t)PAGESIZE)
size_to_look = (uint_t)PAGESIZE;
else
/*LINTED alignment okay*/
size_to_look = (uint_t)dbl_size_to_look;
for (i = 0; i < cp->c_metadata.md_allocents; i++) {
struct cachefs_allocmap *allocp =
cp->c_metadata.md_allocinfo + i;
if (off >= allocp->am_start_off) {
if ((off + size_to_look) <=
(allocp->am_start_off + allocp->am_size)) {
struct fscache *fscp = C_TO_FSCACHE(cp);
cachefscache_t *cachep = fscp->fs_cache;
if (CACHEFS_LOG_LOGGING(cachep,
CACHEFS_LOG_CALLOC))
cachefs_log_calloc(cachep, 0,
fscp->fs_cfsvfsp,
&cp->c_metadata.md_cookie,
cp->c_id.cid_fileno,
off, size_to_look);
/*
* Found the page in the CFS disk cache.
*/
return (1);
}
} else {
return (0);
}
}
return (0);
}
/*
* Merges adjacent allocmap entries together where possible, e.g.
* offset=0x0, size=0x40000
 * offset=0x40000, size=0x20000	becomes just offset=0x0, size=0x90000
* offset=0x60000, size=0x30000
*/
void
cachefs_coalesce_allocmap(struct cachefs_metadata *cmd)
{
int i, reduced = 0;
struct cachefs_allocmap *allocp, *nallocp;
nallocp = allocp = cmd->md_allocinfo;
allocp++;
for (i = 1; i < cmd->md_allocents; i++, allocp++) {
if (nallocp->am_start_off + nallocp->am_size ==
allocp->am_start_off) {
nallocp->am_size += allocp->am_size;
reduced++;
} else {
nallocp++;
nallocp->am_start_off = allocp->am_start_off;
nallocp->am_size = allocp->am_size;
}
}
cmd->md_allocents -= reduced;
}
/*
* Updates the allocmap to reflect a new chunk of data that has been
* populated.
*/
void
cachefs_update_allocmap(cnode_t *cp, u_offset_t off, size_t size)
{
int i;
struct cachefs_allocmap *allocp;
struct fscache *fscp = C_TO_FSCACHE(cp);
cachefscache_t *cachep = fscp->fs_cache;
u_offset_t saveoff;
u_offset_t savesize;
u_offset_t logoff = off;
size_t logsize = size;
u_offset_t endoff;
u_offset_t tmpendoff;
/*
* We try to see if we can coalesce the current block into an existing
* allocation and mark it as such.
* If we can't do that then we make a new entry in the allocmap.
	 * When we run out of allocmap entries, put the cnode in NOCACHE mode.
*/
again:
allocp = cp->c_metadata.md_allocinfo;
for (i = 0; i < cp->c_metadata.md_allocents; i++, allocp++) {
if (off <= (allocp->am_start_off)) {
endoff = off + size;
if (endoff >= allocp->am_start_off) {
tmpendoff = allocp->am_start_off +
allocp->am_size;
if (endoff < tmpendoff)
endoff = tmpendoff;
allocp->am_size = endoff - off;
allocp->am_start_off = off;
cachefs_coalesce_allocmap(&cp->c_metadata);
allocp = cp->c_metadata.md_allocinfo;
if (allocp->am_size >= cp->c_size)
cp->c_metadata.md_flags |= MD_POPULATED;
return;
} else {
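				/*
				 * The new chunk ends before this entry
				 * begins; swap the chunk into this slot
				 * to keep the list sorted and restart
				 * to re-insert the displaced entry.
				 */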
saveoff = off;
savesize = size;
off = allocp->am_start_off;
size = allocp->am_size;
allocp->am_size = savesize;
allocp->am_start_off = saveoff;
goto again;
}
} else {
endoff = allocp->am_start_off + allocp->am_size;
if (off < endoff) {
tmpendoff = off + size;
if (endoff < tmpendoff)
endoff = tmpendoff;
allocp->am_size = endoff - allocp->am_start_off;
cachefs_coalesce_allocmap(&cp->c_metadata);
allocp = cp->c_metadata.md_allocinfo;
if (allocp->am_size >= cp->c_size)
cp->c_metadata.md_flags |= MD_POPULATED;
return;
}
if (off == (allocp->am_start_off + allocp->am_size)) {
allocp->am_size += size;
cachefs_coalesce_allocmap(&cp->c_metadata);
allocp = cp->c_metadata.md_allocinfo;
if (allocp->am_size >= cp->c_size)
cp->c_metadata.md_flags |= MD_POPULATED;
return;
}
}
}
if (i == C_MAX_ALLOCINFO_SLOTS) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_ALLOCMAP)
printf("c_update_alloc_map: "
"Too many allinfo entries cp %p fileno %llu %p\n",
(void *)cp, (u_longlong_t)cp->c_id.cid_fileno,
(void *)cp->c_metadata.md_allocinfo);
#endif
cachefs_nocache(cp);
return;
}
allocp->am_start_off = off;
allocp->am_size = (u_offset_t)size;
if (allocp->am_size >= cp->c_size)
cp->c_metadata.md_flags |= MD_POPULATED;
cp->c_metadata.md_allocents++;
if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_UALLOC))
cachefs_log_ualloc(cachep, 0, fscp->fs_cfsvfsp,
&cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
logoff, logsize);
}
/*
* CFS population function
*
* before async population, this function used to turn on the cnode
* flags CN_UPDATED, CN_NEED_FRONT_SYNC, and CN_POPULATION_PENDING.
* now, however, it's the responsibility of the caller to do this if
* this function returns 0 (no error).
*/
int
cachefs_populate(cnode_t *cp, u_offset_t off, size_t popsize, vnode_t *frontvp,
vnode_t *backvp, u_offset_t cpsize, cred_t *cr)
{
int error = 0;
caddr_t addr;
u_offset_t upto;
uint_t size;
u_offset_t from = off;
cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
ssize_t resid;
struct fbuf *fbp;
caddr_t buf = kmem_alloc(MAXBSIZE, KM_SLEEP);
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_VOPS)
printf("cachefs_populate: ENTER cp %p off %lld\n",
(void *)cp, off);
#endif
upto = MIN((off + popsize), cpsize);
while (from < upto) {
u_offset_t blkoff = (from & (offset_t)MAXBMASK);
uint_t n = from - blkoff;
size = upto - from;
if (upto > (blkoff + MAXBSIZE))
size = MAXBSIZE - n;
error = fbread(backvp, (offset_t)blkoff, n + size,
S_OTHER, &fbp);
if (CFS_TIMEOUT(C_TO_FSCACHE(cp), error))
goto out;
else if (error) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_BACK)
printf("cachefs_populate: fbread error %d\n",
error);
#endif
goto out;
}
addr = fbp->fb_addr;
ASSERT(addr != NULL);
ASSERT(n + size <= MAXBSIZE);
bcopy(addr, buf, n + size);
fbrelse(fbp, S_OTHER);
if (n == 0 || cachefs_check_allocmap(cp, blkoff) == 0) {
if (error = cachefs_allocblocks(cachep, 1,
cp->c_metadata.md_rltype))
goto out;
cp->c_metadata.md_frontblks++;
}
resid = 0;
error = vn_rdwr(UIO_WRITE, frontvp, buf + n, size,
(offset_t)from, UIO_SYSSPACE, 0,
(rlim64_t)RLIM64_INFINITY, cr, &resid);
if (error) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_FRONT)
printf("cachefs_populate: "
"Got error = %d from vn_rdwr\n", error);
#endif
goto out;
}
#ifdef CFSDEBUG
if (resid)
CFS_DEBUG(CFSDEBUG_FRONT)
printf("cachefs_populate: non-zero resid %ld\n",
resid);
#endif
from += size;
}
(void) cachefs_update_allocmap(cp, off, upto - off);
out:
if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_POPULATE))
cachefs_log_populate(cachep, error,
C_TO_FSCACHE(cp)->fs_cfsvfsp,
&cp->c_metadata.md_cookie, cp->c_id.cid_fileno, off,
popsize);
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_VOPS)
printf("cachefs_populate: EXIT cp %p error %d\n",
(void *)cp, error);
#endif
kmem_free(buf, MAXBSIZE);
return (error);
}
/*
 * Due to a compiler error we shifted cnode to the last argument slot;
 * this occurred during the large files project - XXX.
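 *
 * Given a request at [off, off+size), computes a page-aligned
 * population window (*popoffp, *popsizep), extending the request
 * toward neighboring allocmap entries so populated chunks cluster.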
*/
void
cachefs_cluster_allocmap(u_offset_t off, u_offset_t *popoffp,
size_t *popsizep, size_t size, struct cnode *cp)
{
int i;
u_offset_t lastoff = 0;
u_offset_t forward_diff = 0;
u_offset_t backward_diff = 0;
ASSERT(size <= C_TO_FSCACHE(cp)->fs_info.fi_popsize);
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_SUBR)
printf("cachefs_cluster_allocmap: off %llx, size %llx, "
"c_size %llx\n", off, size, (longlong_t)cp->c_size);
#endif /* CFSDEBUG */
for (i = 0; i < cp->c_metadata.md_allocents; i++) {
struct cachefs_allocmap *allocp =
cp->c_metadata.md_allocinfo + i;
if (allocp->am_start_off > off) {
if ((off + size) > allocp->am_start_off) {
forward_diff = allocp->am_start_off - off;
backward_diff = size - forward_diff;
if (backward_diff > off)
backward_diff = off;
if (lastoff > (off - backward_diff))
backward_diff = off - lastoff;
} else {
forward_diff = size;
}
*popoffp = (off - backward_diff) & (offset_t)PAGEMASK;
*popsizep = ((off + forward_diff) - *popoffp) &
(offset_t)PAGEMASK;
return;
} else {
lastoff = allocp->am_start_off + allocp->am_size;
}
}
if ((lastoff + size) > off) {
*popoffp = (lastoff & (offset_t)PAGEMASK);
} else {
*popoffp = off & (offset_t)PAGEMASK;
}
/*
* 64bit project: popsize is the chunk size used to populate the
* cache (default 64K). As such, 32 bit should suffice.
*/
if ((*popoffp + size) > cp->c_size)
*popsizep = (cp->c_size - *popoffp + PAGEOFFSET) &
(offset_t)PAGEMASK;
else if (size < PAGESIZE)
*popsizep = (size + PAGEOFFSET) &
(offset_t)PAGEMASK;
else
*popsizep = size & (offset_t)PAGEMASK;
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_SUBR)
printf("cachefs_cluster_allocmap: popoff %llx, popsize %llx\n",
(u_longlong_t)(*popoffp), (u_longlong_t)(*popsizep));
#endif /* CFSDEBUG */
}
/*
* "populate" a symlink in the cache
*/
int
cachefs_stuffsymlink(cnode_t *cp, caddr_t buf, int buflen)
{
int error = 0;
struct fscache *fscp = C_TO_FSCACHE(cp);
cachefscache_t *cachep = fscp->fs_cache;
struct cachefs_metadata *mdp = &cp->c_metadata;
ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
ASSERT(MUTEX_HELD(&cp->c_statelock));
if (CFS_ISFS_BACKFS_NFSV4(fscp))
goto out;
if (cp->c_flags & CN_NOCACHE)
return (ENOENT);
cp->c_size = (u_offset_t)buflen;
/* if can create a fast sym link */
if (buflen <= C_FSL_SIZE) {
/* give up the front file resources */
if (mdp->md_rlno) {
cachefs_removefrontfile(mdp, &cp->c_id, cp->c_filegrp);
cachefs_rlent_moveto(cachep, CACHEFS_RL_FREE,
mdp->md_rlno, 0);
mdp->md_rlno = 0;
mdp->md_rltype = CACHEFS_RL_NONE;
}
/* put sym link contents in allocinfo in metadata */
bzero(mdp->md_allocinfo, C_FSL_SIZE);
bcopy(buf, mdp->md_allocinfo, buflen);
mdp->md_flags |= MD_FASTSYMLNK;
cp->c_flags &= ~CN_NEED_FRONT_SYNC;
cp->c_flags |= CN_UPDATED;
goto out;
}
/* else create a sym link in a front file */
if (cp->c_frontvp == NULL)
error = cachefs_getfrontfile(cp);
if (error)
goto out;
/* truncate front file */
error = cachefs_frontfile_size(cp, 0);
mdp->md_flags &= ~(MD_FASTSYMLNK | MD_POPULATED);
if (error)
goto out;
/* get space for the sym link */
error = cachefs_allocblocks(cachep, 1, cp->c_metadata.md_rltype);
if (error)
goto out;
/* write the sym link to the front file */
error = vn_rdwr(UIO_WRITE, cp->c_frontvp, buf, buflen, 0,
UIO_SYSSPACE, 0, RLIM_INFINITY, kcred, NULL);
if (error) {
cachefs_freeblocks(cachep, 1, cp->c_metadata.md_rltype);
goto out;
}
cp->c_metadata.md_flags |= MD_POPULATED;
cp->c_flags |= CN_NEED_FRONT_SYNC;
cp->c_flags |= CN_UPDATED;
out:
if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CSYMLINK))
cachefs_log_csymlink(cachep, error, fscp->fs_cfsvfsp,
&cp->c_metadata.md_cookie, cp->c_id.cid_fileno, buflen);
return (error);
}
/*
* Reads the full contents of the symbolic link from the back file system.
* *bufp is set to a MAXPATHLEN buffer that must be freed when done
* *buflenp is the length of the link
*/
int
cachefs_readlink_back(cnode_t *cp, cred_t *cr, caddr_t *bufp, int *buflenp)
{
int error;
struct uio uio;
struct iovec iov;
caddr_t buf;
fscache_t *fscp = C_TO_FSCACHE(cp);
ASSERT(MUTEX_HELD(&cp->c_statelock));
*bufp = NULL;
/* get back vnode */
if (cp->c_backvp == NULL) {
error = cachefs_getbackvp(fscp, cp);
if (error)
return (error);
}
/* set up for the readlink */
bzero(&uio, sizeof (struct uio));
bzero(&iov, sizeof (struct iovec));
buf = cachefs_kmem_alloc(MAXPATHLEN, KM_SLEEP);
iov.iov_base = buf;
iov.iov_len = MAXPATHLEN;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_resid = MAXPATHLEN;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_loffset = 0;
uio.uio_fmode = 0;
uio.uio_extflg = UIO_COPY_CACHED;
uio.uio_llimit = MAXOFFSET_T;
/* get the link data */
CFS_DPRINT_BACKFS_NFSV4(fscp,
("cachefs_readlink (nfsv4): cnode %p, backvp %p\n",
cp, cp->c_backvp));
error = VOP_READLINK(cp->c_backvp, &uio, cr);
if (error) {
cachefs_kmem_free(buf, MAXPATHLEN);
} else {
*bufp = buf;
/*LINTED alignment okay*/
*buflenp = MAXPATHLEN - (int)uio.uio_resid;
}
return (error);
}
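/*
 * Gets the back file system vnode for a cnode by doing a VFS_VGET on
 * the stored cookie, and performs any VOP_OPEN deferred by
 * CN_NEEDOPEN so the back file system's open counts stay balanced.
 * Returns ESTALE if the cnode is marked for destruction.
 */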
int
cachefs_getbackvp(struct fscache *fscp, struct cnode *cp)
{
int error = 0;
int flag;
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_CHEAT | CFSDEBUG_BACK)
printf("cachefs_getbackvp: ENTER fscp %p cp %p\n",
(void *)fscp, (void *)cp);
#endif
ASSERT(cp != NULL);
ASSERT(MUTEX_HELD(&cp->c_statelock));
ASSERT(cp->c_backvp == NULL);
ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
/*
* If destroy is set then the last link to a file has been
* removed. Oddly enough NFS will still return a vnode
* for the file if the timeout has not expired.
* This causes headaches for cachefs_push because the
* vnode is really stale.
* So we just short circuit the problem here.
*/
if (cp->c_flags & CN_DESTROY)
return (ESTALE);
ASSERT(fscp->fs_backvfsp);
if (fscp->fs_backvfsp == NULL)
return (ETIMEDOUT);
error = VFS_VGET(fscp->fs_backvfsp, &cp->c_backvp,
(struct fid *)&cp->c_cookie);
if (cp->c_backvp && cp->c_cred &&
((cp->c_flags & CN_NEEDOPEN) || (cp->c_attr.va_type == VREG))) {
/*
* XXX bob: really should pass in the correct flag,
* fortunately nobody pays attention to it
*/
flag = 0;
/*
		 * If CN_NEEDOPEN is set, then this file was VOP_OPEN'd
* but the backvp was not. So, for the sake of the vnode
* open counts used by delegation, we need to OPEN the backvp
* with the same flags that were used for this cnode. That way
* when the file is VOP_CLOSE'd the counts won't go negative.
*/
if (cp->c_flags & CN_NEEDOPEN) {
cp->c_flags &= ~CN_NEEDOPEN;
if (cp->c_rdcnt > 0) {
cp->c_rdcnt--;
flag |= FREAD;
}
if (cp->c_wrcnt > 0) {
cp->c_wrcnt--;
flag |= FWRITE;
}
}
error = VOP_OPEN(&cp->c_backvp, flag, cp->c_cred);
if (error) {
VN_RELE(cp->c_backvp);
cp->c_backvp = NULL;
}
}
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_GENERAL | CFSDEBUG_BACK) {
if (error || cp->c_backvp == NULL) {
printf("Stale cookie cp %p fileno %llu type %d \n",
(void *)cp, (u_longlong_t)cp->c_id.cid_fileno,
CTOV(cp)->v_type);
}
}
#endif
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_CHEAT | CFSDEBUG_BACK)
printf("cachefs_getbackvp: EXIT error = %d\n", error);
#endif
return (error);
}
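/*
 * Gets the fid (cookie) and optionally the attributes for the given
 * back file system vnode.  If valid_fid is zero, the cookie is zeroed
 * instead of fetched.  An ENOSPC from VOP_FID (fid too small) is
 * mapped to EINVAL.
 */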
int
cachefs_getcookie(
vnode_t *vp,
struct fid *cookiep,
struct vattr *attrp,
cred_t *cr,
uint32_t valid_fid)
{
int error = 0;
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_CHEAT)
printf("cachefs_getcookie: ENTER vp %p\n", (void *)vp);
#endif
/*
* Get the FID only if the caller has indicated it is valid,
* otherwise, zero the cookie.
*/
if (valid_fid) {
/*
* This assumes that the cookie is a full size fid, if we go to
* variable length fids we will need to change this.
*/
cookiep->fid_len = MAXFIDSZ;
error = VOP_FID(vp, cookiep);
} else {
bzero(cookiep, sizeof (*cookiep));
}
if (!error) {
if (attrp) {
ASSERT(attrp != NULL);
attrp->va_mask = AT_ALL;
error = VOP_GETATTR(vp, attrp, 0, cr);
}
} else {
if (error == ENOSPC) {
/*
* This is an indication that the underlying filesystem
* needs a bigger fid. For now just map to EINVAL.
*/
error = EINVAL;
}
}
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_CHEAT)
printf("cachefs_getcookie: EXIT error = %d\n", error);
#endif
return (error);
}
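/*
 * Initializes a work queue for use by the async worker threads.
 */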
void
cachefs_workq_init(struct cachefs_workq *qp)
{
qp->wq_head = qp->wq_tail = NULL;
qp->wq_length =
qp->wq_thread_count =
qp->wq_max_len =
qp->wq_halt_request = 0;
qp->wq_keepone = 0;
cv_init(&qp->wq_req_cv, NULL, CV_DEFAULT, NULL);
cv_init(&qp->wq_halt_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&qp->wq_queue_lock, NULL, MUTEX_DEFAULT, NULL);
}
/*
* return non-zero if it's `okay' to queue more requests (policy)
*/
static int cachefs_async_max = 512;
static int cachefs_async_count = 0;
kmutex_t cachefs_async_lock;
int
cachefs_async_okay(void)
{
/*
* a value of -1 for max means to ignore freemem
*/
if (cachefs_async_max == -1)
return (1);
if (freemem < minfree)
return (0);
/*
	 * a value of 0 for max means no arbitrary limit (only `freemem')
*/
if (cachefs_async_max == 0)
return (1);
ASSERT(cachefs_async_max > 0);
/*
* check the global count against the max.
*
* we don't need to grab cachefs_async_lock -- we're just
* looking, and a little bit of `fuzz' is okay.
*/
if (cachefs_async_count >= cachefs_async_max)
return (0);
return (1);
}
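/*
 * Main loop for an async worker thread.  Services requests from the
 * work queue, handling log work first, and exits when halted or after
 * sitting idle past the timeout, unless it is the queue's designated
 * keeper thread.
 */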
void
cachefs_async_start(struct cachefs_workq *qp)
{
struct cachefs_req *rp;
int left;
callb_cpr_t cprinfo;
CALLB_CPR_INIT(&cprinfo, &qp->wq_queue_lock, callb_generic_cpr, "cas");
mutex_enter(&qp->wq_queue_lock);
left = 1;
for (;;) {
/* if there are no pending requests */
if ((qp->wq_head == NULL) && (qp->wq_logwork == 0)) {
/* see if thread should exit */
if (qp->wq_halt_request || (left == -1)) {
if ((qp->wq_thread_count > 1) ||
(qp->wq_keepone == 0))
break;
}
/* wake up thread in async_halt if necessary */
if (qp->wq_halt_request)
cv_broadcast(&qp->wq_halt_cv);
CALLB_CPR_SAFE_BEGIN(&cprinfo);
/* sleep until there is something to do */
left = cv_timedwait(&qp->wq_req_cv,
&qp->wq_queue_lock, CFS_ASYNC_TIMEOUT + lbolt);
CALLB_CPR_SAFE_END(&cprinfo,
&qp->wq_queue_lock);
if ((qp->wq_head == NULL) && (qp->wq_logwork == 0))
continue;
}
left = 1;
if (qp->wq_logwork) {
qp->wq_logwork = 0;
mutex_exit(&qp->wq_queue_lock);
cachefs_log_process_queue(qp->wq_cachep, 1);
mutex_enter(&qp->wq_queue_lock);
continue;
}
/* remove request from the list */
rp = qp->wq_head;
qp->wq_head = rp->cfs_next;
if (rp->cfs_next == NULL)
qp->wq_tail = NULL;
/* do the request */
mutex_exit(&qp->wq_queue_lock);
cachefs_do_req(rp);
mutex_enter(&qp->wq_queue_lock);
/* decrement count of requests */
qp->wq_length--;
mutex_enter(&cachefs_async_lock);
--cachefs_async_count;
mutex_exit(&cachefs_async_lock);
}
ASSERT(qp->wq_head == NULL);
qp->wq_thread_count--;
if (qp->wq_halt_request && qp->wq_thread_count == 0)
cv_broadcast(&qp->wq_halt_cv);
CALLB_CPR_EXIT(&cprinfo);
thread_exit();
/*NOTREACHED*/
}
/*
* attempt to halt all the async threads associated with a given workq
*/
int
cachefs_async_halt(struct cachefs_workq *qp, int force)
{
int error = 0;
clock_t tend;
mutex_enter(&qp->wq_queue_lock);
if (force)
qp->wq_keepone = 0;
if (qp->wq_thread_count > 0) {
qp->wq_halt_request++;
cv_broadcast(&qp->wq_req_cv);
tend = lbolt + (60 * hz);
(void) cv_timedwait(&qp->wq_halt_cv,
&qp->wq_queue_lock, tend);
qp->wq_halt_request--;
if (qp->wq_thread_count > 0) {
if ((qp->wq_thread_count == 1) &&
(qp->wq_length == 0) && qp->wq_keepone)
error = EAGAIN;
else
error = EBUSY;
} else {
ASSERT(qp->wq_length == 0 && qp->wq_head == NULL);
}
}
mutex_exit(&qp->wq_queue_lock);
return (error);
}
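/*
 * Adds a request to the tail of a work queue, creating another async
 * worker thread if the queue is getting long and the per-queue thread
 * limit has not been reached.
 */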
void
cachefs_addqueue(struct cachefs_req *rp, struct cachefs_workq *qp)
{
mutex_enter(&qp->wq_queue_lock);
if (qp->wq_thread_count < cachefs_max_threads) {
if (qp->wq_thread_count == 0 ||
(qp->wq_length >= (qp->wq_thread_count * 2))) {
(void) thread_create(NULL, 0, cachefs_async_start,
qp, 0, &p0, TS_RUN, minclsyspri);
qp->wq_thread_count++;
}
}
mutex_enter(&rp->cfs_req_lock);
if (qp->wq_tail)
qp->wq_tail->cfs_next = rp;
else
qp->wq_head = rp;
qp->wq_tail = rp;
rp->cfs_next = NULL;
qp->wq_length++;
if (qp->wq_length > qp->wq_max_len)
qp->wq_max_len = qp->wq_length;
mutex_enter(&cachefs_async_lock);
++cachefs_async_count;
mutex_exit(&cachefs_async_lock);
cv_signal(&qp->wq_req_cv);
mutex_exit(&rp->cfs_req_lock);
mutex_exit(&qp->wq_queue_lock);
}
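/*
 * Async putpage handler; writes out the pages described by the
 * request and wakes any thread waiting for outstanding I/O on the
 * cnode to drain.
 */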
void
cachefs_async_putpage(struct cachefs_putpage_req *prp, cred_t *cr)
{
struct cnode *cp = VTOC(prp->cp_vp);
ASSERT(CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)) == 0);
(void) VOP_PUTPAGE(prp->cp_vp, prp->cp_off, prp->cp_len,
prp->cp_flags, cr);
mutex_enter(&cp->c_iomutex);
if (--cp->c_nio == 0)
cv_broadcast(&cp->c_iocv);
if (prp->cp_off == 0 && prp->cp_len == 0 &&
(cp->c_ioflags & CIO_PUTPAGES)) {
cp->c_ioflags &= ~CIO_PUTPAGES;
}
mutex_exit(&cp->c_iomutex);
}
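/*
 * Async population handler.  Creates the front file if necessary,
 * populates it from the back file (regular files and directories
 * only), and marks the cnode populated.  On any failure the cnode is
 * put into NOCACHE mode.
 */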
void
cachefs_async_populate(struct cachefs_populate_req *pop, cred_t *cr)
{
struct cnode *cp = VTOC(pop->cpop_vp);
struct fscache *fscp = C_TO_FSCACHE(cp);
struct filegrp *fgp = cp->c_filegrp;
int error = 0; /* not returned -- used as a place-holder */
vnode_t *frontvp = NULL, *backvp = NULL;
int havelock = 0;
vattr_t va;
ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
if (((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0) ||
(fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
mutex_enter(&cp->c_statelock);
cp->c_flags &= ~CN_ASYNC_POPULATE;
mutex_exit(&cp->c_statelock);
return; /* goto out */
}
error = cachefs_cd_access(fscp, 0, 0);
if (error) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_ASYNCPOP)
printf("async_pop: cd_access: err %d con %d\n",
error, fscp->fs_cdconnected);
#endif /* CFSDEBUG */
mutex_enter(&cp->c_statelock);
cp->c_flags &= ~CN_ASYNC_POPULATE;
mutex_exit(&cp->c_statelock);
return; /* goto out */
}
/*
* grab the statelock for some minimal things
*/
rw_enter(&cp->c_rwlock, RW_WRITER);
mutex_enter(&cp->c_statelock);
havelock = 1;
if ((cp->c_flags & CN_ASYNC_POPULATE) == 0)
goto out;
/* there can be only one */
ASSERT((cp->c_flags & CN_ASYNC_POP_WORKING) == 0);
cp->c_flags |= CN_ASYNC_POP_WORKING;
cp->c_popthrp = curthread;
if (cp->c_metadata.md_flags & MD_POPULATED)
goto out;
if (cp->c_flags & CN_NOCACHE) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_ASYNCPOP)
printf("cachefs_async_populate: nocache bit on\n");
#endif /* CFSDEBUG */
error = EINVAL;
goto out;
}
if (cp->c_frontvp == NULL) {
if ((cp->c_metadata.md_flags & MD_FILE) == 0) {
struct cfs_cid cid = cp->c_id;
mutex_exit(&cp->c_statelock);
havelock = 0;
/*
* if frontfile doesn't exist, drop the lock
* to do some of the file creation stuff.
*/
if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
error = filegrp_allocattr(fgp);
if (error != 0)
goto out;
}
if (fgp->fg_flags & CFS_FG_ALLOC_FILE) {
mutex_enter(&fgp->fg_mutex);
if (fgp->fg_flags & CFS_FG_ALLOC_FILE) {
if (fgp->fg_header->ach_nffs == 0)
error = filegrpdir_create(fgp);
else
error = filegrpdir_find(fgp);
if (error != 0) {
mutex_exit(&fgp->fg_mutex);
goto out;
}
}
mutex_exit(&fgp->fg_mutex);
}
if (fgp->fg_dirvp != NULL) {
char name[CFS_FRONTFILE_NAME_SIZE];
struct vattr *attrp;
attrp = cachefs_kmem_zalloc(
sizeof (struct vattr), KM_SLEEP);
attrp->va_mode = S_IFREG | 0666;
attrp->va_uid = 0;
attrp->va_gid = 0;
attrp->va_type = VREG;
attrp->va_size = 0;
attrp->va_mask =
AT_SIZE | AT_TYPE | AT_MODE |
AT_UID | AT_GID;
make_ascii_name(&cid, name);
(void) VOP_CREATE(fgp->fg_dirvp, name, attrp,
EXCL, 0666, &frontvp, kcred, 0);
cachefs_kmem_free(attrp,
sizeof (struct vattr));
}
mutex_enter(&cp->c_statelock);
havelock = 1;
}
error = cachefs_getfrontfile(cp);
ASSERT((error != 0) ||
(frontvp == NULL) ||
(frontvp == cp->c_frontvp));
}
if ((error != 0) || (cp->c_frontvp == NULL))
goto out;
if (frontvp != NULL)
VN_RELE(frontvp);
frontvp = cp->c_frontvp;
VN_HOLD(frontvp);
if (cp->c_backvp == NULL) {
error = cachefs_getbackvp(fscp, cp);
if ((error != 0) || (cp->c_backvp == NULL))
goto out;
}
backvp = cp->c_backvp;
VN_HOLD(backvp);
switch (pop->cpop_vp->v_type) {
case VREG:
mutex_exit(&cp->c_statelock);
havelock = 0;
error = cachefs_async_populate_reg(pop, cr, backvp, frontvp);
break;
case VDIR:
error = cachefs_async_populate_dir(pop, cr, backvp, frontvp);
mutex_exit(&cp->c_statelock);
havelock = 0;
break;
default:
#ifdef CFSDEBUG
printf("cachefs_async_populate: warning: vnode type = %d\n",
pop->cpop_vp->v_type);
ASSERT(0);
#endif /* CFSDEBUG */
error = EINVAL;
break;
}
if (error != 0)
goto out;
error = VOP_FSYNC(frontvp, FSYNC, cr);
if (error != 0) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_ASYNCPOP)
printf("cachefs_async_populate: fsync\n");
#endif /* CFSDEBUG */
goto out;
}
/* grab the lock and finish up */
mutex_enter(&cp->c_statelock);
havelock = 1;
/* if the cnode went nocache while the lock was dropped, give up */
if ((cp->c_flags & CN_NOCACHE) || (cp->c_frontvp == NULL)) {
error = EINVAL;
goto out;
}
va.va_mask = AT_MTIME;
error = VOP_GETATTR(cp->c_frontvp, &va, 0, cr);
if (error) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_ASYNCPOP)
printf("cachefs_async_populate: getattr\n");
#endif /* CFSDEBUG */
goto out;
}
cp->c_metadata.md_timestamp = va.va_mtime;
cp->c_metadata.md_flags |= MD_POPULATED;
cp->c_metadata.md_flags &= ~MD_INVALREADDIR;
cp->c_flags |= CN_UPDATED;
out:
if (! havelock)
mutex_enter(&cp->c_statelock);
/* see if an error happened behind our backs */
if ((error == 0) && (cp->c_flags & CN_NOCACHE)) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_ASYNCPOP)
printf("cachefs_async_populate: "
"nocache behind our backs\n");
#endif /* CFSDEBUG */
error = EINVAL;
}
cp->c_flags &= ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING |
CN_ASYNC_POPULATE | CN_ASYNC_POP_WORKING);
cp->c_popthrp = NULL;
if (error != 0)
cachefs_nocache(cp);
/* unblock any threads waiting for populate to finish */
cv_broadcast(&cp->c_popcv);
mutex_exit(&cp->c_statelock);
rw_exit(&cp->c_rwlock);
cachefs_cd_release(fscp);
if (backvp != NULL) {
VN_RELE(backvp);
}
if (frontvp != NULL) {
VN_RELE(frontvp);
}
}
/*
* Populates a regular file's front file from its back file.
* Only to be called from cachefs_async_populate().
*/
static int
cachefs_async_populate_reg(struct cachefs_populate_req *pop, cred_t *cr,
vnode_t *backvp, vnode_t *frontvp)
{
struct cnode *cp = VTOC(pop->cpop_vp);
int error = 0;
u_offset_t popoff;
size_t popsize;
cachefs_cluster_allocmap(pop->cpop_off, &popoff,
&popsize, pop->cpop_size, cp);
if (popsize == 0) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_ASYNCPOP)
printf("cachefs_async_populate: popsize == 0\n");
#endif /* CFSDEBUG */
goto out;
}
error = cachefs_populate(cp, popoff, popsize, frontvp, backvp,
cp->c_size, cr);
if (error != 0) {
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_ASYNCPOP)
printf("cachefs_async_populate: cachefs_populate\n");
#endif /* CFSDEBUG */
goto out;
}
out:
return (error);
}
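/*
* ------------------------------------------------------------------
*
* cachefs_do_req
*
* Description:
* Dispatches a single asynchronous request from a work queue to
* its handler, releases the vnode reference and credentials held
* by the request, and returns the request structure to
* cachefs_req_cache.
* Arguments:
* rp the request to execute
* Returns:
* Preconditions:
*/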
void
cachefs_do_req(struct cachefs_req *rp)
{
struct cachefscache *cachep;
mutex_enter(&rp->cfs_req_lock);
switch (rp->cfs_cmd) {
case CFS_INVALID:
panic("cachefs_do_req: CFS_INVALID operation on queue");
/*NOTREACHED*/
case CFS_CACHE_SYNC:
cachep = rp->cfs_req_u.cu_fs_sync.cf_cachep;
cachefs_cache_sync(cachep);
break;
case CFS_IDLE:
cachefs_cnode_idle(rp->cfs_req_u.cu_idle.ci_vp, rp->cfs_cr);
break;
case CFS_PUTPAGE:
cachefs_async_putpage(&rp->cfs_req_u.cu_putpage, rp->cfs_cr);
VN_RELE(rp->cfs_req_u.cu_putpage.cp_vp);
break;
case CFS_POPULATE:
cachefs_async_populate(&rp->cfs_req_u.cu_populate, rp->cfs_cr);
VN_RELE(rp->cfs_req_u.cu_populate.cpop_vp);
break;
case CFS_NOOP:
break;
default:
panic("c_do_req: Invalid CFS async operation");
}
crfree(rp->cfs_cr);
rp->cfs_cmd = CFS_INVALID;
mutex_exit(&rp->cfs_req_lock);
kmem_cache_free(cachefs_req_cache, rp);
}
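/*
* DEBUG memory accounting: cachefs_kmem_alloc() and friends bracket
* each allocation with a struct km_wrap at both ends and track the
* total in cachefs_mem_usage, so mismatched sizes and overruns of
* the buffer tail trip ASSERTs at free time. The padded layout is:
*
* +---------+----------------------+---------+
* | km_wrap | caller's data        | km_wrap |
* +---------+----------------------+---------+
* |<-------- n (rounded up to 8) ----------->|
*
* Since the wrappers recompute n from the size argument, the size
* passed to cachefs_kmem_free() must exactly match the size given
* to cachefs_kmem_alloc(). Illustrative sketch (struct foo is
* hypothetical, not from this file):
*
* struct foo *fp = cachefs_kmem_alloc(sizeof (*fp), KM_SLEEP);
* ...
* cachefs_kmem_free(fp, sizeof (*fp));
*
* In non-DEBUG kernels these wrappers compile down to plain
* kmem_alloc()/kmem_zalloc()/kmem_free().
*/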
ssize_t cachefs_mem_usage = 0;
struct km_wrap {
size_t kw_size;
struct km_wrap *kw_other;
};
kmutex_t cachefs_kmem_lock;
void *
cachefs_kmem_alloc(size_t size, int flag)
{
#ifdef DEBUG
caddr_t mp = NULL;
struct km_wrap *kwp;
size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;
ASSERT(n >= (size + 8));
mp = kmem_alloc(n, flag);
if (mp == NULL) {
return (NULL);
}
/*LINTED alignment okay*/
kwp = (struct km_wrap *)mp;
kwp->kw_size = n;
/*LINTED alignment okay*/
kwp->kw_other = (struct km_wrap *)(mp + n - sizeof (struct km_wrap));
kwp = (struct km_wrap *)kwp->kw_other;
kwp->kw_size = n;
/*LINTED alignment okay*/
kwp->kw_other = (struct km_wrap *)mp;
mutex_enter(&cachefs_kmem_lock);
ASSERT(cachefs_mem_usage >= 0);
cachefs_mem_usage += n;
mutex_exit(&cachefs_kmem_lock);
return (mp + sizeof (struct km_wrap));
#else /* DEBUG */
return (kmem_alloc(size, flag));
#endif /* DEBUG */
}
void *
cachefs_kmem_zalloc(size_t size, int flag)
{
#ifdef DEBUG
caddr_t mp = NULL;
struct km_wrap *kwp;
size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;
ASSERT(n >= (size + 8));
mp = kmem_zalloc(n, flag);
if (mp == NULL) {
return (NULL);
}
/*LINTED alignment okay*/
kwp = (struct km_wrap *)mp;
kwp->kw_size = n;
/*LINTED alignment okay*/
kwp->kw_other = (struct km_wrap *)(mp + n - sizeof (struct km_wrap));
kwp = (struct km_wrap *)kwp->kw_other;
kwp->kw_size = n;
/*LINTED alignment okay*/
kwp->kw_other = (struct km_wrap *)mp;
mutex_enter(&cachefs_kmem_lock);
ASSERT(cachefs_mem_usage >= 0);
cachefs_mem_usage += n;
mutex_exit(&cachefs_kmem_lock);
return (mp + sizeof (struct km_wrap));
#else /* DEBUG */
return (kmem_zalloc(size, flag));
#endif /* DEBUG */
}
void
cachefs_kmem_free(void *mp, size_t size)
{
#ifdef DEBUG
struct km_wrap *front_kwp;
struct km_wrap *back_kwp;
size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;
void *p;
ASSERT(n >= (size + 8));
front_kwp = (struct km_wrap *)((uintptr_t)mp - sizeof (struct km_wrap));
back_kwp = (struct km_wrap *)
((uintptr_t)front_kwp + n - sizeof (struct km_wrap));
ASSERT(front_kwp->kw_other == back_kwp);
ASSERT(front_kwp->kw_size == n);
ASSERT(back_kwp->kw_other == front_kwp);
ASSERT(back_kwp->kw_size == n);
mutex_enter(&cachefs_kmem_lock);
cachefs_mem_usage -= n;
ASSERT(cachefs_mem_usage >= 0);
mutex_exit(&cachefs_kmem_lock);
p = front_kwp;
front_kwp->kw_size = back_kwp->kw_size = 0;
front_kwp->kw_other = back_kwp->kw_other = NULL;
kmem_free(p, n);
#else /* DEBUG */
kmem_free(mp, size);
#endif /* DEBUG */
}
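/*
* Kernel strdup built on the wrappers above; always sleeps for
* memory. The result must be freed with
* cachefs_kmem_free(rc, strlen(rc) + 1) so the DEBUG accounting
* sees a matching size.
*/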
char *
cachefs_strdup(char *s)
{
char *rc;
ASSERT(s != NULL);
rc = cachefs_kmem_alloc(strlen(s) + 1, KM_SLEEP);
(void) strcpy(rc, s);
return (rc);
}
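/*
* kstat snapshot routine for the per-fscache statistics. On
* KSTAT_WRITE the user-supplied buffer replaces fs_stats and the
* garbage collection fields are pushed down into the cache; on a
* read the garbage collection fields are pulled up from the cache
* before fs_stats is copied out.
*/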
int
cachefs_stats_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
struct fscache *fscp = (struct fscache *)ksp->ks_data;
cachefscache_t *cachep = fscp->fs_cache;
int error = 0;
if (rw == KSTAT_WRITE) {
bcopy(buf, &fscp->fs_stats, sizeof (fscp->fs_stats));
cachep->c_gc_count = fscp->fs_stats.st_gc_count;
CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_time,
cachep->c_gc_time);
CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_before_atime,
cachep->c_gc_before);
CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_after_atime,
cachep->c_gc_after);
return (error);
}
fscp->fs_stats.st_gc_count = cachep->c_gc_count;
CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_time,
fscp->fs_stats.st_gc_time, error);
CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_before,
fscp->fs_stats.st_gc_before_atime, error);
CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_after,
fscp->fs_stats.st_gc_after_atime, error);
bcopy(&fscp->fs_stats, buf, sizeof (fscp->fs_stats));
return (error);
}
#ifdef DEBUG
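/*
* Records a snapshot of interesting debugging state: an optional
* message, caller-supplied flags/number/pointer, the related cnode,
* fscache and cache pointers, plus the current thread, a timestamp,
* and a stack trace. Pass a nonzero chain argument to push a new
* record onto the front of the oldcdb list instead of overwriting
* oldcdb in place.
*/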
cachefs_debug_info_t *
cachefs_debug_save(cachefs_debug_info_t *oldcdb, int chain,
char *message, uint_t flags, int number, void *pointer,
cachefscache_t *cachep, struct fscache *fscp, struct cnode *cp)
{
cachefs_debug_info_t *cdb;
if ((chain) || (oldcdb == NULL))
cdb = cachefs_kmem_zalloc(sizeof (*cdb), KM_SLEEP);
else
cdb = oldcdb;
if (chain)
cdb->cdb_next = oldcdb;
if (message != NULL) {
if (cdb->cdb_message != NULL)
cachefs_kmem_free(cdb->cdb_message,
strlen(cdb->cdb_message) + 1);
cdb->cdb_message = cachefs_kmem_alloc(strlen(message) + 1,
KM_SLEEP);
(void) strcpy(cdb->cdb_message, message);
}
cdb->cdb_flags = flags;
cdb->cdb_int = number;
cdb->cdb_pointer = pointer;
cdb->cdb_count++;
cdb->cdb_cnode = cp;
if (cp != NULL) {
cdb->cdb_frontvp = cp->c_frontvp;
cdb->cdb_backvp = cp->c_backvp;
}
if (fscp != NULL)
cdb->cdb_fscp = fscp;
else if (cp != NULL)
cdb->cdb_fscp = C_TO_FSCACHE(cp);
if (cachep != NULL)
cdb->cdb_cachep = cachep;
else if (cdb->cdb_fscp != NULL)
cdb->cdb_cachep = cdb->cdb_fscp->fs_cache;
cdb->cdb_thread = curthread;
cdb->cdb_timestamp = gethrtime();
cdb->cdb_depth = getpcstack(cdb->cdb_stack, CACHEFS_DEBUG_DEPTH);
return (cdb);
}
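/*
* Prints each record in a chain built by cachefs_debug_save(),
* pausing two seconds between records, then drops into the kernel
* debugger.
*/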
void
cachefs_debug_show(cachefs_debug_info_t *cdb)
{
hrtime_t now = gethrtime();
timestruc_t ts;
int i;
while (cdb != NULL) {
hrt2ts(now - cdb->cdb_timestamp, &ts);
printf("cdb: %p count: %d timelapse: %ld.%9ld\n",
(void *)cdb, cdb->cdb_count, ts.tv_sec, ts.tv_nsec);
if (cdb->cdb_message != NULL)
printf("message: %s", cdb->cdb_message);
printf("flags: %x int: %d pointer: %p\n",
cdb->cdb_flags, cdb->cdb_int, (void *)cdb->cdb_pointer);
printf("cnode: %p fscp: %p cachep: %p\n",
(void *)cdb->cdb_cnode,
(void *)cdb->cdb_fscp, (void *)cdb->cdb_cachep);
printf("frontvp: %p backvp: %p\n",
(void *)cdb->cdb_frontvp, (void *)cdb->cdb_backvp);
printf("thread: %p stack...\n", (void *)cdb->cdb_thread);
for (i = 0; i < cdb->cdb_depth; i++) {
ulong_t off;
char *sym;
sym = kobj_getsymname(cdb->cdb_stack[i], &off);
printf("%s+%lx\n", sym ? sym : "?", off);
}
delay(2 * hz);
cdb = cdb->cdb_next;
}
debug_enter(NULL);
}
#endif /* DEBUG */
/*
* Changes the size of the front file.
* Returns 0 on success, or an errno value if the file size
* cannot be set.
* The NOCACHE bit and c_size are ignored.
* The statelock must be held and frontvp must be set.
* The file must be populated if setting to a nonzero size.
*/
int
cachefs_frontfile_size(cnode_t *cp, u_offset_t length)
{
cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
vattr_t va;
size_t nblks, blkdelta;
int error = 0;
int alloc = 0;
struct cachefs_allocmap *allocp;
ASSERT(MUTEX_HELD(&cp->c_statelock));
ASSERT(cp->c_frontvp);
/* if growing the file, allocate space first; we charge for holes */
if (length) {
ASSERT(cp->c_metadata.md_flags & MD_POPULATED);
nblks = (length + MAXBSIZE - 1) / MAXBSIZE;
if (nblks > cp->c_metadata.md_frontblks) {
blkdelta = nblks - cp->c_metadata.md_frontblks;
error = cachefs_allocblocks(cachep, blkdelta,
cp->c_metadata.md_rltype);
if (error)
goto out;
alloc = 1;
}
}
/* change the size of the front file */
va.va_mask = AT_SIZE;
va.va_size = length;
error = VOP_SETATTR(cp->c_frontvp, &va, 0, kcred, NULL);
if (error)
goto out;
/* zero out the alloc map */
bzero(&cp->c_metadata.md_allocinfo,
cp->c_metadata.md_allocents * sizeof (struct cachefs_allocmap));
cp->c_metadata.md_allocents = 0;
if (length == 0) {
/* free up blocks */
if (cp->c_metadata.md_frontblks) {
cachefs_freeblocks(cachep, cp->c_metadata.md_frontblks,
cp->c_metadata.md_rltype);
cp->c_metadata.md_frontblks = 0;
}
} else {
/* update number of blocks if shrinking file */
nblks = (length + MAXBSIZE - 1) / MAXBSIZE;
if (nblks < cp->c_metadata.md_frontblks) {
blkdelta = cp->c_metadata.md_frontblks - nblks;
cachefs_freeblocks(cachep, blkdelta,
cp->c_metadata.md_rltype);
cp->c_metadata.md_frontblks = (uint_t)nblks;
}
/* fix up alloc map to reflect new size */
allocp = cp->c_metadata.md_allocinfo;
allocp->am_start_off = 0;
allocp->am_size = length;
cp->c_metadata.md_allocents = 1;
}
cp->c_flags |= CN_UPDATED | CN_NEED_FRONT_SYNC;
out:
if (error && alloc)
cachefs_freeblocks(cachep, blkdelta, cp->c_metadata.md_rltype);
return (error);
}
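/*
* Illustrative caller (a sketch, not taken from this file):
* truncating the front file under the required lock.
*
* mutex_enter(&cp->c_statelock);
* if (cp->c_frontvp != NULL)
* error = cachefs_frontfile_size(cp, (u_offset_t)0);
* mutex_exit(&cp->c_statelock);
*/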
/*ARGSUSED*/
int
cachefs_req_create(void *voidp, void *cdrarg, int kmflags)
{
struct cachefs_req *rp = (struct cachefs_req *)voidp;
/*
* XXX do not rely on this bzero: a kmem cache constructor runs
* only when an object is first constructed, not on every
* kmem_cache_alloc(), so reused objects come back in whatever
* state they were freed in. Callers that need zeroed requests
* cannot get that guarantee from this constructor.
*/
bzero(rp, sizeof (struct cachefs_req));
mutex_init(&rp->cfs_req_lock, NULL, MUTEX_DEFAULT, NULL);
return (0);
}
/*ARGSUSED*/
void
cachefs_req_destroy(void *voidp, void *cdrarg)
{
struct cachefs_req *rp = (struct cachefs_req *)voidp;
mutex_destroy(&rp->cfs_req_lock);
}
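/*
* These constructor/destructor routines are meant to be handed to
* kmem_cache_create() when cachefs_req_cache is built, along the
* lines of the following (a sketch, not the actual initialization
* code):
*
* cachefs_req_cache = kmem_cache_create("cachefs_req_cache",
* sizeof (struct cachefs_req), 0,
* cachefs_req_create, cachefs_req_destroy,
* NULL, NULL, NULL, 0);
*/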