swap_subr.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/swap.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/callb.h>
#include <sys/debug.h>
#include <vm/seg.h>
#include <sys/fs/swapnode.h>
#include <fs/fs_subr.h>
#include <sys/cmn_err.h>
#include <sys/mem_config.h>
#include <sys/atomic.h>
/* Vnode operations template; swapinit() passes it to vn_make_ops(). */
extern const fs_operation_def_t swap_vnodeops_template[];
/*
 * swapfs_minfree is the amount of physical memory (actually remaining
 * availrmem) that we want to keep free for the rest of the system. This
 * means that swapfs can only grow to availrmem - swapfs_minfree. This
 * can be set as a constant value or a certain percentage of installed
 * physical memory. It is set by swapfs_recalc(), which is first called
 * from swapinit().
 *
 * Users who want to change the amount of memory that can be used as swap
 * space should do so by setting swapfs_desfree at boot time,
 * not swapfs_minfree.
 *
 * For all three tunables below, a value of zero at the time swapinit()
 * runs means "let swapfs_recalc() pick the default".
 */
pgcnt_t swapfs_desfree = 0;
pgcnt_t swapfs_minfree = 0;
/* Pages withheld from non-privileged reservations; see swapfs_recalc(). */
pgcnt_t swapfs_reserve = 0;
#ifdef SWAPFS_DEBUG
/* Only present when the file is built with SWAPFS_DEBUG defined. */
int swapfs_debug;
#endif /* SWAPFS_DEBUG */
/* Count of vnodes created by swapfs_getvp(). */
static int swapfs_vpcount;
/*
 * Held while swapfs_getvp() creates a vnode and while the pending and
 * free request lists (and sw_pending_size) are modified.
 */
static kmutex_t swapfs_lock;
/*
 * sw_ar is the array of request structures allocated in swapinit();
 * sw_pendlist and sw_freelist are the heads of the singly linked
 * (a_next) pending and free lists.
 */
static struct async_reqs *sw_ar, *sw_pendlist, *sw_freelist;
static struct vnode **swap_vnodes; /* ptr's to swap vnodes */
/* Forward declaration: called from swapinit(), defined at end of file. */
static void swap_init_mem_config(void);
/*
 * Values of the three tunables as captured by
 * swapfs_recalc_save_initial(); swapfs_recalc() starts from these on
 * every recalculation.
 */
static pgcnt_t initial_swapfs_desfree;
static pgcnt_t initial_swapfs_minfree;
static pgcnt_t initial_swapfs_reserve;
/* Forward declaration: referenced by the vfs ops table in swapinit(). */
static int swap_sync(struct vfs *vfsp, short flag, struct cred *cr);
/*
 * Capture the current values of swapfs_desfree, swapfs_minfree and
 * swapfs_reserve.  swapfs_recalc() uses these saved copies as its starting
 * point, treating a saved value of zero as "compute the default".
 */
static void
swapfs_recalc_save_initial(void)
{
	initial_swapfs_reserve = swapfs_reserve;
	initial_swapfs_minfree = swapfs_minfree;
	initial_swapfs_desfree = swapfs_desfree;
}
/*
 * Recompute swapfs_desfree, swapfs_minfree and swapfs_reserve for a
 * system with `pgs' pages of memory.
 *
 * Each value starts from the copy saved by swapfs_recalc_save_initial();
 * a saved value of zero selects the default computed below.  The global
 * tunables are only written once every check has passed.
 *
 * Returns 1 if the tunables were updated, 0 if they were left untouched.
 */
static int
swapfs_recalc(pgcnt_t pgs)
{
	pgcnt_t desfree = initial_swapfs_desfree;
	pgcnt_t minfree = initial_swapfs_minfree;
	pgcnt_t reserve = initial_swapfs_reserve;

	if (desfree == 0) {
		/* 3-1/2Mb */
		desfree = btopr(7 * 512 * 1024);
	}

	if (minfree == 0) {
		/*
		 * We set this lower than we'd like here, 2Mb, because we
		 * always boot on swapfs.  It is raised to a safer value,
		 * swapfs_desfree, when/if we add physical swap devices
		 * in swapadd().  Users who want to change the amount of
		 * memory that can be used as swap space should do so by
		 * setting swapfs_desfree at boot time, not swapfs_minfree.
		 * However, swapfs_minfree is tunable by install as a
		 * workaround for bugid 1147463.
		 */
		minfree = MAX(btopr(2 * 1024 * 1024), pgs >> 3);
	}

	/*
	 * priv processes can reserve memory as swap as long as availrmem
	 * remains greater than swapfs_minfree; in the case of non-priv
	 * processes, memory can be reserved as swap only if availrmem
	 * doesn't fall below (swapfs_minfree + swapfs_reserve).  Thus,
	 * swapfs_reserve amount of memswap is not available to non-priv
	 * processes.  This protects daemons such as automounter from dying
	 * as a result of application processes eating away almost the
	 * entire membased swap.  This safeguard becomes useless if apps are
	 * run with root access.
	 *
	 * Set swapfs_reserve to a minimum of 4Mb or 1/128 of physmem,
	 * whichever is greater, up to the limit of 128 MB.
	 */
	if (reserve == 0) {
		reserve = MIN(btopr(128 * 1024 * 1024),
		    MAX(btopr(4 * 1024 * 1024), pgs >> 7));
	}

	/* Test basic numeric viability. */
	if (minfree > pgs)
		return (0);

	/* Equivalent test to anon_resvmem() check. */
	if (availrmem < minfree) {
		/*
		 * If ism pages are being used, then there must be agreement
		 * between these two policies.  Without a usable
		 * segspt_minfree the recalculation is rejected.
		 */
		if (availrmem <= segspt_minfree || segspt_minfree <= 0)
			return (0);
		minfree = segspt_minfree;
	}

	/* Everything checked out; publish the new values. */
	swapfs_desfree = desfree;
	swapfs_minfree = minfree;
	swapfs_reserve = reserve;
	return (1);
}
/*ARGSUSED1*/
int
swapinit(int fstype, char *name)
{ /* reserve for mp */
ssize_t sw_freelist_size = klustsize / PAGESIZE * 2;
int i, error;
static const fs_operation_def_t swap_vfsops[] = {
VFSNAME_SYNC, (fs_generic_func_p) swap_sync,
NULL, NULL
};
SWAPFS_PRINT(SWAP_SUBR, "swapinit\n", 0, 0, 0, 0, 0);
mutex_init(&swapfs_lock, NULL, MUTEX_DEFAULT, NULL);
swap_vnodes = kmem_zalloc(MAX_SWAP_VNODES * sizeof (struct vnode *),
KM_SLEEP);
swapfs_recalc_save_initial();
if (!swapfs_recalc(physmem))
cmn_err(CE_PANIC, "swapfs_minfree(%lu) > physmem(%lu)",
swapfs_minfree, physmem);
/*
* Arrange for a callback on memory size change.
*/
swap_init_mem_config();
sw_ar = (struct async_reqs *)
kmem_zalloc(sw_freelist_size*sizeof (struct async_reqs), KM_SLEEP);
error = vfs_setfsops(fstype, swap_vfsops, NULL);
if (error != 0) {
cmn_err(CE_WARN, "swapinit: bad vfs ops template");
return (error);
}
error = vn_make_ops(name, swap_vnodeops_template, &swap_vnodeops);
if (error != 0) {
(void) vfs_freevfsops_by_type(fstype);
cmn_err(CE_WARN, "swapinit: bad vnode ops template");
return (error);
}
sw_freelist = sw_ar;
for (i = 0; i < sw_freelist_size - 1; i++)
sw_ar[i].a_next = &sw_ar[i + 1];
return (0);
}
/*
 * Get a swapfs vnode corresponding to the specified identifier.
 *
 * vidx is an index into swap_vnodes, which swapinit() allocates with
 * MAX_SWAP_VNODES entries; vidx must therefore be less than
 * MAX_SWAP_VNODES.  The vnode for a slot is created the first time that
 * slot is requested and the same vnode is returned on later calls.
 *
 * Returns the vnode for slot vidx.
 */
struct vnode *
swapfs_getvp(ulong_t vidx)
{
	struct vnode *vp;

	/* An out-of-range index would access memory beyond the table. */
	ASSERT(vidx < MAX_SWAP_VNODES);

	/* Common case: the vnode already exists, no lock needed. */
	vp = swap_vnodes[vidx];
	if (vp != NULL)
		return (vp);

	mutex_enter(&swapfs_lock);
	/* Look again now that we hold the lock; someone may have beaten us. */
	vp = swap_vnodes[vidx];
	if (vp == NULL) {
		vp = vn_alloc(KM_SLEEP);
		vn_setops(vp, swap_vnodeops);
		vp->v_type = VREG;
		vp->v_flag |= (VISSWAP|VISSWAPFS);
		/* Store the pointer only after the vnode is set up. */
		swap_vnodes[vidx] = vp;
		swapfs_vpcount++;
	}
	mutex_exit(&swapfs_lock);
	return (vp);
}
/*
 * NOTE(review): not referenced anywhere in the visible part of this file;
 * presumably used from another file -- confirm before changing.
 */
int swap_lo;
/*ARGSUSED*/
static int
swap_sync(struct vfs *vfsp, short flag, struct cred *cr)
{
struct vnode *vp;
int i;
if (!(flag & SYNC_ALL))
return (1);
/*
* assumes that we are the only one left to access this so that
* no need to use swapfs_lock (since it's staticly defined)
*/
for (i = 0; i < MAX_SWAP_VNODES; i++) {
vp = swap_vnodes[i];
if (vp) {
VN_HOLD(vp);
(void) VOP_PUTPAGE(vp, (offset_t)0, 0,
(B_ASYNC | B_FREE), kcred);
VN_RELE(vp);
}
}
return (0);
}
/*
 * Defined outside this file.  Adjusted by PAGESIZE, under swapfs_lock,
 * each time a request is added to or removed from the pending list.
 */
extern int sw_pending_size;
/*
 * Take an async request off the pending queue.
 *
 * Returns the request at the head of the pending list with its a_next
 * cleared, or NULL if the list is empty.  sw_pending_size is reduced by
 * PAGESIZE when a request is removed.
 */
struct async_reqs *
sw_getreq()
{
	struct async_reqs *req;

	mutex_enter(&swapfs_lock);
	req = sw_pendlist;
	if (req != NULL) {
		sw_pending_size -= PAGESIZE;
		sw_pendlist = req->a_next;
		req->a_next = NULL;
	}
	ASSERT(sw_pending_size >= 0);
	mutex_exit(&swapfs_lock);

	return (req);
}
/*
 * Put an async request on the pending queue.
 *
 * Takes a hold on the request's vnode, then pushes the request onto the
 * head of the pending list and adds PAGESIZE to sw_pending_size.
 */
void
sw_putreq(struct async_reqs *arg)
{
	struct vnode *held_vp = arg->a_vp;

	/* Hold onto it */
	VN_HOLD(held_vp);

	mutex_enter(&swapfs_lock);
	sw_pending_size += PAGESIZE;
	arg->a_next = sw_pendlist;
	sw_pendlist = arg;
	mutex_exit(&swapfs_lock);
}
/*
 * Put an async request back on the pending queue.
 *
 * Pushes the request onto the head of the pending list and adds PAGESIZE
 * to sw_pending_size.  Unlike sw_putreq(), no additional vnode hold is
 * taken here.
 */
void
sw_putbackreq(struct async_reqs *arg)
{
	mutex_enter(&swapfs_lock);
	sw_pending_size += PAGESIZE;
	arg->a_next = sw_pendlist;
	sw_pendlist = arg;
	mutex_exit(&swapfs_lock);
}
/*
 * Take an async request structure off the free list.
 *
 * Returns the structure at the head of the free list with its a_next
 * cleared, or NULL if the free list is empty.
 */
struct async_reqs *
sw_getfree()
{
	struct async_reqs *req;

	mutex_enter(&swapfs_lock);
	req = sw_freelist;
	if (req != NULL) {
		sw_freelist = req->a_next;
		req->a_next = NULL;
	}
	mutex_exit(&swapfs_lock);

	return (req);
}
/*
 * Put an async request structure on the free list.
 *
 * Drops the hold on the request's vnode before pushing the structure onto
 * the head of the free list.
 */
void
sw_putfree(struct async_reqs *arg)
{
	struct vnode *held_vp = arg->a_vp;

	/* Release our hold - should have locked the page by now */
	VN_RELE(held_vp);

	mutex_enter(&swapfs_lock);
	arg->a_next = sw_freelist;
	sw_freelist = arg;
	mutex_exit(&swapfs_lock);
}
/*
 * Running total of the page counts passed to swap_mem_config_pre_del()
 * and not yet subtracted again by swap_mem_config_post_del().  It is
 * subtracted from physmem before each swapfs_recalc() call below.
 */
static pgcnt_t swapfs_pending_delete;
/*
 * post_add entry of swap_mem_config_vec.
 *
 * Re-runs swapfs_recalc() for physmem less the pages currently counted in
 * swapfs_pending_delete.  Neither argument is used and the result of the
 * recalculation is ignored.
 */
/*ARGSUSED*/
static void
swap_mem_config_post_add(void *arg, pgcnt_t delta_swaps)
{
	pgcnt_t usable = physmem - swapfs_pending_delete;

	(void) swapfs_recalc(usable);
}
/*
 * pre_del entry of swap_mem_config_vec.
 *
 * Atomically adds delta_swaps to swapfs_pending_delete and re-runs
 * swapfs_recalc() for physmem less the new pending total.
 *
 * Returns 0 if the recalculation succeeded, EBUSY if it was rejected.
 */
/*ARGSUSED*/
static int
swap_mem_config_pre_del(void *arg, pgcnt_t delta_swaps)
{
	pgcnt_t pending;

	pending = atomic_add_long_nv(&swapfs_pending_delete,
	    (spgcnt_t)delta_swaps);

	/*
	 * On failure, tidy-up is done by the call to post_del, which
	 * is always made.
	 */
	return (swapfs_recalc(physmem - pending) ? 0 : EBUSY);
}
/*
 * post_del entry of swap_mem_config_vec.
 *
 * Atomically subtracts delta_swaps from swapfs_pending_delete, reversing
 * the addition made by swap_mem_config_pre_del(), and re-runs
 * swapfs_recalc() for physmem less what is still pending.  The `cancelled'
 * flag is not examined and the result of the recalculation is ignored.
 */
/*ARGSUSED*/
static void
swap_mem_config_post_del(void *arg, pgcnt_t delta_swaps, int cancelled)
{
	pgcnt_t pending;

	pending = atomic_add_long_nv(&swapfs_pending_delete,
	    -(spgcnt_t)delta_swaps);
	(void) swapfs_recalc(physmem - pending);
}
/*
 * Callback vector that swap_init_mem_config() passes to
 * kphysm_setup_func_register().  Each callback re-runs swapfs_recalc().
 */
static kphysm_setup_vector_t swap_mem_config_vec = {
KPHYSM_SETUP_VECTOR_VERSION,
swap_mem_config_post_add, /* post-add callback */
swap_mem_config_pre_del, /* pre-delete callback; may return EBUSY */
swap_mem_config_post_del, /* post-delete callback */
};
static void
swap_init_mem_config(void)
{
int ret;
ret = kphysm_setup_func_register(&swap_mem_config_vec, (void *)NULL);
ASSERT(ret == 0);
}