vm_swap.c revision 20a2d3f67cad8360c790dbe034df45a949636224
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Each physical swap area has an associated bitmap representing
* its physical storage. The bitmap records which swap slots are
* currently allocated or freed. Allocation is done by searching
* through the bitmap for the first free slot. Thus, there's
* no linear relation between offset within the swap device and the
* address (within its segment(s)) of the page that the slot backs;
* instead, it's an arbitrary one-to-one mapping.
*
* Associated with each swap area is a swapinfo structure. These
* structures are linked into a linear list that determines the
* ordering of swap areas in the logical swap device. Each contains a
* pointer to the corresponding bitmap, the area's size, and its
* associated vnode.
*/
#include <sys/inttypes.h>
#include <sys/sysmacros.h>
#include <sys/pathname.h>
/*
* To balance the load among multiple swap areas, we don't allow
* more than swap_maxcontig allocations to be satisfied from a
* single swap area before moving on to the next swap area. This
* effectively "interleaves" allocations among the many swap areas.
*/
int swap_maxcontig; /* set by anon_init() to 1 Mb */
/*
* XXX - this lock is a kludge. It serializes some aspects of swapadd() and
* swapdel() (namely VOP_OPEN, VOP_CLOSE, VN_RELE). It protects against
* somebody swapadd'ing and getting swap slots from a vnode, while someone
* else is in the process of closing or rele'ing it.
*/
/*
* protected by the swapinfo_lock
*/
static int nswapfiles;
/*
* swap device bitmap allocation macros
*/
#define MAPSHIFT 5
int swap_debug = 0; /* set for debug printf's */
int swap_verify = 0; /* set to verify slots when freeing and allocating */
/*
* Allocate a range of up to *lenp contiguous slots (pages) from a physical
* swap device. Flags are one of:
* SA_NOT Must have a slot from a physical swap device other than
* the one containing input (*vpp, *offp).
* Fewer slots than requested may be returned. *lenp allocated slots are
* returned starting at *offp on *vpp.
* Returns 1 for a successful allocation, 0 for couldn't allocate any slots.
*/
int
{
/* Find a desirable physical device and allocate from it. */
do {
break;
/* Caller wants other than specified swap device */
goto found;
/* Caller is loose, will take anything */
} else
goto found;
return (0);
if (soff == -1)
panic("swap_alloc: swap_getoff failed!");
break;
break;
if (noff == -1) {
break;
break;
}
}
}
return (1);
}
int swap_backsearch = 0;
/*
* Get a free offset on swap device sip.
* Return >=0 offset if succeeded, -1 for failure.
*/
static u_offset_t
{
sip->si_alloccnt++;
goto foundentry;
else
sip->si_checkcnt++;
}
"swap_getoff: couldn't find slot from hint %ld to end\n",
/*
* Go backwards? Check for faster method XXX
*/
if (swap_backsearch) {
goto foundentry;
else
sip->si_checkcnt++;
}
} else {
goto foundentry;
else
sip->si_checkcnt++;
}
}
if (*sp == 0xffffffff) {
return ((u_offset_t)-1);
}
/*
* aoff is the page number offset (in bytes) of the si_swapslots
* array element containing a free page
*
* boff is the page number offset of the free page
* (i.e. cleared bit) in si_swapslots[aoff].
*/
goto foundslot;
else
sip->si_checkcnt++;
}
goto foundslot;
else
sip->si_checkcnt++;
}
/*
* Return the offset of the free page in swap device.
* Convert page number to byte offset and add starting
* offset of swap device.
*/
slotnumber, 0, 0, 0, 0);
printf("ptob(aoff(%ld) + boff(%ld))(%ld) >= eoff(%ld)\n",
/*
* We could verify here that the slot isn't already allocated
* by looking through all the anon slots.
*/
}
/*
* Free a swap page.
*/
void
{
do {
"swap_phys_free: freeing slot %ld on "
"sip %p\n",
pagenumber, sip, 0, 0, 0);
"swap_phys_free: freeing free slot "
"%p,%lx\n", (void *)vp,
}
}
return;
}
panic("swap_phys_free");
/*NOTREACHED*/
}
/*
* Return the anon struct corresponding to the given
* <vnode, off> if it is part of the virtual swap device.
* Return the anon struct if found, otherwise NULL.
*/
struct anon *
{
return (ap);
}
return (NULL);
}
/*
* Determine if the vp offset range overlaps a swap device.
*/
int
{
do {
continue;
return (1);
}
return (0);
}
/*
* See if name is one of our swap files
* even though lookupname failed.
* This can be used by swapdel to delete
* swap resources on remote machines
* where the link has gone down.
*/
static struct vnode *
char *name, /* pathname to delete */
{
/*
* Find the swap file entry for the file to
* be deleted. Skip any entries that are in
* transition.
*/
return (vp);
}
}
return (NULL);
}
/*
* New system call to manipulate swap files.
*/
int
{
int error = 0;
int cnt = 0;
int tmp_nswapfiles;
int nswap;
char *swapname;
char *pname;
char *tpname;
/*
* When running in a zone we want to hide the details of the swap
* devices: we report there only being one swap device named "swap"
* having a size equal to the sum of the sizes of all real swap devices
* on the system.
*/
switch (sc_cmd) {
case SC_GETNSWP:
if (global)
*rv = nswapfiles;
else
*rv = 1;
return (0);
case SC_AINFO:
/*
* Return anoninfo information with these changes:
* ani_max = maximum amount of swap space
* (including potentially available physical memory)
* ani_free = amount of unallocated anonymous memory
* (some of which might be reserved and including
* potentially available physical memory)
* ani_resv = amount of claimed (reserved) anonymous memory
*/
return (EFAULT);
return (0);
case SC_LIST:
return (EFAULT);
if (!global) {
char *swappath = "swap";
if (length < 1)
return (ENOMEM);
return (EFAULT);
st.ste_length +=
}
return (EFAULT);
}
*rv = 1;
return (0);
}
/* Return an error if not enough space for the whole table. */
if (length < tmp_nswapfiles)
return (ENOMEM);
/*
* Get memory to hold the swap entries and their names. We'll
* copy the real entries into these and then copy these out.
* Allocating the pathname memory is only a guess so we may
* find that we need more and have to do it again.
* All this is because we have to hold the anon lock while
* traversing the swapinfo list, and we can't be doing copyouts
* while holding it.
*/
KM_SLEEP);
if (tmp_nswapfiles != nswapfiles) {
tmp_nswapfiles * sizeof (struct swapinfo));
gplen = 0;
goto beginning;
}
goto retry;
}
}
if (sip) {
goto lout;
}
goto lout;
}
st.ste_length =
goto lout;
}
if (!tsip->si_pnamelen)
continue;
tsip->si_pnamelen) != 0) {
goto lout;
}
}
lout:
return (error);
case SC_ADD:
case SC_REMOVE:
break;
default:
return (EINVAL);
}
return (error);
return (EFAULT);
/* Allocate the space to read in pathname */
return (ENOMEM);
if (error)
goto out;
if (error) {
goto out;
/* see if we match by name */
goto out;
}
goto out;
}
case VBLK:
break;
case VREG:
else
break;
case VDIR:
break;
default:
break;
}
if (error == 0) {
else
}
out:
return (error);
}
#if defined(_LP64) && defined(_SYSCALL32)
int
{
int error = 0;
int cnt = 0;
int tmp_nswapfiles;
int nswap;
char *swapname;
char *pname;
char *tpname;
struct anoninfo32 ai;
size_t s;
switch (sc_cmd) {
case SC_GETNSWP:
*rv = nswapfiles;
return (0);
case SC_AINFO:
/*
* Return anoninfo information with these changes:
* ani_max = maximum amount of swap space
* (including potentially available physical memory)
* ani_free = amount of unallocated anonymous memory
* (some of which might be reserved and including
* potentially available physical memory)
* ani_resv = amount of claimed (reserved) anonymous memory
*/
if (s > UINT32_MAX)
return (EOVERFLOW);
if (s > UINT32_MAX)
return (EOVERFLOW);
if (s > UINT32_MAX)
return (EOVERFLOW);
return (EFAULT);
return (0);
case SC_LIST:
return (EFAULT);
/* Return an error if not enough space for the whole table. */
if (length < tmp_nswapfiles)
return (ENOMEM);
/*
* Get memory to hold the swap entries and their names. We'll
* copy the real entries into these and then copy these out.
* Allocating the pathname memory is only a guess so we may
* find that we need more and have to do it again.
* All this is because we have to hold the anon lock while
* traversing the swapinfo list, and we can't be doing copyouts
* while holding it.
*/
if (tmp_nswapfiles != nswapfiles) {
gplen = 0;
goto beginning;
}
goto retry;
}
}
goto lout;
}
goto lout;
}
st.ste_length =
goto lout;
}
if (!tsip->si_pnamelen)
continue;
tsip->si_pnamelen) != 0) {
goto lout;
}
}
lout:
return (error);
case SC_ADD:
case SC_REMOVE:
break;
default:
return (EINVAL);
}
return (error);
return (EFAULT);
/* Allocate the space to read in pathname */
return (ENOMEM);
if (error)
goto out;
if (error) {
goto out;
/* see if we match by name */
goto out;
}
goto out;
}
case VBLK:
break;
case VREG:
else
break;
case VDIR:
break;
default:
break;
}
if (error == 0) {
else
swapname);
}
out:
return (error);
}
#endif /* _LP64 && _SYSCALL32 */
/*
* Add a new swap file.
*/
int
{
int error;
/*
* Get the real vnode. (If vp is not a specnode it just returns vp, so
* it does the right thing, but having this code know about specnodes
* violates the spirit of having it be independent of vnode type.)
*/
/*
* Or in VISSWAP so file system has chance to deny swap-ons during open.
*/
/* restore state of v_flag */
if (!wasswap) {
}
return (error);
}
/*
* Get partition size. Return error if empty partition,
* or if request does not fit within the partition.
* If this is the first swap device, we can reduce
* the size of the swap area to match what is
* available. This can happen if the system was built
* on a machine with a different size swap partition.
*/
goto out;
/*
* Specfs returns a va_size of MAXOFFSET_T (UNKNOWN_SIZE) when the
* size of the device can't be determined.
*/
goto out;
}
#ifdef _ILP32
/*
* No support for large swap in a 32-bit OS: if the size of the swap is
* bigger than MAXOFF32_T then the size used by swapfs must be limited.
* This limitation is imposed by the swap subsystem itself; a D_64BIT
* driver as the target of swap operation should be able to field
* the IO.
*/
"!swap device %s truncated from 0x%llx to 0x%x bytes",
}
#endif /* _ILP32 */
/* Fail if file not writeable (try to set size to current size) */
goto out;
/* Fail if fs does not support VOP_PAGEIO */
NULL);
goto out;
else
error = 0;
/*
* If swapping on the root filesystem don't put swap blocks that
* correspond to the miniroot filesystem on the swap free list.
*/
else /* Skip 1st page (disk label) */
goto out;
}
/*
* If user specified 0 blks, use the size of the device
*/
goto out;
}
/*
* The starting and ending offsets must be page aligned.
* Round soff up to next page boundary, round eoff
* down to previous page boundary.
*/
goto out;
}
/* Allocate and partially set up the new swapinfo */
/*
* Size of swapslots map in bytes
*/
/*
* Permanently set the bits that can't ever be allocated,
* i.e. those from the ending offset to the round up slot for the
* swapslots bit map.
*/
0, 0, 0, 0);
}
/*
* Now check to see if we can add it. We wait until now to check because
* we need the swapinfo_lock and we don't want to sleep with it (e.g.,
* during kmem_alloc()) while we're setting up the swapinfo.
*/
/*
* We are adding a device that we are in the
* middle of deleting. Just clear the
* ST_DOINGDEL flag to signal this and
* the deletion routine will eventually notice
* it and add it back.
*/
goto out;
}
/* disallow overlapping swap files */
goto out;
}
}
}
nswapfiles++;
/*
* add new swap device to list and shift allocations to it
* before updating the anoninfo counters
*/
/*
* Update the total amount of reservable swap space
* accounting properly for swap space from physical memory
*/
/* New swap device soaks up currently reserved memory swap */
}
/*
* At boot time, to permit booting small memory machines using
* only physical memory as swap space, we allowed a dangerously
* large amount of memory to be used as swap space; now that
* more physical backing store is available bump down the amount
* we can get from memory to a safer size.
*/
if (swapfs_minfree < swapfs_desfree) {
}
/* Initialize the dump device */
out:
if (!wasswap) {
}
if (nsip) {
}
NULL);
}
return (error);
}
/*
* Delete a swap file.
*/
static int
{
int error = 0;
u_offset_t toff = 0;
pgcnt_t adjust_swap = 0;
/* Find the swap file entry for the file to be deleted */
break;
}
/* If the file was not found, error. */
goto out;
}
/*
* Do not delete if we will be low on swap pages.
*/
goto out;
}
/* If needed, reserve memory swap to replace old device */
availrmem -= adjust_swap;
}
/*
* Set the delete flag. This prevents anyone from allocating more
* pages from this file. Also set ST_DOINGDEL. Someone who wants to
* add the file back while we're deleting it will signify by clearing
* this flag.
*/
/*
* Free all the allocated physical slots for this file. We do this
* by walking through the entire anon hash array, because we need
* to update all the anon slots that have physical swap slots on
* this file, and this is the only way to find them all. We go back
* to the beginning of a bucket after each slot is freed because the
* anonhash_lock is not held during the free and thus the hash table
* may change under us.
*/
top:
goto top;
} else {
if (error) {
"swapslot_free failed %d",
error);
}
/*
* Add device back before making it
* visible.
*/
~(ST_INDEL | ST_DOINGDEL);
/*
* Update the anon space available
*/
availrmem += adjust_swap;
goto out;
}
}
}
}
/* All done, they'd better all be free! */
/* Now remove it from the swapinfo list */
break;
}
nswapfiles--;
dumpfini();
/* Release the vnode */
out:
return (error);
}
/*
* Free up a physical swap slot on swapinfo sip, currently in use by the
* anonymous page whose name is (vp, off).
*/
static int
{
int error = 0;
int alloc_pg = 0;
/*
* Get the page for the old swap slot if it exists or create a new one.
*/
goto again;
alloc_pg = 1;
/*LINTED: constant in conditional context*/
return (0);
}
if (error) {
error = 0;
/*LINTED: constant in conditional context*/
return (error);
}
}
/*
* Presumably the anon slot may have been released and then reallocated
* by anon layer (an_pvp == NULL) with the same vp, off.
* In this case the page which has been allocated needs to
* be freed.
*/
if (!alloc_pg)
/*LINTED: constant in conditional context*/
return (0);
}
/*
* Free the physical slot. It may have been freed up and replaced with
* another one while we were getting the page so we have to re-verify
* that this is really the one we want. If we do free the slot we have
* to mark the page modified, as its backing store is now gone.
*/
hat_setmod(pp);
} else {
}
return (0);
}
/*
* Get contiguous physical backing store for vp, in the range
* [*offp, *offp + *lenp). May back a subrange of this, but must
* always include the requested offset or fail. Returns the offsets
* backed as [*offp, *offp + *lenp) and the physical offsets used to
* back them from *pvpp in the range [*pstartp, *pstartp + *lenp).
* Returns 0 for success
* SE_NOANON -- no anon slot for requested page
* SE_NOSWAP -- no physical swap space available
*/
int
{
int error = 0;
/* Get new physical swap slots. */
/*
* No swap available so return error unless requested
* offset is already backed in which case return that.
*/
return (error);
}
return (error);
}
/*
* We got plen (<= *lenp) contig slots. Use these to back a
* subrange of [*offp, *offp + *lenp) which includes offset.
* For now we just put offset at the end of the kluster.
* Clearly there are other possible choices - which is best?
*/
/* Free old slot if any, and assign new one */
PAGESIZE);
} else { /* No anon slot for a klustered page, quit. */
/* Already did requested page, do partial kluster */
error = 0;
/* Fail on requested page, error */
/* Fail on prior page, fail on requested page, error */
/* Fail on prior page, got requested page, do only it */
} else {
/* Free old slot if any, and assign new one */
PAGESIZE);
/* One page kluster */
}
/* Free unassigned slots */
break;
}
}
return (error);
}
/*
* Get the physical swap backing store location for a given anonymous page
* named (vp, off). The backing store name is returned in (*pvpp, *poffp).
* Returns 0 success
* EIDRM -- no anon slot (page is not allocated)
*/
int
{
int error = 0;
/* Get anon slot for vp, off */
goto out;
}
out:
return (error);
}