vm_anon.c revision b942e89b21c9ea698fd94b07082bee381672203d
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
/*
* VM - anonymous pages.
*
* This layer sits immediately above the vm_swap layer. It manages
* physical pages that have no permanent identity in the file system
* name space, using the services of the vm_swap layer to allocate
* backing storage for these pages. Since these pages have no external
* identity, they are discarded when the last reference is removed.
*
* An important function of this layer is to manage low-level sharing
* of pages that are logically distinct but that happen to be
* physically identical (e.g., the corresponding pages of the processes
* resulting from a fork before one process or the other changes their
* contents). This pseudo-sharing is present only as an optimization
* and is not to be confused with true sharing in which multiple
* address spaces deliberately contain references to the same object;
* such sharing is managed at a higher level.
*
* The key data structure here is the anon struct, which contains a
* reference count for its associated physical page and a hint about
* the identity of that page. Anon structs typically live in arrays,
* with an instance's position in its array determining where the
* corresponding backing storage is allocated; however, the swap_xlate()
* routine abstracts away this representation information so that the
* rest of the anon layer need not know it. (See the swap layer for
* more details on anon struct layout.)
*
* In future versions of the system, the association between an
* anon struct and its position on backing store will change so that
* we don't require backing store for all anonymous pages in the system.
* This is an important consideration for large memory systems.
* We can also use this technique to delay binding physical locations,
* which allows smarter allocation decisions to improve anonymous klustering.
*
* Many of the routines defined here take a (struct anon **) argument,
* which allows the code at this level to manage anon pages directly,
* so that callers can regard anon structs as opaque objects and not be
* concerned with assigning or inspecting their contents.
*
* Clients of this layer refer to anon pages indirectly. That is, they
* maintain arrays of pointers to anon structs rather than maintaining
* anon structs themselves. The (struct anon **) arguments mentioned
* above are pointers to entries in these arrays. It is these arrays
* that capture the mapping between offsets within a given segment and
* the corresponding anonymous backing storage address.
*/
#ifdef DEBUG
#define ANON_DEBUG
#endif
#include <sys/sysmacros.h>
#include <sys/tuneable.h>
#include <sys/tnf_probe.h>
#include <sys/condvar_impl.h>
#include <sys/mutex_impl.h>
int anon_debug;
struct k_anoninfo k_anoninfo;
/*
* Global hash table for (vp, off) -> anon slot
*/
extern int swap_maxcontig;
static struct kmem_cache *anon_cache;
static struct kmem_cache *anonmap_cache;
#ifdef VM_STATS
static struct anonvmstats_str {
} anonvmstats;
#endif /* VM_STATS */
/*ARGSUSED*/
static int
{
return (0);
}
/*ARGSUSED1*/
static void
{
}
void
anon_init(void)
{
int i;
for (i = 0; i < AH_LOCK_SIZE; i++) {
}
for (i = 0; i < ANON_LOCKSIZE; i++) {
}
sizeof (struct anon_map), 0,
}
/*
* Global anon slot hash table manipulation.
*/
static void
{
int index;
}
static void
{
break;
}
}
}
/*
* The anon array interfaces. Functions for allocating and freeing the
* array of pointers, and for returning or setting the entries in the
* array of pointers for a given offset.
*
* Create the list of pointers
*/
struct anon_hdr *
{
return (NULL);
}
/*
* Single level case.
*/
if (flags & ANON_ALLOC_FORCE)
return (NULL);
}
} else {
/*
* 2 Level case.
* anon hdr size needs to be rounded off to be a multiple
* of ANON_CHUNK_SIZE. This is important as various anon
* related functions depend on this.
* NOTE -
* anon_grow() makes anon hdr size a multiple of
* ANON_CHUNK_SIZE.
* amp size is <= anon hdr size.
* anon_index + seg_pgs <= anon hdr size.
*/
return (NULL);
}
}
return (ahp);
}
/*
* Free the array of pointers
*/
void
{
ulong_t i;
void **ppp;
/*
* Single level case.
*/
} else {
/*
* 2 level case.
*/
for (i = 0; i < nchunks; i++) {
}
}
}
/*
* Return the pointer from the list for a
* specified anon index.
*/
struct anon *
{
/*
* Single level case.
*/
return ((struct anon *)
} else {
/*
* 2 level case.
*/
if (app) {
return ((struct anon *)
ANON_PTRMASK));
} else {
return (NULL);
}
}
}
/*
* Return the anon pointer for the first valid entry in the anon list,
* starting from the given index.
*/
struct anon *
{
ulong_t i;
ulong_t j;
i = *index;
/*
* 1 level case
*/
while (i < size) {
if (ap) {
*index = i;
return (ap);
}
i++;
}
} else {
/*
* 2 level case
*/
chunkoff = i & ANON_CHUNK_OFF;
while (i < size) {
if (app)
for (j = chunkoff; j < ANON_CHUNK_SIZE; j++) {
if (ap) {
return (ap);
}
}
chunkoff = 0;
i = (i + ANON_CHUNK_SIZE) & ~ANON_CHUNK_OFF;
}
}
return (NULL);
}
/*
* Set list entry with a given pointer for a specified offset
*/
int
{
void **ppp;
/*
* Single level case.
*/
} else {
/*
* 2 level case.
*/
return (ENOMEM);
}
}
}
}
return (0);
}
/*
* Copy anon array into a given new anon array
*/
int
{
void *ap;
/*
* Both arrays are 1 level.
*/
return (0);
}
/*
* Both arrays are 2 levels.
*/
while (npages != 0) {
>> ANON_CHUNK_SHIFT];
return (ENOMEM);
}
chknp << ANON_PTRSHIFT);
}
}
return (0);
}
/*
* At least one of the arrays is 2 level.
*/
while (npages--) {
return (ENOMEM);
}
s_idx++;
d_idx++;
}
return (0);
}
/*
* ANON_INITBUF is a convenience macro for anon_grow() below. It
* takes a buffer dst, which is at least as large as buffer src. It
* does a bcopy from src into dst, and then bzeros the extra bytes
* of dst. If tail is set, the data in src is tail aligned within
* dst instead of head aligned.
*/
if (tail) { \
} else { \
}
/*
* anon_grow() is used to efficiently extend an existing anon array.
* startidx_p points to the index into the anon array of the first page
* that is in use. oldseg_pgs is the number of pages in use, starting at
* *startidx_p. newpages is the number of additional pages desired.
*
* If startidx_p == NULL, startidx is taken to be 0 and cannot be changed.
*
* The growth is done by creating a new top level of the anon array,
* and (if the array is 2-level) reusing the existing second level arrays.
*
* flags can be used to specify ANON_NOSLEEP and ANON_GROWDOWN.
*
* Returns the new number of pages in the anon array.
*/
{
void **level1;
void *level2;
/*
* Determine the total number of pages needed in the new
* anon array. If growing down, totpages is all pages from
* startidx through the end of the array, plus <newseg_pgs>
* pages. If growing up, keep all pages from page 0 through
* the last page currently in use, plus <newseg_pgs> pages.
*/
if (growdown)
else
/* If the array is already large enough, just return. */
if (oldamp_pgs >= totpages) {
if (growdown)
return (oldamp_pgs);
}
/*
* oldamp_pgs/newamp_pgs are the total numbers of pages represented
* by the corresponding arrays, each of
* which may be either level 1 or level 2.
* Will the new anon array be one level or two levels?
*/
oelems = oldamp_pgs;
nelems = newamp_pgs;
} else {
}
return (0);
/* Are we converting from a one level to a two level anon array? */
/*
* Yes, we're converting to a two level. Reuse old level 1
* as new level 2 if it is exactly PAGESIZE. Otherwise
* alloc a new level 2 and copy the old level 1 data into it.
*/
if (oldamp_pgs == ANON_CHUNK_SIZE) {
} else {
return (0);
}
oldarrsz = oldamp_pgs * sizeof (void *);
}
if (growdown)
else
} else {
}
if (growdown)
return (newamp_pgs);
}
/*
* Called from clock handler to sync ani_free value.
*/
void
set_anoninfo(void)
{
int ix;
}
}
/*
* Reserve anon space.
*
* It's no longer simply a matter of incrementing ani_resv to
* reserve swap space; we need to check memory-based as well
* as disk-backed (physical) swap. The following algorithm
* is used:
* Check the space on physical swap
* i.e. amount needed < ani_max - ani_phys_resv
* If we are swapping on swapfs check
* amount needed < (availrmem - swapfs_minfree)
* Since the algorithm to check for the quantity of swap space is
* almost the same as that for reserving it, we'll just use anon_resvmem
* with a flag to decrement availrmem.
*
* Return non-zero on success.
*/
int
{
pgcnt_t mswap_pages = 0;
pgcnt_t pswap_pages = 0;
/* test zone.max-swap resource control */
mutex_enter(&p->p_lock);
mutex_exit(&p->p_lock);
return (0);
}
mutex_exit(&p->p_lock);
}
/*
* pswap_pages is the number of pages we can take from
* physical (i.e. disk-backed) swap.
*/
("anon_resvmem: npages %lu takemem %u pswap %lu caller %p\n",
if (npages <= pswap_pages) {
/*
* we have enough space on a physical swap
*/
if (takemem)
return (1);
} else if (pswap_pages != 0) {
/*
* we have some space on a physical swap
*/
if (takemem) {
/*
* use up remainder of phys swap
*/
}
}
/*
* since (npages > pswap_pages) we need mem swap
* mswap_pages is the number of pages needed from availrmem
*/
mswap_pages));
/*
* priv processes can reserve memory as swap as long as availrmem
* remains greater than swapfs_minfree; in the case of non-priv
* processes, memory can be reserved as swap only if availrmem
* doesn't fall below (swapfs_minfree + swapfs_reserve). Thus,
* swapfs_reserve amount of memswap is not available to non-priv
* processes. This protects daemons such as automounter from dying
* as a result of application processes eating away almost the entire
* membased swap. This safeguard becomes useless if apps are run
* with root access.
*
* swapfs_reserve is minimum of 4Mb or 1/16 of physmem.
*
*/
if (tryhard) {
if (secpolicy_resource_anon_mem(CRED())) {
} else {
}
}
secpolicy_resource(CRED()) == 0)) {
if (takemem) {
/*
* Take the memory from the rest of the system.
*/
availrmem -= mswap_pages;
("anon_resvmem: took %ld pages of availrmem\n",
mswap_pages));
} else {
}
return (1);
} else {
/*
* Fail if not enough memory
*/
if (takemem) {
}
("anon_resvmem: not enough space from swapfs\n"));
return (0);
}
}
/*
* Give back an anon reservation.
*/
void
{
spgcnt_t mem_free_pages = 0;
#ifdef ANON_DEBUG
#endif
/*
* If some of this reservation belonged to swapfs
* give it back to availrmem.
* ani_mem_resv is the amount of availrmem swapfs has reserved,
* but some of that memory could be locked by segspt, so we can only
* return the non-locked part of ani_mem_resv back to availrmem.
*/
("anon_unresv: growing availrmem by %ld pages\n",
}
/*
* The remainder of the pages is returned to phys swap
*/
if (phys_free_slots) {
}
#ifdef ANON_DEBUG
#endif
}
/*
* Allocate an anon slot and return it with the lock held.
*/
struct anon *
{
swap_alloc(ap);
} else {
}
ANI_ADD(-1);
return (ap);
}
/*
* such pages don't consume any physical swap resources needed for swapping
* unlocked pages.
*/
void
{
return;
hat_setmod(pp);
} else {
}
}
/*
* Decrement the reference count of an anon page.
* If reference count goes to zero, free it and
* its associated page (if any).
*/
void
{
panic("anon_decref: slot count 0");
/*
* If there is a page for this anon slot we will need to
* call VN_DISPOSE to get rid of the vp association and
* put the page back on the free list as really free.
* Acquire the "exclusive" lock to ensure that any
* pending i/o always completes before the swap slot
* is freed.
*/
/*LINTED: constant in conditional context */
}
ANI_ADD(1);
} else {
}
}
/*
* check an_refcnt of the root anon slot (anon_index argument is aligned at
* seg->s_szc level) to determine whether COW processing is required.
* anonpages_hash_lock[] held on the root ap ensures that if root's
* refcnt is 1 all other refcnt's are 1 as well (and they can't increase
* later since this process can't fork while its AS lock is held).
*
* returns 1 if the root anon slot has a refcnt > 1 otherwise returns 0.
*/
int
{
return (0);
return (0);
}
return (1);
}
/*
* Check 'nslots' anon slots for refcnt > 1.
*
* returns 1 if any of the 'nslots' anon slots has a refcnt > 1 otherwise
* returns 0.
*/
static int
{
while (nslots-- > 0) {
return (1);
anon_index++;
}
return (0);
}
static void
{
pgcnt_t i;
#ifdef DEBUG
int refcnt = 1;
#endif
/*
* In case of shared mappings total anon map size may not be
* the largest page size aligned.
*/
}
}
}
i = 0;
while (i < pgcnt) {
i++;
continue;
}
PAGESIZE);
SE_EXCL);
}
/*LINTED*/
}
ANI_ADD(1);
i++;
} else {
pgcnt_t j;
int dispose = 0;
for (j = i + 1; j < i + curpgcnt; j++) {
SE_EXCL);
panic("anon_decref_pages: "
"no page");
(void) hat_pageunload(pp,
dispose = 1;
}
for (j = i; j < i + curpgcnt; j++) {
NULL, ANON_SLEEP);
ANI_ADD(1);
}
if (!dispose) {
page_destroy_pages(ppa[0]);
} else {
for (j = 0; j < curpgcnt; j++) {
}
for (j = 0; j < curpgcnt; j++) {
ppa[j]));
/*LINTED*/
kcred);
}
}
i += curpgcnt;
}
} else {
i++;
}
}
}
}
/*
* Duplicate references to size bytes worth of anon pages.
* Used when duplicating a segment that contains private anon pages.
* This code assumes that the procedure calling this one has already used
* hat_chgprot() to disable write access to the range of addresses that
* *old actually refers to.
*/
void
{
while (npages > 0) {
break;
if (npages <= 0)
break;
off++;
npages--;
}
}
/*
* Just like anon_dup but also guarantees there are no holes (unallocated anon
* slots) within any large page region. That means if a large page region is
* empty in the old array it will skip it. If there are 1 or more valid slots
* in the large page region of the old array it will make sure to fill in any
* unallocated ones and also copy them to the new array. If noalloc is 1, each
* large page region should either have no valid anon slots or all slots should
* be valid.
*/
void
int noalloc)
{
#ifdef DEBUG
int refcnt;
#endif
while (npages > 0) {
/*
* Find the next valid slot.
*/
break;
/*
* Now backup index to the beginning of the
* current large page region of the old array.
*/
if (npages <= 0)
break;
/*
* Fill and copy a large page region's worth
* of anon slots.
*/
for (i = 0; i < pgcnt; i++) {
if (noalloc) {
panic("anon_dup_fill_holes: "
"empty anon slot\n");
}
} else if (i == 0) {
/*
* make the increment of all refcnts of all
* anon slots of a large page appear atomic by
* getting an anonpages_hash_lock for the
* first anon slot of a large page.
*/
/*LINTED*/
}
}
}
}
}
/*
* Used when a segment with a vnode changes szc. Similarly to
* anon_dup_fill_holes(), this makes sure each large page region either has no
* anon slots or all of them, but new slots are created by COWing the file
* pages. On entrance no anon slots should be shared.
*/
int
{
int err = 0;
int pageflags = 0;
while (npages > 0) {
/*
* Find the next valid slot.
*/
break;
}
/*
* Now backup index to the beginning of the
* current large page region of the anon array.
*/
if (npages <= 0)
break;
}
NULL);
if (err) {
break;
}
LOCK_PAGE : 0;
}
break;
}
}
vpage++;
}
}
}
return (err);
}
/*
* Free a group of "size" anon pages, size in bytes,
* and clear out the pointers to the anon entries.
*/
void
{
while (npages > 0) {
break;
if (npages <= 0)
break;
/*
* Bump index and decrement page count
*/
index++;
npages--;
}
}
void
{
while (npages > 0) {
/*
* Find the next valid slot.
*/
break;
/*
* Now backup index to the beginning of the
* current large page region of the old array.
*/
if (npages <= 0)
break;
}
}
/*
* Make anonymous pages discardable
*/
void
{
pgcnt = 1;
/*
* get anon pointer and index for the first valid entry
* in the anon list, starting from "index"
*/
break;
/*
* decrement npages by number of NULL anon slots we skipped
*/
if (npages <= 0)
break;
/*
* Get anonymous page and try to lock it SE_EXCL;
* if we couldn't grab the lock we skip to next page.
*/
pgcnt = 1;
continue;
}
/*
* we cannot free a page which is permanently locked.
* The page_struct_lock need not be acquired to examine
* these fields since the page has an "exclusive" lock.
*/
continue;
}
/*
* skip this one if copy-on-write is not yet broken.
*/
continue;
}
pgcnt = 1;
/*
* free swap slot;
*/
PAGESIZE);
}
/*
* while we are at it, unload all the translations
* and attempt to free the page.
*/
/*LINTED: constant in conditional context */
continue;
}
if (!page_try_demote_pages(pp)) {
continue;
} else {
pgcnt = 1;
}
/*LINTED*/
continue;
}
}
/*
* try to lock remaining pages
*/
pp++;
break;
break;
}
}
for (i = 0; i < pgcnt; i++) {
break;
/*
* skip this one if copy-on-write
* is not yet broken.
*/
goto skiplp;
}
}
}
continue;
}
}
}
/*
* Return the kept page(s) and protections back to the segment driver.
*/
int
{
int err;
/*
* Lookup the page. If page is being paged in,
* wait for it to finish as we must return a list of
* pages since this routine acts like the VOP_GETPAGE
* routine does.
*/
else
return (0);
}
/*
* Simply treat it as a vnode fault on the anon vp.
*/
"anon_getpage:seg %x addr %x vp %x",
}
return (err);
}
/*
* Creates or returns kept pages to the segment driver. Returns -1 if a large
* page cannot be allocated. Returns -2 if some other process has allocated a
* larger page.
*
* For cowfault it will allocate any size pages to fill the requested area to
* avoid partially overwriting anon slots (i.e. sharing only some of the anon
* slots within a large page with other processes). This policy greatly
* simplifies large page freeing (which is only freed when all anon slot
* refcnts are 0).
*/
int
int brkcow,
int anypgsz,
int pgflags,
{
int prealloc = 1;
int err, slotcreate;
#endif
if (szc == 0) {
if (err)
return (err);
page_unlock(ppa[0]);
return (-2);
}
return (0);
}
panic("anon_map_getpages: cowfault for szc 0");
} else {
return (ENOMEM);
return (0);
}
}
/*
* First we check for the case that the requested large
* page or larger page already exists in the system.
* Actually we only check if the first constituent page
* exists and only preallocate if it's not found.
*/
if (ap) {
return (-2);
}
prealloc = 0;
}
}
}
top:
/*
* If a smaller page or no page at all was found,
* grab a large page off the freelist.
*/
if (prealloc) {
/*
* If the refcnt's of all anon slots are <= 1
* they can't increase since we are holding
* the address space's lock. So segvn can
* safely decrease szc without risking to
* generate a cow fault for the region smaller
* than the segment's largest page size.
*/
return (-1);
}
/*
* This is a cow fault. Copy away the entire 1 large
* page region of this segment.
*/
panic("anon_map_getpages: cowfault for szc %d",
szc);
NULL) {
if (err) {
for (i = 0; i < pg_idx; i++) {
NULL)
}
return (err);
}
} else {
/*
* Since this is a cowfault we know
* that this address space has a
* parent or children which means
* anon_dup_fill_holes() has initialized
* all anon slots within a large page
* region that had at least one anon
* slot at the time of fork().
*/
panic("anon_map_getpages: "
"cowfault but anon slot is empty");
}
}
}
}
pg_idx = 0;
slotcreate = 0;
/*
* For us to have decided not to preallocate
* would have meant that a large page
* was found. Which also means that all of the
* anon slots for that page would have been
* already created for us.
*/
if (prealloc == 0)
panic("anon_map_getpages: prealloc = 0");
slotcreate = 1;
}
/*
* Now setup our preallocated page to pass down
* to swap_getpage().
*/
if (prealloc) {
}
/*
* If we just created this anon slot then call
* with S_CREATE to prevent doing IO on the page.
* Similar to the anon_zero case.
*/
if (err) {
ASSERT(slotcreate == 0);
goto io_err;
}
ASSERT(slotcreate == 0);
return (-2);
}
prealloc = 1;
goto top;
}
/*
* If we decided to preallocate but VOP_GETPAGE
* found a page in the system that satisfies our
* request then free up our preallocated large page
* and continue looping across the existing large
* page via VOP_GETPAGE.
*/
ASSERT(slotcreate == 0);
prealloc = 0;
page_free_pages(ppa[0]);
}
/*
* we have relocated out of a smaller large page.
* skip npgs - 1 iterations and continue which will
* increment by one the loop indices.
*/
ASSERT(slotcreate == 0);
if ((*protp & PROT_WRITE) &&
*protp &= ~PROT_WRITE;
}
continue;
}
/*
* Anon_zero case.
*/
if (slotcreate) {
}
if (pg_idx > 0 &&
panic("anon_map_getpages: unexpected page");
panic("anon_map_getpages: unaligned page");
}
if (prealloc == 0) {
}
*protp &= ~PROT_WRITE;
}
/*
* If this is a new anon slot then initialize
* the anon array entry.
*/
if (slotcreate) {
}
pg_idx++;
an_idx++;
}
/*
* Since preallocated pages come off the freelist
* they are locked SE_EXCL. Simply downgrade and return.
*/
if (prealloc) {
}
}
return (0);
}
/*
* We got an IO error somewhere in our large page.
* If we were using a preallocated page then just demote
* all the constituent pages that we've succeeded with so far
* to PAGESIZE pages and leave them in the system
* unlocked.
*/
if (prealloc) {
if (pg_idx > 0) {
for (i = 0; i < pgcnt; i++) {
}
for (i = 0; i < pg_idx; i++) {
page_unlock(ppa[i]);
}
/*
* Now free up the remaining unused constituent
* pages.
*/
pg_idx++;
}
} else {
page_free_pages(ppa[0]);
}
} else {
for (i = 0; i < pg_idx; i++)
page_unlock(ppa[i]);
}
if (err != -1)
return (err);
/*
* we are here because we failed to relocate.
*/
return (-1);
}
goto docow;
}
/*
* Turn a reference to an object or shared anon page
* into a private page with a copy of the data from the
* original page which is always locked by the caller.
* This routine unloads the translation and unlocks the
* original page, if it isn't being stolen, before returning
* to the caller.
*
* NOTE: The original anon slot is not freed by this routine.
* It must be freed by the caller while holding the
* "anon_map" lock to prevent races which can occur if
* a process has multiple lwps in its address space.
*/
page_t *
int oppflags,
{
int err;
if (oppflags & STEAL_PAGE)
else
/* Kernel probe */
if (oppflags & STEAL_PAGE) {
"anon_private:seg %p addr %x pp %p vp %p off %lx",
hat_setmod(pp);
/* bug 4026339 */
return (pp);
}
/*
* Call the VOP_GETPAGE routine to create the page, thereby
* enabling the vnode driver to allocate any filesystem
* space (e.g., disk block allocation for UFS). This also
* prevents more than one page from being added to the
* vnode at the same time.
*/
if (err)
goto out;
/*
* If the original page was locked, we need to move the lock
* to the new page.
*
* See the statement at the beginning of segvn_lockop() and the
* comments in page_pp_useclaim() regarding the way this is handled.
*
* Also availrmem must be decremented up front for read only mapping
* before calling page_pp_useclaim. page_pp_useclaim will bump it back
* if availrmem did not need to be decremented after all.
*/
if ((prot & PROT_WRITE) == 0) {
if (availrmem > pages_pp_maximum) {
availrmem--;
} else {
goto out;
}
}
}
/*
* Now copy the contents from the original page,
* which is locked and loaded in the MMU by
* the caller to prevent yet another page fault.
*/
/* XXX - should set mod bit in here */
/*
* Before ppcopy could handle UE or other faults, we
* would have panicked here, and still have no option
* but to do so now.
*/
panic("anon_private, ppcopy failed, opp = 0x%p, pp = 0x%p",
}
/*
* Unload the old translation.
*/
/*
* Free unmapped, unmodified original page.
* or release the lock on the original page,
* otherwise the process will sleep forever in
* anon_decref() waiting for the "exclusive" lock
* on the page.
*/
/*
* we are done with page creation so downgrade the new
* page's selock to shared, this helps when multiple
* as_fault(...SOFTLOCK...) are done to the same
* page(aio)
*/
/*
* NOTE: The original anon slot must be freed by the
* caller while holding the "anon_map" lock, if we
* copied away from an anonymous page.
*/
return (pp);
out:
if (pp)
}
int
int anypgsz,
int pgflags,
{
int err;
int prealloc = 1;
int pagelock = 0;
#ifdef DEBUG
int refcnt;
#endif
/*
* Now try and allocate the large page. If we fail then just
* let VOP_GETPAGE give us PAGESIZE pages. Normally we let
* the caller make this decision but to avoid added complexity
* it's simpler to handle that case here.
*/
if (anypgsz == -1) {
prealloc = 0;
prealloc = 0;
}
/*
* make the decrement of all refcnts of all
* anon slots of a large page appear atomic by
* getting an anonpages_hash_lock for the
* first anon slot of a large page.
*/
if (prealloc) {
}
return (0);
}
}
return (-1);
}
}
/*
* If we are passed in the vpage array and this is
* not PROT_WRITE then we need to decrement availrmem
* up front before we try anything. If we need to and
* can't decrement availrmem then it's better to fail now
* than in the middle of processing the new large page.
* page_pp_useclaim() on behalf of each constituent page
* below will adjust availrmem back for the cases not needed.
*/
pagelock = 1;
break;
}
}
if (pagelock) {
pages_useclaim += pgcnt;
} else {
}
if (prealloc) {
}
return (ENOMEM);
}
}
}
pg_idx = 0;
/*
* Now setup our preallocated page to pass down to
* swap_getpage().
*/
if (prealloc) {
}
/*
* Impossible to fail this is S_CREATE.
*/
if (err)
panic("anon_map_privatepages: VOP_GETPAGE failed");
/*
* If the original page was locked, we need to move
* the lock to the new page by transferring it from
* the original page. pg_idx can be used to index
* into the vpage array since the caller will guarantee
* that the vpage struct passed in corresponds to addr
* and forward.
*/
} else if (pagelock) {
availrmem++;
}
/*
* Now copy the contents from the original page.
*/
/*
* Before ppcopy could handle UE or other faults, we
* would have panicked here, and still have no option
* but to do so now.
*/
panic("anon_map_privatepages, ppcopy failed");
}
/*
* Release the lock on the original page,
* decrement the old slot, and downgrade the lock
* on the new copy.
*/
if (!prealloc)
/*
* Now reflect the copy in the new anon array.
*/
}
/*
* Unload the old large page translation.
*/
}
if (prealloc) {
}
}
return (0);
}
/*
* Allocate a private zero-filled anon page.
*/
page_t *
{
int err;
/* Kernel probe */
/*
* Call the VOP_GETPAGE routine to create the page, thereby
* enabling the vnode driver to allocate any filesystem
* dependent structures (e.g., disk block allocation for UFS).
* This also prevents more than one page from being added to
* the vnode at the same time since it is locked.
*/
if (err) {
return (NULL);
}
return (pp);
}
/*
* Allocate array of private zero-filled anon pages for empty slots
* and kept pages for non empty slots within given range.
*
* NOTE: This routine will try and use large pages
* if available and supported by underlying platform.
*/
int
{
int err = 0;
/*
* XXX For now only handle S_CREATE.
*/
index = start_index;
p_index = 0;
/*
* If this platform supports multiple page sizes
* then try and allocate directly from the free
* list for pages larger than PAGESIZE.
*
* NOTE:When we have page_create_ru we can stop
* directly allocating from the freelist.
*/
while (npgs) {
/*
* if anon slot already exists
* (means page has been created)
* so 1) look up the page
* 2) if the page is still in memory, get it.
* 3) if not, create a page and
* page in from physical swap device.
* These are done in anon_getpage().
*/
if (ap) {
if (err) {
panic("anon_map_createpages: anon_getpage");
}
/*
* an_pvp can become non-NULL after SysV's page was
* paged out before ISM was attached to this SysV
* shared memory segment. So free swap slot if needed.
*/
hat_setmod(pp);
} else {
}
}
index++;
npgs--;
continue;
}
/*
* Now try and allocate the largest page possible
* for the current address and range.
* Keep dropping down in page size until:
*
* 1) Properly aligned
* 2) Does not overlap existing anon pages
* 3) Fits in remaining range.
* 4) able to allocate one.
*
* NOTE: XXX When page_create_ru is completed this code
* will change.
*/
pg_cnt = 0;
while (szc) {
/*
* XXX
* Since we are faking page_create()
* we also need to do the freemem and
* pcf accounting.
*/
/*
* Get lgroup to allocate next page of shared
* memory from and use it to specify where to
* allocate the physical memory
*/
}
/*
* If a request for a page of size
* larger than PAGESIZE failed
* then don't try that size anymore.
*/
} else {
break;
}
}
szc--;
}
/*
* If just using PAGESIZE pages then don't
* directly allocate from the free list.
*/
panic("anon_map_createpages: anon_zero");
}
index++;
npgs--;
continue;
}
/*
* pplist is a list of pg_cnt PAGESIZE pages.
* These pages are locked SE_EXCL since they
* came directly off the free list.
*/
while (pg_cnt--) {
PP_CLRFREE(pp);
PP_CLRAGED(pp);
if (err) {
panic("anon_map_createpages: S_CREATE");
}
index++;
npgs--;
}
while (pg_cnt--) {
}
}
return (0);
}
static int
int private)
{
pgcnt_t i;
int root = 0;
}
}
}
if (ppasize != 0) {
}
return (0);
}
}
}
panic("anon_try_demote_pages: an_refcnt != 1");
}
SE_EXCL);
(void) hat_pageunload(pp,
}
} else {
}
}
for (i = 0; i < pgcnt; i++) {
if (!root) {
if (curnpgs != 0)
panic("anon_try_demote_pages: "
"bad large page");
root = 1;
} else {
ASSERT(i > 0);
npgs - 1)
root = 0;
}
curnpgs--;
}
}
panic("anon_try_demote_pages: bad large page");
for (i = 0; i < pgcnt; i++) {
}
}
if (ppasize != 0) {
}
return (1);
}
/*
* anon_map_demotepages() can only be called by MAP_PRIVATE segments.
*/
int
{
int err;
int retry = 0;
top:
return (0);
}
panic("anon_map_demotepages: no anon slot");
if (err) {
for (i = 0; i < pg_idx; i++) {
}
return (err);
}
}
if (err > 0) {
return (err);
}
if (err == -1) {
retry = 1;
goto top;
}
for (i = 0; i < pgcnt; i++) {
retry = 1;
page_unlock(ppa[i]);
}
if (retry) {
goto top;
}
return (0);
}
/*
* Free pages of shared anon map. It's assumed that anon maps don't share anon
* structures with private anon maps. Therefore all anon structures should
* have at most one reference at this point. This means underlying pages can
* be exclusively locked and demoted or freed. If not freeing entire
* large pages, demote the ends of the region we free to be able to free
* subpages. Page roots correspond to aligned index positions in anon map.
*/
void
{
if (len == 0) { /* XXX */
return;
}
if (sidx_aligned != sidx ||
panic("anon_shmap_free_pages: demote failed");
}
return;
}
}
if (sidx < eidx_aligned) {
sidx = eidx_aligned;
}
if (eidx == eidx_aligned) {
return;
}
panic("anon_shmap_free_pages: demote failed");
}
} else {
}
}
/*
* This routine should be called with amp's writer lock when there're no other
* users of amp. All pcache entries of this amp must have been already
* inactivated. We must not drop a_rwlock here to prevent new users from
* attaching to this amp.
*/
void
{
if (amp->a_softlockcnt != 0) {
}
/*
* Since all pcache entries were already inactive before this routine
* was called seg_ppurge() couldn't return while there're still
* entries that can be found via the list anchored at a_phead. So we
* can assert this list is empty now. a_softlockcnt may be still non 0
* if asynchronous thread that manages pcache already removed pcache
* entries but hasn't unlocked the pages yet. If a_softlockcnt is non
* 0 we just wait on a_purgecv for shamp_reclaim() to finish. Even if
* a_softlockcnt is 0 we grab a_purgemtx to avoid freeing anon map
* before shamp_reclaim() is done with it. a_purgemtx also taken by
* shamp_reclaim() while a_softlockcnt was still not 0 acts as a
* barrier that prevents anonmap_purge() from completing while
* shamp_reclaim() may still be referencing this amp.
*/
while (amp->a_softlockcnt != 0) {
}
}
/*
* Allocate and initialize an anon_map structure for seg
* associating the given swap reservation with the new anon_map.
*/
struct anon_map *
{
return (NULL);
}
return (NULL);
}
amp->a_softlockcnt = 0;
amp->a_purgewait = 0;
return (amp);
}
void
{
}
/*
* Returns true if the app array has some empty slots.
* The off and len values represent the starting offset and length of the
* mapping. When true is returned, these values may be modified
* to be the largest range which includes empty slots.
*/
int
{
low = -1;
if (low == -1)
low = i;
high = i;
}
}
if (low != -1) {
/*
* Found at least one non-anon page.
* Set up the off and len return values.
*/
if (low != 0)
return (1);
}
return (0);
}
/*
* Return a count of the number of existing anon pages in the anon array
* app in the range (off, off+len). The array and slots must be guaranteed
* stable by the caller.
*/
{
while (nslots-- > 0) {
cnt++;
anon_index++;
}
return (cnt);
}
/*
* Move reserved phys swap into memory swap (unreserve phys swap
* and reserve mem swap by the same amount).
* Used by segspt when it needs to lock reserved swap npages in memory
*/
int
{
if (npages > unlocked_mem_swap) {
/*
* if there is not enough unlocked mem swap we take missing
* amount from phys swap and give it to mem swap
*/
return (ENOMEM);
}
}
return (0);
}
/*
* 'unlocked' reserved mem swap so when it is unreserved it
* can be moved back to phys (disk) swap
*/
void
{
}
/*
* Return the pointer from the list for a
* specified anon index.
*/
ulong_t *
{
void **ppp;
/*
* Single level case.
*/
} else {
/*
* 2 level case.
*/
}
}
}
void
{
kcondvar_t *cv;
int hash;
/*
* Use szc to determine anon slot(s) to appear atomic.
* If szc = 0, then lock the anon slot and mark it busy.
* If szc > 0, then lock the range of slots by getting the
* anon_array_lock for the first anon slot, and mark only the
* first anon slot busy to represent whole range being busy.
*/
while (ANON_ISBUSY(ap_slot))
}
int
{
int hash;
/*
* Try to lock a range of anon slots.
* Use szc to determine anon slot(s) to appear atomic.
* If szc = 0, then lock the anon slot and mark it busy.
* If szc > 0, then lock the range of slots by getting the
* anon_array_lock for the first anon slot, and mark only the
* first anon slot busy to represent whole range being busy.
* Fail if the mutex or the anon_array are busy.
*/
if (!mutex_tryenter(mtx)) {
return (EWOULDBLOCK);
}
if (ANON_ISBUSY(ap_slot)) {
return (EWOULDBLOCK);
}
return (0);
}
void
{
}