vm_pagelist.c revision ce8eb11a8717b4a57c68fd77ab9f8aac15b16bf2
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Portions of this source code were derived from Berkeley 4.3 BSD
* under license from the Regents of the University of California.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
 * This file contains common functions to access and manage the page lists.
 * Many of these routines originated from platform dependent modules
 * and were modified to function in a platform independent manner.
 */
#include <sys/sysmacros.h>
#include <vm/seg_kmem.h>
#include <sys/mem_config.h>
#include <sys/mem_cage.h>
extern uint_t vac_colors;
#define MAX_PRAGMA_ALIGN 128
/* vm_cpu_data0 for the boot cpu before kmem is initialized */
#if L2CACHE_ALIGN_MAX <= MAX_PRAGMA_ALIGN
#pragma align L2CACHE_ALIGN_MAX(vm_cpu_data0)
#else
#pragma align MAX_PRAGMA_ALIGN(vm_cpu_data0)
#endif
char		vm_cpu_data0[VM_CPU_DATA_PADSIZE];
/*
 * number of page colors equivalent to requested color in page_get routines.
* If set, keeps large pages intact longer and keeps MPO allocation
* from the local mnode in favor of acquiring the 'correct' page color from
* a demoted large page or from a remote mnode.
*/
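/*
 * A sketch of the tunable the comment above describes; the name is taken
 * from its use in page_set_colorequiv_arr() below, the type is an
 * assumption.
 */
uint_t colorequiv;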
/*
* color equivalency mask for each page size.
* Mask is computed based on cpu L2$ way sizes and colorequiv global.
* High 4 bits determine the number of high order bits of the color to ignore.
 * Low 4 bits determine the number of low order bits of the color to ignore
 * (only relevant for hashed index based page coloring).
*/
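/*
 * A sketch of the per-pagesize mask array the comment above describes; the
 * name is taken from its use in page_set_colorequiv_arr() below, the
 * element type is an assumption based on the 4-bit/4-bit encoding above.
 */
uchar_t colorequivszc[MMU_PAGE_SIZES];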
/*
* if set, specifies the percentage of large pages that are free from within
* a large page region before attempting to lock those pages for
* page_get_contig_pages processing.
*
 * Should be turned on when kpr is available, since page_trylock_contig_pages
 * can then be more selective.
*/
int ptcpthreshold;
/*
* Limit page get contig page search based on failure cnts in pgcpfailcnt[].
* Enabled by default via pgcplimitsearch.
*
* pgcpfailcnt[] is bounded by PGCPFAILMAX (>= 1/2 of installed
* memory). When reached, pgcpfailcnt[] is reset to 1/2 of this upper
* bound. This upper bound range guarantees:
* - all large page 'slots' will be searched over time
 * - at least one large page candidate is considered on each pgcp call
* - count doesn't wrap around to 0
*/
int pgcplimitsearch = 1;
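/*
 * A sketch of the failure-count state the comment above describes; the
 * array name comes from that comment, and the PGCPFAILMAX expression is an
 * assumption that satisfies the ">= 1/2 of installed memory" bound.
 */
pgcnt_t	pgcpfailcnt[MMU_PAGE_SIZES];
#define	PGCPFAILMAX	(1 << (highbit(physinstalled) - 1))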
#define	SETPGCPFAILCNT(szc)						\
	if (++pgcpfailcnt[szc] >= PGCPFAILMAX)				\
		pgcpfailcnt[szc] = PGCPFAILMAX / 2;
#ifdef VM_STATS
struct vmm_vmstats_str vmm_vmstats;
#endif /* VM_STATS */
#if defined(__sparc)
#define LPGCREATE 0
#else
/* enable page_get_contig_pages */
#define LPGCREATE 1
#endif
int pg_contig_disable;
int pg_lpgcreate_nocage = LPGCREATE;
/*
* page_freelist_split pfn flag to signify no hi pfn requirement.
*/
#define PFNNULL 0
/* Flags involved in promotion and demotion routines */
/*
* Flag for page_demote to be used with PC_FREE to denote that we don't care
* what the color is as the color parameter to the function is ignored.
*/
#define PC_NO_COLOR (-1)
/* mtype value for page_promote to use when mtype does not matter */
#define PC_MTYPE_ANY (-1)
/*
* page counters candidates info
* See page_ctrs_cands comment below for more details.
* fields are as follows:
* pcc_pages_free: # pages which freelist coalesce can create
* pcc_color_free: pointer to page free counts per color
*/
typedef struct pcc_info {
	pgcnt_t	pcc_pages_free;		/* # pages freelist coalesce can create */
	pgcnt_t	*pcc_color_free;	/* pointer to page free counts per color */
} pcc_info_t;
/*
* On big machines it can take a long time to check page_counters
* arrays. page_ctrs_cands is a summary array whose elements are a dynamically
* updated sum of all elements of the corresponding page_counters arrays.
* page_freelist_coalesce() searches page_counters only if an appropriate
* element of page_ctrs_cands array is greater than 0.
*
* page_ctrs_cands is indexed by mutex (i), region (r), mnode (m), mrange (g)
*/
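/*
 * A sketch of the summary array declaration, shaped to match the
 * [i][r][m][g] indexing used by the macros below; the exact dimensioning
 * beyond NPC_MUTEX and MMU_PAGE_SIZES is an assumption.
 */
static pcc_info_t **page_ctrs_cands[NPC_MUTEX][MMU_PAGE_SIZES];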
/*
* Return in val the total number of free pages which can be created
* for the given mnode (m), mrange (g), and region size (r)
*/
#define	PGCTRS_CANDS_GETVALUE(m, g, r, val) {				\
	int i;								\
	val = 0;							\
	for (i = 0; i < NPC_MUTEX; i++) {				\
		val += page_ctrs_cands[i][(r)][(m)][(g)].pcc_pages_free; \
	}								\
}
/*
* Return in val the total number of free pages which can be created
* for the given mnode (m), mrange (g), region size (r), and color (c)
*/
#define PGCTRS_CANDS_GETVALUECOLOR(m, g, r, c, val) { \
int i; \
val = 0; \
ASSERT((c) < PAGE_GET_PAGECOLORS(r)); \
for (i = 0; i < NPC_MUTEX; i++) { \
val += \
page_ctrs_cands[i][(r)][(m)][(g)].pcc_color_free[(c)]; \
} \
}
/*
* We can only allow a single thread to update a counter within the physical
* range of the largest supported page size. That is the finest granularity
* possible since the counter values are dependent on each other
 * as you move across region sizes. PP_CTR_LOCK_INDX is used to determine the
* ctr_mutex lock index for a particular physical range.
*/
#define	PP_CTR_LOCK_INDX(pp)						\
	(((pp)->p_pagenum >>						\
	    (PAGE_BSZS_SHIFT(mmu_page_sizes - 1))) & (NPC_MUTEX - 1))
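/*
 * A sketch of the ctr_mutex lock array referenced by the comment above and
 * by the "/* ctr_mutex */" markers below; the per-mnode dimensioning is an
 * assumption matching the ctr_mutex[lckidx][mnode] usage in page_ctr_add().
 */
static kmutex_t ctr_mutex[NPC_MUTEX][MAX_MEM_NODES];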
#define INVALID_COLOR 0xffffffff
#define INVALID_MASK 0xffffffff
/*
* Local functions prototypes.
*/
void page_ctr_add(int, int, page_t *, int);
void page_ctr_add_internal(int, int, page_t *, int);
void page_ctr_sub(int, int, page_t *, int);
void page_ctr_sub_internal(int, int, page_t *, int);
void page_freelist_lock(int);
void page_freelist_unlock(int);
/*
* The page_counters array below is used to keep track of free contiguous
* physical memory. A hw_page_map_t will be allocated per mnode per szc.
* This contains an array of counters, the size of the array, a shift value
* used to convert a pagenum into a counter array index or vice versa, as
* well as a cache of the last successful index to be promoted to a larger
* page size. As an optimization, we keep track of the last successful index
* to be promoted per page color for the given size region, and this is
* allocated dynamically based upon the number of colors for a given
* region size.
*
* Conceptually, the page counters are represented as:
*
* page_counters[region_size][mnode]
*
* region_size: size code of a candidate larger page made up
* of contiguous free smaller pages.
*
* page_counters[region_size][mnode].hpm_counters[index]:
* represents how many (region_size - 1) pages either
* exist or can be created within the given index range.
*
* Let's look at a sparc example:
* If we want to create a free 512k page, we look at region_size 2
* for the mnode we want. We calculate the index and look at a specific
* hpm_counters location. If we see 8 (FULL_REGION_CNT on sparc) at
* this location, it means that 8 64k pages either exist or can be created
* from 8K pages in order to make a single free 512k page at the given
* index. Note that when a region is full, it will contribute to the
* counts in the region above it. Thus we will not know what page
* size the free pages will be which can be promoted to this new free
* page unless we look at all regions below the current region.
*/
/*
* Note: hpmctr_t is defined in platform vm_dep.h
* hw_page_map_t contains all the information needed for the page_counters
* logic. The fields are as follows:
*
* hpm_counters: dynamically allocated array to hold counter data
* hpm_entries: entries in hpm_counters
* hpm_base: PFN mapped to counter index 0
* hpm_color_current: last index in counter array for this color at
* which we successfully created a large page
*/
typedef struct hw_page_map {
	hpmctr_t	*hpm_counters;
	size_t		hpm_entries;
	int		hpm_shift;
	pfn_t		hpm_base;
	size_t		*hpm_color_current[MAX_MNODE_MRANGES];
} hw_page_map_t;
/*
* Element zero is not used, but is allocated for convenience.
*/
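/*
 * A sketch of the array the comment above refers to, shaped to match the
 * conceptual page_counters[region_size][mnode] indexing described earlier;
 * the pointer-per-szc layout is an assumption.
 */
static hw_page_map_t *page_counters[MMU_PAGE_SIZES];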
/*
* Cached value of MNODE_RANGE_CNT(mnode).
 * This is a function call on x86.
*/
static int mnode_nranges[MAX_MEM_NODES];
static int mnode_maxmrange[MAX_MEM_NODES];
/*
* The following macros are convenient ways to get access to the individual
* elements of the page_counters arrays. They can be used on both
* the left side and right side of equations.
*/
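/*
 * A hedged sketch of the accessors the comment above describes, inferred
 * from their uses later in this file (PAGE_COUNTERS_COUNTERS, _SHIFT,
 * _ENTRIES, _BASE, PNUM_TO_IDX, IDX_TO_PNUM); the original set is likely
 * larger.
 */
#define	PAGE_COUNTERS(mnode, rg_szc, idx)			\
	(page_counters[(rg_szc)][(mnode)].hpm_counters[(idx)])
#define	PAGE_COUNTERS_COUNTERS(mnode, rg_szc)			\
	(page_counters[(rg_szc)][(mnode)].hpm_counters)
#define	PAGE_COUNTERS_SHIFT(mnode, rg_szc)			\
	(page_counters[(rg_szc)][(mnode)].hpm_shift)
#define	PAGE_COUNTERS_ENTRIES(mnode, rg_szc)			\
	(page_counters[(rg_szc)][(mnode)].hpm_entries)
#define	PAGE_COUNTERS_BASE(mnode, rg_szc)			\
	(page_counters[(rg_szc)][(mnode)].hpm_base)
#define	PNUM_TO_IDX(mnode, rg_szc, pnum)			\
	(((pnum) - PAGE_COUNTERS_BASE((mnode), (rg_szc))) >>	\
	    PAGE_COUNTERS_SHIFT((mnode), (rg_szc)))
#define	IDX_TO_PNUM(mnode, rg_szc, index)			\
	(PAGE_COUNTERS_BASE((mnode), (rg_szc)) +		\
	    ((index) << PAGE_COUNTERS_SHIFT((mnode), (rg_szc))))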
/*
* Protects the hpm_counters and hpm_color_current memory from changing while
* looking at page counters information.
* Grab the write lock to modify what these fields point at.
* Grab the read lock to prevent any pointers from changing.
* The write lock can not be held during memory allocation due to a possible
* recursion deadlock with trying to grab the read lock while the
* write lock is already held.
*/
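/*
 * A sketch of the lock the comment above describes; the name follows the
 * page_ctrs_rwlock usage mentioned in page_ctrs_adjust() below, the
 * per-mnode dimensioning is an assumption.
 */
static krwlock_t page_ctrs_rwlock[MAX_MEM_NODES];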
/*
* initialize cpu_vm_data to point at cache aligned vm_cpu_data_t.
*/
void
cpu_vm_data_init(struct cpu *cp)
{
	if (cp == CPU0) {
		cp->cpu_vm_data = (void *)&vm_cpu_data0;
	} else {
		void	*kmptr;
		int	align;
		size_t	sz;

		align = (L2CACHE_ALIGN) ? L2CACHE_ALIGN : L2CACHE_ALIGN_MAX;
		sz = P2ROUNDUP(sizeof (vm_cpu_data_t), align) + align;
		kmptr = kmem_zalloc(sz, KM_SLEEP);
		cp->cpu_vm_data = (void *)P2ROUNDUP((uintptr_t)kmptr, align);
		((vm_cpu_data_t *)cp->cpu_vm_data)->vc_kmptr = kmptr;
		((vm_cpu_data_t *)cp->cpu_vm_data)->vc_kmsize = sz;
	}
}
/*
* free cpu_vm_data
*/
void
cpu_vm_data_destroy(struct cpu *cp)
{
	if (cp->cpu_vm_data != NULL && cp != CPU0) {
		kmem_free(((vm_cpu_data_t *)cp->cpu_vm_data)->vc_kmptr,
		    ((vm_cpu_data_t *)cp->cpu_vm_data)->vc_kmsize);
	}
	cp->cpu_vm_data = NULL;
}
/*
* page size to page size code
*/
int
page_szc(size_t pagesize)
{
	int	i = 0;

	while (hw_page_array[i].hp_size) {
		if (pagesize == hw_page_array[i].hp_size)
			return (i);
		i++;
	}
	return (-1);
}
/*
* page size to page size code with the restriction that it be a supported
* user page size. If it's not a supported user page size, -1 will be returned.
*/
int
page_szc_user_filtered(size_t pagesize)
{
	int szc = page_szc(pagesize);

	if ((szc != -1) && (SZC_2_USERSZC(szc) != -1)) {
		return (szc);
	}
	return (-1);
}
/*
* Return how many page sizes are available for the user to use. This is
* what the hardware supports and not based upon how the OS implements the
* support of different page sizes.
*/
uint_t
page_num_user_pagesizes(void)
{
return (mmu_exported_page_sizes);
}
uint_t
page_num_pagesizes(void)
{
return (mmu_page_sizes);
}
/*
 * returns the count of the number of base pagesize pages associated with szc
 */
pgcnt_t
page_get_pagecnt(uint_t szc)
{
	if (szc >= mmu_page_sizes)
		panic("page_get_pagecnt: out of range %d", szc);
	return (hw_page_array[szc].hp_pgcnt);
}

size_t
page_get_pagesize(uint_t szc)
{
	if (szc >= mmu_page_sizes)
		panic("page_get_pagesize: out of range %d", szc);
	return (hw_page_array[szc].hp_size);
}

/*
 * Return the size of a page based upon the index passed in. An index of
 * zero refers to the smallest page size in the system, and as index increases
 * it refers to the next larger supported page size in the system.
 * Note that szc and userszc may not be the same due to unsupported szc's on
 * some systems.
 */
size_t
page_get_user_pagesize(uint_t userszc)
{
	uint_t szc = USERSZC_2_SZC(userszc);

	if (szc >= mmu_page_sizes)
		panic("page_get_user_pagesize: out of range %d", szc);
	return (hw_page_array[szc].hp_size);
}

uint_t
page_get_shift(uint_t szc)
{
	if (szc >= mmu_page_sizes)
		panic("page_get_shift: out of range %d", szc);
	return (PAGE_GET_SHIFT(szc));
}

uint_t
page_get_pagecolors(uint_t szc)
{
	if (szc >= mmu_page_sizes)
		panic("page_get_pagecolors: out of range %d", szc);
	return (PAGE_GET_PAGECOLORS(szc));
}
/*
* this assigns the desired equivalent color after a split
*/
{
}
/*
* The interleaved_mnodes flag is set when mnodes overlap in
* the physbase..physmax range, but have disjoint slices.
* In this case hpm_counters is shared by all mnodes.
* This flag is set dynamically by the platform.
*/
int interleaved_mnodes = 0;
/*
* Called by startup().
* Size up the per page size free list counters based on physmax
* of each node and max_mem_nodes.
*
* If interleaved_mnodes is set we need to find the first mnode that
* exists. hpm_counters for the first mnode will then be shared by
* all other mnodes. If interleaved_mnodes is not set, just set
* first=mnode each time. That means there will be no sharing.
*/
size_t
page_ctrs_sz(void)
{
int r; /* region size */
int mnode;
int firstmn; /* first mnode that exists */
int nranges;
int i;
/*
* We need to determine how many page colors there are for each
* page size in order to allocate memory for any color specific
* arrays.
*/
for (i = 0; i < mmu_page_sizes; i++) {
colors_per_szc[i] = PAGE_GET_PAGECOLORS(i);
}
continue;
/*
* determine size needed for page counter arrays with
* base aligned to large page size.
*/
for (r = 1; r < mmu_page_sizes; r++) {
/* add in space for hpm_color_current */
colors_per_szc[r] * nranges;
continue;
/* add in space for hpm_counters */
r_align = page_get_pagecnt(r);
/*
* Round up to always allocate on pointer sized
* boundaries.
*/
sizeof (hpmctr_t *));
}
}
for (r = 1; r < mmu_page_sizes; r++) {
}
/* add in space for page_ctrs_cands and pcc_color_free */
continue;
for (r = 1; r < mmu_page_sizes; r++) {
colors_per_szc[r] * NPC_MUTEX;
}
}
/* ctr_mutex */
/* size for page list counts */
/*
* add some slop for roundups. page_ctrs_alloc will roundup the start
* address of the counters to ecache_alignsize boundary for every
* memory node.
*/
}
caddr_t
page_ctrs_alloc(caddr_t alloc_base)
{
int mnode;
int r; /* region size */
int i;
int firstmn; /* first mnode that exists */
/*
* We need to determine how many page colors there are for each
* page size in order to allocate memory for any color specific
* arrays.
*/
for (i = 0; i < mmu_page_sizes; i++) {
colors_per_szc[i] = PAGE_GET_PAGECOLORS(i);
}
for (r = 1; r < mmu_page_sizes; r++) {
}
/* page_ctrs_cands and pcc_color_free array */
for (i = 0; i < NPC_MUTEX; i++) {
for (r = 1; r < mmu_page_sizes; r++) {
pcc_info_t *pi;
continue;
pi->pcc_color_free =
(pgcnt_t *)alloc_base;
alloc_base += sizeof (pgcnt_t) *
colors_per_szc[r];
pi++;
}
}
}
}
/* ctr_mutex */
for (i = 0; i < NPC_MUTEX; i++) {
}
/* initialize page list counts */
int r_shift;
continue;
for (r = 1; r < mmu_page_sizes; r++) {
/*
* the page_counters base has to be aligned to the
* page count of page size code r otherwise the counts
* will cross large page boundaries.
*/
r_align = page_get_pagecnt(r);
/* base needs to be aligned - lower to aligned value */
r_shift = PAGE_BSZS_SHIFT(r);
alloc_base += sizeof (size_t) *
colors_per_szc[r];
}
for (i = 0; i < colors_per_szc[r]; i++) {
int mrange;
PAGE_NEXT_PFN_FOR_COLOR(pfnum, r, i,
}
}
/* hpm_counters may be shared by all mnodes */
PAGE_COUNTERS_COUNTERS(mnode, r) =
(hpmctr_t *)alloc_base;
alloc_base +=
sizeof (hpmctr_t *));
} else {
PAGE_COUNTERS_COUNTERS(mnode, r) =
}
/*
* Verify that PNUM_TO_IDX and IDX_TO_PNUM
* satisfy the identity requirement.
* We should be able to go from one to the other
* and get consistent values.
*/
(IDX_TO_PNUM(mnode, r, 0))) == 0);
}
/*
* Roundup the start address of the page_counters to
* cache aligned boundary for every memory node.
* page_ctrs_sz() has added some slop for these roundups.
*/
}
/* Initialize other page counter specific data structures. */
}
return (alloc_base);
}
/*
* Functions to adjust region counters for each size free list.
* Caller is responsible to acquire the ctr_mutex lock if necessary and
* thus can be called during startup without locks.
*/
/* ARGSUSED */
void
page_ctr_add_internal(int mnode, int mtype, page_t *pp, int flags)
{
ssize_t r; /* region size */
int lckidx;
/* no counter update needed for largest page size */
return;
}
/*
* Increment the count of free pages for the current
* region. Continue looping up in region size incrementing
 * count if the preceding region is full.
*/
while (r < mmu_page_sizes) {
break;
} else {
cand->pcc_pages_free++;
}
r++;
}
}
void
page_ctr_add(int mnode, int mtype, page_t *pp, int flags)
{
	int		lckidx = PP_CTR_LOCK_INDX(pp);
	kmutex_t	*lock = &ctr_mutex[lckidx][mnode];

	mutex_enter(lock);
	page_ctr_add_internal(mnode, mtype, pp, flags);
	mutex_exit(lock);
}
void
page_ctr_sub_internal(int mnode, int mtype, page_t *pp, int flags)
{
int lckidx;
ssize_t r; /* region size */
/* no counter update needed for largest page size */
return;
}
/*
* Decrement the count of free pages for the current
* region. Continue looping up in region size decrementing
 * count if the preceding region was full.
*/
while (r < mmu_page_sizes) {
break;
} else {
cand->pcc_pages_free--;
}
r++;
}
}
void
page_ctr_sub(int mnode, int mtype, page_t *pp, int flags)
{
	int		lckidx = PP_CTR_LOCK_INDX(pp);
	kmutex_t	*lock = &ctr_mutex[lckidx][mnode];

	mutex_enter(lock);
	page_ctr_sub_internal(mnode, mtype, pp, flags);
	mutex_exit(lock);
}
/*
* Adjust page counters following a memory attach, since typically the
* size of the array needs to change, and the PFN to counter index
* mapping needs to change.
*
* It is possible this mnode did not exist at startup. In that case
* allocate pcc_info_t and pcc_color_free arrays. Also, allow for nranges
* to change (a theoretical possibility on x86), which means pcc_color_free
* arrays must be extended.
*/
int
page_ctrs_adjust(int mnode)
{
int r; /* region size */
int i;
int cands_cache_nranges;
int old_maxmrange, new_maxmrange;
int rc = 0;
if (cands_cache == NULL)
return (ENOMEM);
i = -1;
/* prepare to free non-null pointers on the way out */
/*
* We need to determine how many page colors there are for each
* page size in order to allocate memory for any color specific
* arrays.
*/
for (r = 0; r < mmu_page_sizes; r++) {
colors_per_szc[r] = PAGE_GET_PAGECOLORS(r);
}
/*
* Preallocate all of the new hpm_counters arrays as we can't
* hold the page_ctrs_rwlock as a writer and allocate memory.
* If we can't allocate all of the arrays, undo our work so far
* and return failure.
*/
for (r = 1; r < mmu_page_sizes; r++) {
size_cache[r] = pcsz;
sizeof (hpmctr_t), KM_NOSLEEP);
goto cleanup;
}
}
/*
* Preallocate all of the new color current arrays as we can't
* hold the page_ctrs_rwlock as a writer and allocate memory.
* If we can't allocate all of the arrays, undo our work so far
* and return failure.
*/
for (r = 1; r < mmu_page_sizes; r++) {
colors_per_szc[r], KM_NOSLEEP);
goto cleanup;
}
}
}
/*
* Preallocate all of the new pcc_info_t arrays as we can't
* hold the page_ctrs_rwlock as a writer and allocate memory.
* If we can't allocate all of the arrays, undo our work so far
* and return failure.
*/
for (r = 1; r < mmu_page_sizes; r++) {
for (i = 0; i < NPC_MUTEX; i++) {
goto cleanup;
}
sizeof (pgcnt_t), KM_NOSLEEP);
goto cleanup;
}
}
}
}
/*
* Grab the write lock to prevent others from walking these arrays
* while we are modifying them.
*/
for (r = 1; r < mmu_page_sizes; r++) {
r, mrange);
}
/*
* Map the intersection of the old and new
* counters into the new array.
*/
PAGE_COUNTERS_SHIFT(mnode, r);
sizeof (hpmctr_t));
} else {
PAGE_COUNTERS_SHIFT(mnode, r);
sizeof (hpmctr_t));
}
}
/* update shared hpm_counters in other mnodes */
if (interleaved_mnodes) {
for (i = 0; i < max_mem_nodes; i++) {
if (i == mnode)
continue;
if (mem_node_config[i].exists == 0)
continue;
PAGE_COUNTERS_COUNTERS(i, r) = new_ctr;
PAGE_COUNTERS_ENTRIES(i, r) = pcsz;
PAGE_COUNTERS_BASE(i, r) = newbase;
}
}
color_cache[r][mrange];
}
/*
* for now, just reset on these events as it's probably
* not worthwhile to try and optimize this.
*/
for (i = 0; i < colors_per_szc[r]; i++) {
(mnode + 1);
int m;
if (mem_node_config[m].exists == 0)
continue;
color_mask, &it);
}
}
}
/* cache info for freeing out of the critical path */
size_cache[r] = old_csz;
}
}
}
/*
* Verify that PNUM_TO_IDX and IDX_TO_PNUM
* satisfy the identity requirement.
* We should be able to go from one to the other
* and get consistent values.
*/
(IDX_TO_PNUM(mnode, r, 0))) == 0);
/* pcc_info_t and pcc_color_free */
for (i = 0; i < NPC_MUTEX; i++) {
/* preserve old pcc_color_free values, if any */
continue;
/*
* possible change in range index when
* preserving pcc_info
*/
if (new_maxmrange > old_maxmrange) {
} else if (new_maxmrange < old_maxmrange) {
}
}
}
}
/*
* Now that we have dropped the write lock, it is safe to free all
* of the memory we have cached above.
* We come thru here to free memory when pre-alloc fails, and also to
* free old pointers which were recorded while locked.
*/
for (r = 1; r < mmu_page_sizes; r++) {
size_cache[r] * sizeof (hpmctr_t));
}
colors_per_szc[r] * sizeof (size_t));
}
}
for (i = 0; i < NPC_MUTEX; i++) {
continue;
continue;
colors_per_szc[r] *
sizeof (pgcnt_t));
}
}
}
}
}
return (rc);
}
#ifdef DEBUG
/*
* confirm pp is a large page corresponding to szc
*/
void
{
if (npgs == 1) {
return;
}
/*
* Check list of pages.
*/
while (npgs--) {
if (npgs != 0) {
}
}
}
#endif /* DEBUG */
void
page_freelist_lock(int mnode)
{
	int i;

	for (i = 0; i < NPC_MUTEX; i++) {
		mutex_enter(FPC_MUTEX(mnode, i));
		mutex_enter(CPC_MUTEX(mnode, i));
	}
}

void
page_freelist_unlock(int mnode)
{
	int i;

	for (i = 0; i < NPC_MUTEX; i++) {
		mutex_exit(FPC_MUTEX(mnode, i));
		mutex_exit(CPC_MUTEX(mnode, i));
	}
}
/*
* add pp to the specified page list. Defaults to head of the page list
* unless PG_LIST_TAIL is specified.
*/
void
{
int mnode;
/*
* Large pages should be freed via page_list_add_pages().
*/
/*
* Don't need to lock the freelist first here
* because the page isn't on the freelist yet.
* This means p_szc can't change on us.
*/
if (flags & PG_LIST_ISINIT) {
/*
* PG_LIST_ISINIT is set during system startup (ie. single
* threaded), add a page to the free list and add to the
* the free region counters w/o any locking
*/
/* inline version of page_add() */
} else
} else {
if (flags & PG_FREE_LIST) {
} else {
}
if (flags & PG_LIST_TAIL)
/*
* Add counters before releasing pcm mutex to avoid a race with
* page_freelist_coalesce and page_freelist_split.
*/
}
#if defined(__sparc)
if (PP_ISNORELOC(pp)) {
}
#endif
/*
* It is up to the caller to unlock the page!
*/
}
#ifdef __sparc
/*
* This routine is only used by kcage_init during system startup.
* It performs the function of page_list_sub/PP_SETNORELOC/page_list_add
* without the overhead of taking locks and updating counters.
*/
void
{
int mnode;
int mtype;
int flags = 0;
/*
* If this is a large page on the freelist then
* break it up into smaller pages.
*/
/*
* Get list page is currently on.
*/
flags |= PG_FREE_LIST;
} else {
flags |= PG_CACHE_LIST;
}
/*
* Delete page from current list.
*/
} else {
}
/*
* Decrement page counters
*/
/*
* Set no reloc for cage initted pages.
*/
/*
* Get new list for page.
*/
} else {
}
/*
* Insert page on new list.
*/
} else {
}
/*
* Increment page counters
*/
/*
* Update cage freemem counter
*/
}
#else /* __sparc */
/* ARGSUSED */
void
{
panic("page_list_noreloc_startup: should be here only for sparc");
}
#endif
void
{
int mnode;
if (flags & PG_LIST_ISINIT) {
} else {
#if defined(__sparc)
if (PP_ISNORELOC(pp))
#endif
}
}
/*
* During boot, need to demote a large page to base
* pagesize pages for seg_kmem for use in boot_alloc()
*/
void
{
PC_FREE);
}
/*
* Take a particular page off of whatever freelist the page
* is claimed to be on.
*
* NOTE: Only used for PAGESIZE pages.
*/
void
{
int bin;
int mnode;
/*
* The p_szc field can only be changed by page_promote()
* and page_demote(). Only free pages can be promoted and
* demoted and the free list MUST be locked during these
* operations. So to prevent a race in page_list_sub()
* between computing which bin of the freelist lock to
* grab and actually grabing the lock we check again that
* the bin we locked is still the correct one. Notice that
* the p_szc field could have actually changed on us but
* if the bin happens to still be the same we are safe.
*/
goto try_again;
}
if (flags & PG_FREE_LIST) {
} else {
}
/*
* Common PAGESIZE case.
*
 * Note that we locked the freelist. This guarantees that
 * the page's p_szc cannot change until we drop the pcm mutex.
*/
/*
* Subtract counters before releasing pcm mutex
* to avoid race with page_freelist_coalesce.
*/
#if defined(__sparc)
if (PP_ISNORELOC(pp)) {
}
#endif
return;
}
/*
* Large pages on the cache list are not supported.
*/
if (flags & PG_CACHE_LIST)
panic("page_list_sub: large page on cachelist");
/*
* Slow but rare.
*
* Somebody wants this particular page which is part
* of a large page. In this case we just demote the page
* if it's on the freelist.
*
* We have to drop pcm before locking the entire freelist.
* Once we have re-locked the freelist check to make sure
* the page hasn't already been demoted or completely
* freed.
*/
/*
* Large page is on freelist.
*/
}
/*
* Subtract counters before releasing pcm mutex
* to avoid race with page_freelist_coalesce.
*/
#if defined(__sparc)
if (PP_ISNORELOC(pp)) {
}
#endif
}
void
{
int mnode;
/*
* See comment in page_list_sub().
*/
goto try_again;
}
/*
* If we're called with a page larger than szc or it got
* promoted above szc before we locked the freelist then
* drop pcm and re-lock entire freelist. If page still larger
* than szc then demote it.
*/
(void) page_demote(mnode,
}
}
} else {
}
} else {
}
#if defined(__sparc)
if (PP_ISNORELOC(pp)) {
}
#endif
}
/*
* Add the page to the front of a linked list of pages
* using the p_next & p_prev pointers for the list.
* The caller is responsible for protecting the list pointers.
*/
void
mach_page_add(page_t **ppp, page_t *pp)
{
	if (*ppp == NULL) {
		pp->p_next = pp->p_prev = pp;
	} else {
		pp->p_next = *ppp;
		pp->p_prev = (*ppp)->p_prev;
		(*ppp)->p_prev = pp;
		pp->p_prev->p_next = pp;
	}
	*ppp = pp;
}
/*
* Remove this page from a linked list of pages
* using the p_next & p_prev pointers for the list.
*
* The caller is responsible for protecting the list pointers.
*/
void
mach_page_sub(page_t **ppp, page_t *pp)
{
	ASSERT(PP_ISFREE(pp));

	if (*ppp == NULL || pp == NULL)
		panic("mach_page_sub");

	if (*ppp == pp)
		*ppp = pp->p_next;		/* go to next page */

	if (*ppp == pp)
		*ppp = NULL;			/* page list is gone */
	else {
		pp->p_prev->p_next = pp->p_next;
		pp->p_next->p_prev = pp->p_prev;
	}
	pp->p_prev = pp->p_next = pp;		/* make pp a list of one */
}
/*
* Routine fsflush uses to gradually coalesce the free list into larger pages.
*/
void
{
int mnode;
int idx;
}
static uint_t page_promote_err;
static uint_t page_promote_noreloc_err;
/*
* Create a single larger page (of szc new_szc) from smaller contiguous pages
* for the given mnode starting at pfnum. Pages involved are on the freelist
* before the call and may be returned to the caller if requested, otherwise
* they will be placed back on the freelist.
* If flags is PC_ALLOC, then the large page will be returned to the user in
* a state which is consistent with a page being taken off the freelist. If
* we failed to lock the new large page, then we will return NULL to the
* caller and put the large page on the freelist instead.
* If flags is PC_FREE, then the large page will be placed on the freelist,
* and NULL will be returned.
* The caller is responsible for locking the freelist as well as any other
* accounting which needs to be done for a returned page.
*
* RFE: For performance pass in pp instead of pfnum so
* we can avoid excessive calls to page_numtopp_nolock().
 * This would depend on an assumption that all contiguous
 * pages are in the same memseg so we can just add/subtract
 * our pp.
*
* Lock ordering:
*
* There is a potential but rare deadlock situation
* for page promotion and demotion operations. The problem
* is there are two paths into the freelist manager and
* they have different lock orders:
*
* page_create()
* lock freelist
* page_lock(EXCL)
* unlock freelist
* return
* caller drops page_lock
*
* page_free() and page_reclaim()
* caller grabs page_lock(EXCL)
*
* lock freelist
* unlock freelist
* drop page_lock
*
* What prevents a thread in page_create() from deadlocking
* with a thread freeing or reclaiming the same page is the
* page_trylock() in page_get_freelist(). If the trylock fails
* it skips the page.
*
* The lock ordering for promotion and demotion is the same as
* for page_create(). Since the same deadlock could occur during
* page promotion and freeing or reclaiming of a page on the
* cache list we might have to fail the operation and undo what
 * we have done so far. Again this is rare.
*/
page_t *
page_promote(int mnode, pfn_t pfnum, uchar_t new_szc, int flags, int mtype)
{
int which_list;
/*
* General algorithm:
* Find the starting page
* Walk each page struct removing it from the freelist,
* and linking it to all the other pages removed.
* Once all pages are off the freelist,
* walk the list, modifying p_szc to new_szc and what
* ever other info needs to be done to create a large free page.
* According to the flags, either return the page or put it
* on the freelist.
*/
/* don't return page of the wrong mtype */
return (NULL);
/*
* Loop through smaller pages to confirm that all pages
* give the same result for PP_ISNORELOC().
* We can check this reliably here as the protocol for setting
* P_NORELOC requires pages to be taken off the free list first.
*/
return (NULL);
}
}
/* Loop around coalescing the smaller pages into a big page. */
while (pages_left) {
/*
* Remove from the freelist.
*/
/*
* PG_FREE_LIST
*/
} else {
}
} else {
/*
* PG_CACHE_LIST
*
* Since this page comes from the
* cachelist, we must destroy the
* vnode association.
*/
goto fail_promote;
}
/*
* We need to be careful not to deadlock
* with another thread in page_lookup().
* The page_lookup() thread could be holding
* the same phm that we need if the two
* pages happen to hash to the same phm lock.
* At this point we have locked the entire
* freelist and page_lookup() could be trying
* to grab a freelist lock.
*/
if (!mutex_tryenter(phm)) {
goto fail_promote;
}
PP_SETAGED(pp);
}
/*
* Concatenate the smaller page(s) onto
* the large page list.
*/
pages_left -= npgs;
while (npgs--) {
}
}
/*
* return the page to the user if requested
* in the properly locked state.
*/
return (pplist);
}
/*
* Otherwise place the new large page on the freelist
*/
return (NULL);
/*
* A thread must have still been freeing or
* reclaiming the page on the cachelist.
* To prevent a deadlock undo what we have
 * done so far and return failure. This
* situation can only happen while promoting
* PAGESIZE pages.
*/
while (pplist) {
}
return (NULL);
}
/*
* Break up a large page into smaller size pages.
* Pages involved are on the freelist before the call and may
* be returned to the caller if requested, otherwise they will
* be placed back on the freelist.
* The caller is responsible for locking the freelist as well as any other
* accounting which needs to be done for a returned page.
* If flags is not PC_ALLOC, the color argument is ignored, and thus
* technically, any value may be passed in but PC_NO_COLOR is the standard
* which should be followed for clarity's sake.
*/
page_t *
{
/*
* Number of PAGESIZE pages for smaller new_szc
* page.
*/
while (pplist) {
/*
* We either break it up into PAGESIZE pages or larger.
*/
} else {
}
} else {
/*
* Break down into smaller lists of pages.
*/
n = npgs;
while (n--) {
}
} else {
}
}
}
return (ret_pp);
}
int mpss_coalesce_disable = 0;
/*
* Coalesce free pages into a page of the given szc and color if possible.
* Return the pointer to the page created, otherwise, return NULL.
*
* If pfnhi is non-zero, search for large page with pfn range less than pfnhi.
*/
page_t *
{
int r = szc; /* region size */
int mrange;
#if defined(__sparc)
#endif
if (mpss_coalesce_disable) {
return (NULL);
}
/* Prevent page_counters dynamic memory from being freed */
/* get pfn range for mtype */
#if defined(__sparc)
#else
hi++;
#endif
/* use lower limit if given */
/* round to szcpgcnt boundaries */
/* set lo to the closest pfn of the right color */
&it);
}
return (NULL);
}
full = FULL_REGION_CNT(r);
/* calculate the number of page candidates and initial search index */
do {
if (acand) {
}
if (cands == 0) {
return (NULL);
}
} else {
/* invalid color, get the closest correct pfn */
color_mask, &it);
}
}
}
/* set starting index */
#if defined(__sparc)
nhi = 0; /* search kcage ranges */
#endif
#if defined(__sparc)
/*
* Find lowest intersection of kcage ranges and mnode.
* MTYPE_NORELOC means look in the cage, otherwise outside.
*/
goto wrapit;
/* jump to the next page in the range */
goto wrapit;
goto next;
if (interleaved_mnodes &&
goto next;
}
}
#endif
goto next;
/*
* RFE: For performance maybe we can do something less
* brutal than locking the entire freelist. So far
* this doesn't seem to be a performance problem?
*/
ret_pp =
#if defined(__sparc)
if (PP_ISNORELOC(ret_pp)) {
}
#endif
return (ret_pp);
}
} else {
}
/*
* No point looking for another page if we've
* already tried all of the ones that
* page_ctr_cands indicated. Stash off where we left
* off.
* Note: this is not exact since we don't hold the
* page_freelist_locks before we initially get the
* value of cands for performance reasons, but should
* be a decent approximation.
*/
if (--cands == 0) {
idx;
break;
}
next:
color_mask, &it);
wrap++;
#if defined(__sparc)
nhi = 0; /* search kcage ranges */
#endif
}
}
return (NULL);
}
/*
* For the given mnode, promote as many small pages to large pages as possible.
* mnode can be -1, which means do them all
*/
void
{
int r; /* region size */
if (mpss_coalesce_disable) {
return;
}
/*
* Lock the entire freelist and coalesce what we can.
*
* Always promote to the largest page possible
* first to reduce the number of page promotions.
*/
}
for (r = mmu_page_sizes - 1; r > 0; r--) {
if (cands != 0)
break;
}
if (cands == 0) {
continue;
}
full = FULL_REGION_CNT(r);
int tmnode = interleaved_mnodes ?
pfnum <
(void) page_promote(tmnode,
}
}
/* shared hpm_counters covers all mnodes, so we quit */
if (interleaved_mnodes)
break;
}
}
}
}
/*
* This is where all polices for moving pages around
* to different page size free lists is implemented.
* Returns 1 on success, 0 on failure.
*
* So far these are the priorities for this algorithm in descending
* order:
*
* 1) When servicing a request try to do so with a free page
* from next size up. Helps defer fragmentation as long
* as possible.
*
* 2) Page coalesce on demand. Only when a freelist
* larger than PAGESIZE is empty and step 1
* will not work since all larger size lists are
* also empty.
*
* If pfnhi is non-zero, search for large page with pfn range less than pfnhi.
*/
page_t *
{
if (nszc == mmu_page_sizes)
return (NULL);
/*
* First try to break up a larger page to fill current size freelist.
*/
/*
* If page found then demote it.
*/
/*
* If pfnhi is not PFNNULL, look for large page below
* pfnhi. PFNNULL signifies no pfn requirement.
*/
do {
break;
}
}
if (pp) {
if (ret_pp) {
#if defined(__sparc)
if (PP_ISNORELOC(ret_pp)) {
}
#endif
return (ret_pp);
}
}
}
/* loop through next size bins */
/* we are done with this page size - check next */
/* we have already checked next size bins */
break;
if (bin_prev != INVALID_COLOR) {
break;
}
}
}
return (ret_pp);
}
/*
* Helper routine used only by the freelist code to lock
* a page. If the page is a large page then it succeeds in
* locking all the constituent pages or none at all.
 * Returns 1 on success, 0 on failure.
*/
static int
{
/*
* Fail if can't lock first or only page.
*/
return (0);
}
/*
* PAGESIZE: common case.
*/
return (1);
}
/*
* Large page case.
*/
/*
* On failure unlock what we have locked so far.
* We want to avoid attempting to capture these
* pages as the pcm mutex may be held which could
* lead to a recursive mutex panic.
*/
}
return (0);
}
}
return (1);
}
/*
* init context for walking page lists
 * Called when a page of the given szc is unavailable. Sets markers
 * for the beginning of the search to detect when the search has
 * completed a full cycle. Sets flags for splitting larger pages
 * and coalescing smaller pages. Page walking proceeds until a page
* of the desired equivalent color is found.
*/
void
{
/*
* if vac aliasing is possible make sure lower order color
* bits are never ignored
*/
if (vac_colors > 1)
ceq &= 0xf0;
/*
* calculate the number of non-equivalent colors and
* color equivalency mask
*/
if (flags & PG_MATCH_COLOR) {
if (cpu_page_colors < 0) {
/*
* this is a heterogeneous machine with different CPUs
*/
}
}
/* we can split pages in the freelist, but not the cachelist */
if (can_split) {
/* set next szc color masks and number of free list bins */
}
} else {
plw->plw_do_split = 0;
}
}
/*
* set mark to flag where next split should occur
*/
plw->plw_split_next = \
plw->plw_split_next = \
} \
}
{
if (plw->plw_do_split) {
plw->plw_do_split = 0;
}
if (szc == 0) {
}
/*
* large pages all have the same vac color
* so by now we should be done with next
* size page splitting process
*/
plw->plw_do_split = 0;
return (nbin);
}
} else {
/*
* check if next page size bin is the
* same as the next page size bin for
* bin0
*/
nbin);
}
return (nbin);
}
}
}
}
return (nbin);
}
page_t *
page_get_mnode_freelist(int mnode, uint_t bin, int mtype, uchar_t szc,
    uint_t flags)
{
int plw_initialized;
if (mtype < 0) { /* mnode does not have memory in mtype range */
return (NULL);
}
plw_initialized = 0;
/*
* Only hold one freelist lock at a time, that way we
* can start anywhere and not have to worry about lock
* ordering.
*/
do {
goto bin_empty_1;
goto bin_empty_0;
/*
* These were set before the page
* was put on the free list,
* they must still be set.
*/
/*
* Walk down the hash chain.
* 8k pages are linked on p_next
* and p_prev fields. Large pages
* are a contiguous group of
* constituent pages linked together
* on their p_next and p_prev fields.
* The large pages are linked together
* on the hash chain using p_vpnext
* p_vpprev of the base constituent
* page of each large page.
*/
if (szc == 0) {
} else {
}
goto bin_empty_0;
}
if (szc == 0) {
} else {
}
#if defined(__sparc)
(flags & PG_NORELOC) == 0);
if (PP_ISNORELOC(pp))
#endif
return (pp);
if (plw_initialized == 0) {
&plw);
plw_initialized = 1;
}
/* calculate the next bin with equivalent color */
/*
* color bins are all empty if color match. Try and
* satisfy the request by breaking up or coalescing
* pages from a different size freelist of the correct
* color that satisfies the ORIGINAL color requested.
* If that fails then try pages of the same size but
* different colors assuming we are not called with
* PG_MATCH_COLOR.
*/
if (plw.plw_do_split &&
return (pp);
return (pp);
}
/* if allowed, cycle through additional mtypes */
if (mtype >= 0)
goto try_again;
return (NULL);
}
/*
* Returns the count of free pages for 'pp' with size code 'szc'.
* Note: This function does not return an exact value as the page freelist
* locks are not held and thus the values in the page_counters may be
* changing as we walk through the data.
*/
static int
{
int i;
/* Make sure pagenum passed in is aligned properly */
/* Prevent page_counters dynamic memory from being freed */
/* Check for completely full region */
return (pgfree);
}
while (--r > 0) {
full = FULL_REGION_CNT(r);
/*
* If cnt here is full, that means we have already
* accounted for these pages earlier.
*/
}
}
}
return (pgfree);
}
/*
* Called from page_geti_contig_pages to exclusively lock constituent pages
* starting from 'spp' for page size code 'szc'.
*
* If 'ptcpthreshold' is set, the number of free pages needed in the 'szc'
* region needs to be greater than or equal to the threshold.
*/
static int
{
goto skipptcpcheck;
/*
* check if there are sufficient free pages available before attempting
* to trylock. Count is approximate as page counters can change.
*/
/* attempt to trylock if there are sufficient already free pages */
return (0);
}
for (i = 0; i < pgcnt; i++) {
while (--i != (pgcnt_t)-1) {
}
return (0);
}
ASSERT(i == 0);
return (0);
}
if (PP_ISNORELOC(pp)) {
while (i != (pgcnt_t)-1) {
i--;
}
return (0);
}
}
return (1);
}
/*
* Claim large page pointed to by 'pp'. 'pp' is the starting set
* of 'szc' constituent pages that had been locked exclusively previously.
* Will attempt to relocate constituent pages in use.
*/
static page_t *
{
while (pgcnt) {
/*
* If this is a PG_FREE_LIST page then its
* size code can change underneath us due to
 * page promotion or demotion. As an optimization
* use page_list_sub_pages() instead of
* page_list_sub().
*/
return (pp);
}
}
continue;
}
PP_SETAGED(pp);
pp++;
pgcnt--;
continue;
}
/*
* page_create_wait freemem accounting done by caller of
* page_get_freelist and not necessary to call it prior to
* calling page_get_replacement_page.
*
* page_get_replacement_page can call page_get_contig_pages
* to acquire a large page (szc > 0); the replacement must be
* smaller than the contig page size to avoid looping or
* szc == 0 and PGI_PGCPSZC0 is set.
*/
if (replpp) {
}
}
/*
* If replacement is NULL or do_page_relocate fails, fail
* coalescing of pages.
*/
/*
* Unlock un-processed target list
*/
while (pgcnt--) {
pp++;
}
/*
* Free the processed target list.
*/
while (pplist) {
}
return (NULL);
}
/* LINTED */
while (npgs--) {
(flags & PGI_PGCPSZC0)));
}
}
return (pplist);
}
/*
* Trim kernel cage from pfnlo-pfnhi and store result in lo-hi. Return code
* of 0 means nothing left after trim.
*/
int
{
int decr;
int rc = 0;
/* lower part of this mseg inside kernel cage */
/* kernel cage may have transitioned past mseg */
rc = 1;
}
}
/* else entire mseg in the cage */
} else {
/* upper part of this mseg inside kernel cage */
/* kernel cage may have transitioned past mseg */
rc = 1;
}
} else {
/* entire mseg outside of kernel cage */
rc = 1;
}
}
return (rc);
}
/*
* called from page_get_contig_pages to search 'pfnlo' thru 'pfnhi' to claim a
* page with size code 'szc'. Claiming such a page requires acquiring
* exclusive locks on all constituent pages (page_trylock_contig_pages),
* relocating pages in use and concatenating these constituent pages into a
* large page.
*
 * The page lists do not have such a large page and page_freelist_split has
 * failed to demote larger pages and/or page_freelist_coalesce has failed to
 * coalesce smaller pages into a large page.
 *
* 'flags' may specify PG_COLOR_MATCH which would limit the search of large
* pages with the same color as 'bin'.
*
* 'pfnflag' specifies the subset of the pfn range to search.
*/
static page_t *
page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags,
    pfn_t pfnlo, pfn_t pfnhi, pgcnt_t pfnflag)
{
/* LINTED : set but not used in function */
return (NULL);
} else {
ceq_mask = 0;
}
/* clear "non-significant" color bits */
/*
* trim the pfn range to search based on pfnflag. pfnflag is set
* when there have been previous page_get_contig_page failures to
* limit the search.
*
* The high bit in pfnflag specifies the number of 'slots' in the
* pfn range and the remainder of pfnflag specifies which slot.
* For example, a value of 1010b would mean the second slot of
* the pfn range that has been divided into 8 slots.
*/
if (pfnflag > 1) {
int slotlen;
}
memsegs_lock(0);
/*
* loop through memsegs to look for contig page candidates
*/
/* no overlap */
continue;
}
/* mseg too small */
continue;
/* trim off kernel cage pages from pfn range */
if (kcage_on) {
continue;
} else {
}
/* round to szcpgcnt boundaries */
continue;
/*
* set lo to point to the pfn for the desired bin. Large
* page sizes may only have a single page color
*/
if (ceq_mask > 0 || interleaved_mnodes) {
/* set lo to point at appropriate color */
color_mask, &it);
}
/* mseg cannot satisfy color request */
continue;
}
/* randomly choose a point between lo and hi to begin search */
if (ceq_mask || interleaved_mnodes) {
}
}
do {
/* pages unlocked by page_claim on failure */
memsegs_unlock(0);
return (pp);
}
}
if (ceq_mask == 0 && !interleaved_mnodes) {
} else {
} else {
}
}
/* start from the beginning */
}
}
memsegs_unlock(0);
return (NULL);
}
/*
* controlling routine that searches through physical memory in an attempt to
 * claim a large page, based on the input parameters, that is not available
 * on the page free lists.
*
* calls page_geti_contig_pages with an initial pfn range from the mnode
* and mtype. page_geti_contig_pages will trim off the parts of the pfn range
* that overlaps with the kernel cage or does not match the requested page
* color if PG_MATCH_COLOR is set. Since this search is very expensive,
* page_geti_contig_pages may further limit the search range based on
* previous failure counts (pgcpfailcnt[]).
*
* for PGI_PGCPSZC0 requests, page_get_contig_pages will relocate a base
* pagesize page that satisfies mtype.
*/
page_t *
page_get_contig_pages(int mnode, uint_t bin, int mtype, uchar_t szc,
    uint_t flags)
{
/* no allocations from cage */
flags |= PGI_NOCAGE;
/* LINTED */
if (mtype < 0) { /* mnode does not have memory in mtype range */
return (NULL);
}
/* do not limit search and ignore color if hi pri */
/* remove color match to improve chances */
flags &= ~PG_MATCH_COLOR;
do {
/* get pfn range based on mnode and mtype */
if (pfnflag) {
/* double the search size */
}
return (pp);
}
} while (mtype >= 0);
return (NULL);
}
/*
* Find the `best' page on the freelist for this (vp,off) (as,vaddr) pair.
*
* Does its own locking and accounting.
* If PG_MATCH_COLOR is set, then NULL will be returned if there are no
* pages of the proper color even if there are pages of a different color.
*
* Finds a page, removes it, THEN locks it.
*/
/*ARGSUSED*/
page_t *
page_get_freelist(struct vnode *vp, u_offset_t off, struct seg *seg,
    caddr_t vaddr, size_t size, uint_t flags, struct lgrp *lgrp)
{
int mnode;
int mtype;
/*
* If we aren't passed a specific lgroup, or passed a freed lgrp
* assume we wish to allocate near to the current thread's home.
*/
if (!LGRP_EXISTS(lgrp))
lgrp = lgrp_home_lgrp();
if (kcage_on) {
/*
* Set a "reserve" of kcage_throttlefree pages for
* PG_PANIC and cageout thread allocations.
*
* Everybody else has to serialize in
* page_create_get_something() to get a cage page, so
* that we don't deadlock cageout!
*/
return (NULL);
}
} else {
flags &= ~PG_NORELOC;
flags |= PGI_NOCAGE;
}
/* LINTED */
/*
* Convert size to page size code.
*/
panic("page_get_freelist: illegal page size request");
/* LINTED */
/*
* Try to get a local page first, but try remote if we can't
* get a page of the right color.
*/
int, mnode,
return (pp);
}
}
/*
* for non-SZC0 PAGESIZE requests, check cachelist before checking
* remote free lists. Caller expected to call page_get_cachelist which
* will check local cache lists and remote free lists.
*/
return (NULL);
}
/*
* Try to get a non-local freelist page.
*/
int, mnode,
return (pp);
}
}
}
/*
* when the cage is off chances are page_get_contig_pages() will fail
 * to lock a large page chunk, therefore when the cage is off it's not
 * called by default; this can be changed via /etc/system.
 *
 * page_get_contig_pages() is also called to acquire a base pagesize page
* for page_create_get_something().
*/
(page_get_func != page_get_contig_pages)) {
goto pgretry;
}
return (NULL);
}
/*
* Find the `best' page on the cachelist for this (vp,off) (as,vaddr) pair.
*
* Does its own locking.
* If PG_MATCH_COLOR is set, then NULL will be returned if there are no
* pages of the proper color even if there are pages of a different color.
* Otherwise, scan the bins for ones with pages. For each bin with pages,
* try to lock one of them. If no page can be locked, try the
* next bin. Return NULL if a page can not be found and locked.
*
 * Finds a page, tries to lock it, then removes it.
*/
/*ARGSUSED*/
page_t *
page_get_cachelist(struct vnode *vp, u_offset_t off, struct seg *seg,
    caddr_t vaddr, uint_t flags, struct lgrp *lgrp)
{
/*LINTED*/
int mnode;
int mtype;
/*
 * If we aren't passed a specific lgroup, or passed a freed lgrp
* assume we wish to allocate near to the current thread's home.
*/
if (!LGRP_EXISTS(lgrp))
lgrp = lgrp_home_lgrp();
if (!kcage_on) {
flags &= ~PG_NORELOC;
flags |= PGI_NOCAGE;
}
/*
* Reserve kcage_throttlefree pages for critical kernel
* threads.
*
* Everybody else has to go to page_create_get_something()
* to get a cage page, so we don't deadlock cageout.
*/
return (NULL);
}
/* LINTED */
/* LINTED */
/*
* Try local cachelists first
*/
int, mnode,
return (pp);
}
}
/*
* Try freelists/cachelists that are farther away
* This is our only chance to allocate remote pages for PAGESIZE
* requests.
*/
0, flags);
int, mnode,
return (pp);
}
int, mnode,
return (pp);
}
}
return (NULL);
}
page_t *
page_get_mnode_cachelist(uint_t bin, uint_t flags, int mnode, int mtype)
{
int plw_initialized;
/* LINTED */
if (mtype < 0) { /* mnode does not have memory in mtype range */
return (NULL);
}
plw_initialized = 0;
/*
* Only hold one cachelist lock at a time, that way we
* can start anywhere and not have to worry about lock
* ordering.
*/
do {
goto bin_empty_1;
goto bin_empty_0;
/*
* We have searched the complete list!
* And all of them (might only be one)
* are locked. This can happen since
* these pages can also be found via
* the hash list. When found via the
* hash list, they are locked first,
* then removed. We give up to let the
* other thread run.
*/
break;
}
mnode);
}
if (pp) {
/*
* Found and locked a page.
* Pull it off the list.
*/
/*
* Subtract counters before releasing pcm mutex
* to avoid a race with page_freelist_coalesce
* and page_freelist_split.
*/
#if defined(__sparc)
(flags & PG_NORELOC) == 0 ||
PP_ISNORELOC(pp));
if (PP_ISNORELOC(pp)) {
}
#endif
return (pp);
}
if (plw_initialized == 0) {
plw_initialized = 1;
}
/* calculate the next bin with equivalent color */
}
if (mtype >= 0)
goto try_again;
return (NULL);
}
#ifdef DEBUG
#define REPL_PAGE_STATS
#endif /* DEBUG */
#ifdef REPL_PAGE_STATS
struct repl_page_stats {
	uint_t	ngets;		/* counter names are assumed */
	uint_t	ngets_noreloc;
	uint_t	npgr_noreloc;
	uint_t	nnopage_first;
	uint_t	nnopage;
	uint_t	nhashout;
	uint_t	nnofree;
	uint_t	nnext_pp;
} repl_page_stats;

#define	REPL_STAT_INCR(v)	atomic_add_32(&repl_page_stats.v, 1)
#else /* REPL_PAGE_STATS */
#define	REPL_STAT_INCR(v)
#endif /* REPL_PAGE_STATS */
int pgrppgcp;
/*
* The freemem accounting must be done by the caller.
* First we try to get a replacement page of the same size as like_pp,
* if that is not possible, then we just get a set of discontiguous
* PAGESIZE pages.
*/
page_t *
page_get_replacement_page(page_t *orig_like_pp, struct lgrp *lgrp_target,
    uint_t pgrflags)
{
int mnode, page_mnode;
int szc;
int mtype;
int flags = 0;
/*
* Now we reset like_pp to the base page_t.
* That way, we won't walk past the end of this 'szc' page.
*/
if (PP_ISNORELOC(like_pp)) {
} else if (pgrflags & PGR_NORELOC) {
flags = PG_NORELOC;
}
/*
* Kernel pages must always be replaced with the same size
* pages, since we cannot properly handle demotion of kernel
* pages.
*/
pgrflags |= PGR_SAMESZC;
/* LINTED */
while (npgs) {
for (;;) {
/*
* If an lgroup was specified, try to get the
* page from that lgroup.
* NOTE: Must be careful with code below because
* lgroup may disappear and reappear since there
* is no locking for lgroup here.
*/
if (LGRP_EXISTS(lgrp_target)) {
/*
* Keep local variable for lgroup separate
* from lgroup argument since this code should
* only be exercised when lgroup argument
* exists....
*/
lgrp = lgrp_target;
/* Try the lgroup's freelists first */
!= -1) {
pplist =
}
/*
 * Now try its cachelists if this is a
* small page. Don't need to do it for
* larger ones since page_freelist_coalesce()
* already failed.
*/
break;
/* Now try its cachelists */
!= -1) {
pplist =
}
break;
}
/* Done looking in this lgroup. Bail out. */
break;
}
/*
* No lgroup was specified (or lgroup was removed by
 * DR), so just try to get the page as close to
* like_pp's mnode as possible.
* First try the local freelist...
*/
break;
/*
* ...then the local cachelist. Don't need to do it for
* larger pages cause page_freelist_coalesce() already
* failed there anyway.
*/
if (szc == 0) {
break;
}
}
/* Now try remote freelists */
page_mnode = mnode;
lgrp =
!= -1) {
/*
* Skip local mnode.
*/
if ((mnode == page_mnode) ||
continue;
}
break;
/* Now try remote cachelists */
if (mnode == -1)
break;
/*
* Skip local mnode.
*/
if ((mnode == page_mnode) ||
continue;
break;
}
}
/*
* Break out of while loop under the following cases:
* - If we successfully got a page.
* - If pgrflags specified only returning a specific
* page size and we could not find that page size.
* - If we could not satisfy the request with PAGESIZE
* or larger pages.
*/
break;
/* try to find contig page */
(mnode =
!= -1) {
flags | PGI_PGCPHIPRI);
}
break;
}
/*
* The correct thing to do here is try the next
* page size down using szc--. Due to a bug
* with the processing of HAT_RELOAD_SHARE
* where the sfmmu_ttecnt arrays of all
* hats sharing an ISM segment don't get updated,
* using intermediate size pages for relocation
* can lead to continuous page faults.
*/
szc = 0;
}
int, mnode,
PP_CLRFREE(pp);
PP_CLRAGED(pp);
npgs--;
}
} else {
break;
}
}
if (npgs) {
/*
* We were unable to allocate the necessary number
* of pages.
* We need to free up any pl.
*/
return (NULL);
} else {
return (pl);
}
}
/*
 * demote a free large page to its constituent pages
*/
void
{
int mnode;
}
}
/*
* Factor in colorequiv to check additional 'equivalent' bins.
*/
void
page_set_colorequiv_arr(void)
{
	if (colorequiv > 1) {
		int i;
		uint_t sv_a = lowbit(colorequiv) - 1;

		if (sv_a > 15)
			sv_a = 15;

		for (i = 0; i < MMU_PAGE_SIZES; i++) {
			uint_t colors;
			uint_t a = sv_a;

			if ((colors = hw_page_array[i].hp_colors) <= 1) {
				continue;
			}
while ((colors >> a) == 0)
a--;
if ((a << 4) > colorequivszc[i]) {
colorequivszc[i] = (a << 4);
}
}
}
}