/* kflt_mem.c revision f045d8d6fec1759551cc2bce1d26628931f14fce */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
#include <sys/sysmacros.h>
#include <sys/kflt_mem.h>
#include <vm/seg_kmem.h>
#include <sys/mem_config.h>
#ifdef DEBUG
#define KFLT_STATS
#endif
#ifdef KFLT_STATS
/*
 * Counters collected during a single scan pass.
 * NOTE(review): the actual field declarations appear to be elided in this
 * view of the file; only the grouping comments remain. Confirm the field
 * list against the original kflt_mem.c before relying on this struct.
 */
struct kflt_stats_scan {
/* managed by KFLT_STAT_* macros */
/* set in kflt_user_evict() */
/* set in kflt_invalidate_page() */
/* set in kflt_export */
};
/*
 * Cumulative kernel-freelist statistics.
 * NOTE(review): field declarations appear elided here as well — at minimum
 * a scan_index field must exist (see KFLT_STAT_INIT_SCAN_INDEX below).
 */
struct kflt_stats {
/* managed by KFLT_STAT_* macros */
/* set in kflt_evict_thread */
/* set in kflt_create_throttle */
/* managed by KFLT_STAT_* macros */
};
static struct kflt_stats kflt_stats;
/* all-zero template used to reset per-scan counters */
static struct kflt_stats_scan kflt_stats_scan_zero;
/*
 * No real need for atomics here. For the most part the incs and sets are
 * done by the kernel freelist thread. There are a few that are done by any
 * number of other threads. Those cases are noted by comments.
 */
#define KFLT_STAT_INCR(m) kflt_stats.m++
#define KFLT_STAT_NINCR(m, v) kflt_stats.m += (v)
/*
 * NOTE(review): the replacement bodies of several of the following macros
 * appear to be elided in this view — the trailing backslash continuations
 * run directly into the next #define. Do not edit this region without the
 * original source; the continuations below are intentionally left verbatim.
 */
#define KFLT_STAT_INCR_SCAN(m) \
#define KFLT_STAT_NINCR_SCAN(m, v) \
#define KFLT_STAT_SET(m, v) kflt_stats.m = (v)
#define KFLT_STAT_SETZ(m, v) \
if (kflt_stats.m == 0) kflt_stats.m = (v)
#define KFLT_STAT_SET_SCAN(m, v) \
#define KFLT_STAT_SETZ_SCAN(m, v) \
#define KFLT_STAT_INC_SCAN_INDEX \
kflt_stats.scan_index = \
#define KFLT_STAT_INIT_SCAN_INDEX \
kflt_stats.scan_index = 0
#else /* KFLT_STATS */
/* stats compiled out: all stat macros become no-ops */
#define KFLT_STAT_INCR(v)
#define KFLT_STAT_NINCR(m, v)
#define KFLT_STAT_INCR_SCAN(v)
#define KFLT_STAT_NINCR_SCAN(m, v)
#define KFLT_STAT_SET(m, v)
#define KFLT_STAT_SETZ(m, v)
#define KFLT_STAT_SET_SCAN(m, v)
#define KFLT_STAT_SETZ_SCAN(m, v)
#define KFLT_STAT_INC_SCAN_INDEX
#define KFLT_STAT_INIT_SCAN_INDEX
#endif /* KFLT_STATS */
/* Internal Routines */
void kflt_init(void);
void kflt_evict_wakeup(void);
static boolean_t kflt_evict_cpr(void *, int);
static void kflt_thread_init(void);
static void kflt_user_evict(void);
extern mnoderange_t *mnoderanges;
extern int mnoderangecnt;
void wakeup_pcgs(void);
static int kflt_evict_ready; /* nonzero when evict thread ready */
static kcondvar_t kflt_throttle_cv;
/*
* Statistics used to drive the behavior of the evict demon.
*/
/* time in seconds to check on throttled threads */
int kflt_maxwait = 10;
int kflt_on = 0; /* indicates evict thread is initialised */
/*
* This is called before a CPR suspend and after a CPR resume. We have to
* turn off kflt_evict before a suspend, and turn it back on after a
* restart.
*/
/*ARGSUSED*/
static boolean_t
{
if (code == CB_CODE_CPR_CHKPT) {
kflt_evict_ready = 0;
return (B_TRUE);
} else if (code == CB_CODE_CPR_RESUME) {
ASSERT(kflt_evict_ready == 0);
kflt_evict_ready = 1;
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Sets up kernel freelist related statistics and starts the evict thread.
*/
void
kflt_init(void)
{
	/* Kernel freelist support disabled by tunable: nothing to set up. */
if (kflt_disable) {
return;
}
/*
 * NOTE(review): the statement bodies of the following tunable checks, and
 * the thread_create() call whose name-string argument survives below,
 * appear to be elided in this view. Each `if (tunable == 0)` presumably
 * assigns a default value — confirm against the original kflt_mem.c.
 */
if (kflt_lotsfree == 0)
if (kflt_minfree == 0)
if (kflt_desfree == 0)
if (kflt_throttlefree == 0)
if (kflt_reserve == 0)
"kflt_evict_thread");
	/* Mark the evict machinery initialised (also checked by the tick path). */
kflt_on = 1;
}
/*
* Wakeup kflt_user_evict thread and throttle waiting for the number of pages
* requested to become available. For non-critical requests, a
* timeout is added, since freemem accounting is separate from kflt
* freemem accounting: it's possible for us to get stuck and not make
* forward progress even though there was sufficient freemem before
* arriving here.
*/
/*
 * NOTE(review): the function declarator line is missing here. Based on the
 * comment above and the "set in kflt_create_throttle" stats note earlier in
 * the file, this is presumably kflt_create_throttle(pgcnt_t npages, int flags)
 * or similar — it references `npages` and `enough` which are not declared in
 * the surviving text. Several loop headers / closing structure appear elided
 * (note the unbalanced braces near the end). Returns one of KFT_CRIT,
 * KFT_FAILURE or KFT_NONCRIT.
 */
int
{
int niter = 0;
kflt_evict_wakeup(); /* just to be sure */
/*
 * Obviously, we can't throttle the evict thread since
 * we depend on it. We also can't throttle the panic thread.
 */
if (curthread == kflt_evict_thread ||
!kflt_evict_ready || panicstr) {
return (KFT_CRIT);
}
/*
 * Don't throttle threads which are critical for proper
 * vm management if we're above kflt_throttlefree or
 * if freemem is very low.
 */
if (NOMEMWAIT()) {
if (enough) {
return (KFT_CRIT);
return (KFT_CRIT);
}
}
/*
 * Don't throttle real-time threads if kflt_freemem > kflt_reserve.
 */
/* NOTE(review): the leading part of this condition appears elided. */
kflt_freemem > kflt_reserve) {
return (KFT_CRIT);
}
/*
 * Cause all other threads (which are assumed to not be
 * critical to kflt_user_evict) to wait here until their request
 * can be satisfied. Be a little paranoid and wake the
 * kernel evict thread on each loop through this logic.
 */
if (kflt_evict_ready) {
/* NOTE(review): the cv_wait/timeout between add and subtract appears elided. */
kflt_needfree += npages;
kflt_needfree -= npages;
} else {
/*
 * NOTE: atomics are used just in case we enter
 * mp operation before the evict thread is ready.
 */
}
/* Progress check: reset the retry counter whenever freemem grew. */
if (kflt_freemem > lastfree) {
niter = 0;
} else {
/* Give up after kflt_maxwait iterations without progress. */
if (++niter >= kflt_maxwait) {
return (KFT_FAILURE);
}
}
}
return (KFT_CRIT);
}
}
return (KFT_NONCRIT);
}
/*
* Creates the kernel freelist evict thread.
*/
static void
kflt_thread_init(void)
{
	/*
	 * NOTE(review): the body of this function appears elided — only the
	 * kflt_on guard/reset survives, and the trailing extra brace suggests
	 * a removed inner block (presumably the evict-thread creation).
	 * Confirm against the original kflt_mem.c.
	 */
if (kflt_on) {
kflt_on = 0;
}
}
}
/*
* This routine is used by the kernel freelist evict thread to iterate over the
* pfns.
*/
/*
 * NOTE(review): the declarator line is missing; per the comment above, this
 * iterates the evict thread over pfns. The surviving text shows parameters
 * `pfn` and `int *mnode`, with *mnode bounded by mnoderangecnt. The loop
 * header matching the stray closing brace below appears elided.
 */
static pfn_t
{
	/* Restart the walk from memory node 0 when handed an invalid pfn. */
if (pfn == PFN_INVALID) {
*mnode = 0;
return (pfn);
}
pfn++;
/* Advance to the next mnoderange; stop after the last one. */
(*mnode)++;
if (*mnode >= mnoderangecnt) {
return (PFN_INVALID);
}
}
return (pfn);
}
/*
* Locks all the kernel page freelist mutexes before promoting a group of pages
* and returning the large page to the user page freelist.
*/
void
page_kflt_lock(int mnode)
{
int i;
	/*
	 * Per the comment above, this acquires all NPC_MUTEX kernel page
	 * freelist mutexes for `mnode`. NOTE(review): the lock-acquisition
	 * statements inside the loop appear elided in this view.
	 */
for (i = 0; i < NPC_MUTEX; i++) {
}
}
/*
* Unlocks all the kernel page freelist mutexes after promoting a group of pages
* and returning the large page to the user page freelist.
*/
void
page_kflt_unlock(int mnode)
{
int i;
	/*
	 * Mirror of page_kflt_lock(): per the comment above this releases all
	 * NPC_MUTEX kernel page freelist mutexes. NOTE(review): the unlock
	 * statements inside the loop appear elided in this view.
	 */
for (i = 0; i < NPC_MUTEX; i++) {
}
}
/*
* This routine is called by the kflt_user_evict() thread whenever a free page
* is found on the kernel page freelist and there is an excess of free memory on
* the kernel freelist. It determines whether it is possible to promote groups
* of small free pages into a large page which can then be returned to the
* user page freelist.
*/
/*
 * NOTE(review): the declarator line is missing; per the comment above and the
 * "set in kflt_export" stats note, this is kflt_export (or similar), called
 * from kflt_user_evict() with a candidate page. Returns 1 when a large page
 * was promoted and moved to the user freelist, 0 otherwise. Many condition
 * and call lines appear elided below — annotate only, do not restructure.
 */
static int
{
	/* Persists across calls: base pfn of the large page being accumulated. */
static pfn_t lp_base_page_num = 0;
int mtype;
int mnode;
int bin;
int ret;
/*
 * We're not holding any locks yet, so pp state may change.
 */
lp_base_page_num = 0;
lp_count = 0;
return (0);
}
ret = 0;
/* Count pages with the same large page base */
if (lp_page_num == lp_base_page_num) {
lp_count++;
/*
 * Check that all pages are still free and on the kernel
 * freelist.
 */
tpp++, pages_left--) {
goto out;
}
}
#ifdef KFLT_STATS
#endif
/* clear kflt bit in each page */
do {
/*
 * Return large page to the user page
 * freelist
 */
bin, 0);
ret = 1;
}
}
} else {
	/* Different large-page base: restart the run with this page. */
out:
lp_count = 1;
}
return (ret);
}
/*
* This thread is woken up whenever pages are added or removed from the kernel
* page freelist and free memory on this list is low, or when there is excess
* memory on the kernel freelist. It iterates over the physical pages in the
* system and has two main tasks:
*
* 1) Relocate user pages which have been allocated on the kernel page freelist
* wherever this is possible.
*
* 2) Identify groups of free pages on the kernel page freelist which can be
* promoted to large pages and then exported to the user page freelist.
*/
static void
kflt_user_evict(void)
{
	/*
	 * NOTE(review): this function is heavily elided in this view — many
	 * condition lines, call statements and loop/brace structure are
	 * missing (see the stray `goto again;` with no surviving `again:`
	 * label, and unmatched braces). The comments added below annotate
	 * only what the surviving text shows; confirm any restructuring
	 * against the original kflt_mem.c.
	 */
int mnode;
int pass;
int last_pass;
int did_something;
int scan_again;
int pages_skipped;
int shared_skipped;
int prm;
int pages_scanned;
	/* Rescan thresholds for skipped pages (tunable constants). */
int pages_skipped_thresh = 20;
int shared_skipped_thresh = 20;
int kflt_export_scan;
	/* Fragment of the CPR callback registration for this thread. */
callb_generic_cpr, "kflt_user_evict");
mnode = 0;
	/* Signal throttled allocators that the evict thread is now running. */
kflt_evict_ready = 1;
loop:
scan_start = ddi_get_lbolt();
kflt_export_scan = 0;
	/* Excess free memory: consider exporting large pages back to users. */
if (kflt_freemem > kflt_lotsfree) {
/* Force a delay between kflt export scans */
if ((scan_start - kflt_export_scan_start) >
kflt_export_scan = 1;
}
}
pass = 0;
last_pass = 0;
did_something = 0;
pages_skipped = 0;
shared_skipped = 0;
pages_scanned = 0;
/*
 * There are two conditions which drive the loop -
 *
 * 1. If we have too much free memory then it may be possible to
 * export some large pages back to the user page freelist.
 *
 * 2. If a large number of user pages have been allocated from the
 * kernel freelist then we try to relocate them.
 */
while ((kflt_export_scan || kflt_needfree ||
if (start_pfn == PFN_INVALID) {
pass += 1;
/* initialize internal state in kflt_export() */
/*
 * Did a complete walk of kernel freelist, but didn't
 * free any pages.
 */
break;
}
did_something = 0;
}
pages_scanned = 1;
continue;
}
continue;
if (kflt_export_scan) {
did_something = 1;
}
continue;
}
if (!kflt_user_alloc) {
continue;
}
continue;
}
/* Check that the page is in the same state after locking */
continue;
}
continue;
}
/* On first pass ignore ref'd pages */
continue;
}
/* On pass 2, VN_DISPOSE if mod bit is not set */
if (pass <= 2) {
} else {
/*
 * unload the mappings before
 * checking if mod bit is set
 */
(void) hat_pageunload(pp,
/*
 * skip this page if modified
 */
continue;
}
/* LINTED: constant in conditional context */
did_something = 1;
}
continue;
}
did_something = 1;
}
/*
 * No need to drop the page lock here.
 * kflt_invalidate_page has done that for us
 * either explicitly or through a page_free.
 */
}
/*
 * Scan again if we need more memory from the kernel
 * freelist or user memory allocations from the kernel freelist
 * are too high.
 */
scan_again = 0;
scan_again = 1;
} else {
/*
 * We need to allocate more memory to the kernel
 * freelist.
 */
kflt_expand();
}
scan_again = 1;
scan_again = 1;
else if (shared_skipped > shared_skipped_thresh &&
	/* Too many shared pages skipped: raise the sharing tolerance. */
shared_level <<= 1;
scan_again = 1;
}
} else if (kflt_export_scan) {
/*
 * The delay between kflt export scans varies between a minimum
 * of 60 secs and a maximum of 5 mins. The delay is set to the
 * minimum if a page is promoted during a scan and increased
 * otherwise.
 */
if (did_something) {
} else if (kflt_scan_delay < kflt_max_scan_delay) {
}
}
goto again;
} else {
	/* Idle path: decay the sharing tolerance before sleeping/looping. */
if (shared_level > 8)
shared_level >>= 1;
goto loop;
}
}
/*
* Relocate page opp (Original Page Pointer) from kernel page freelist to page
* rpp * (Replacement Page Pointer) on the user page freelist. Page opp will be
* freed if relocation is successful, otherwise it is only unlocked.
* On entry, page opp must be exclusively locked and not free.
* *nfreedp: number of pages freed.
*/
/*
 * NOTE(review): the declarator line is missing; per the comment above this is
 * the opp->rpp relocation helper (kflt_relocate_page). The page_relocate()
 * call that produces `result`, and the body of the per-page loop, appear
 * elided. Returns 0 on success, otherwise the relocation error code.
 */
static int
{
int result;
if (result == 0) {
	/* Relocation succeeded: process each of the npgs relocated pages. */
while (npgs-- > 0) {
}
return (0); /* success */
}
return (result);
}
/*
* Based on page_invalidate_pages()
*
* Kflt_invalidate_page() uses page_relocate() twice. Both instances
* of use must be updated to match the new page_relocate() when it
* becomes available.
*
* Return result of kflt_relocate_page or zero if page was directly freed.
* *nfreedp: number of pages freed.
*/
/*
 * NOTE(review): the declarator line is missing; per the comment above this is
 * kflt_invalidate_page(page_t *pp, pgcnt_t *nfreedp) or similar. Several
 * condition lines and the calls that set `result` appear elided. Returns the
 * kflt_relocate_page result, EAGAIN if large-page demotion fails, or 0 when
 * the page was directly freed (with *nfreedp = 1).
 */
static int
{
int result;
/*
 * Is this page involved in some I/O? shared?
 * The page_struct_lock need not be acquired to
 * examine these fields since the page has an
 * "exclusive" lock.
 */
#ifdef KFLT_STATS
if (result == 0)
#endif
return (result);
}
/*
 * Unload the mappings and check if mod bit is set.
 */
#ifdef KFLT_STATS
if (result == 0)
#endif
return (result);
}
	/* Large page that cannot be demoted to base pages: caller must retry. */
if (!page_try_demote_pages(pp)) {
return (EAGAIN);
}
/* LINTED: constant in conditional context */
*nfreedp = 1;
return (0);
}
void
kflt_evict_wakeup(void)
{
	/*
	 * Only signal when we can get the mutex without blocking; if the
	 * tryenter fails the evict thread is already awake/running.
	 * NOTE(review): the wakeup condition is truncated — the dangling
	 * `kflt_needfree)) {` fragment and the cv_signal it guarded appear
	 * elided in this view.
	 */
if (mutex_tryenter(&kflt_evict_mutex)) {
kflt_needfree)) {
}
}
/* else, kflt thread is already running */
}
/*
 * NOTE(review): the declarator line is missing — from the body (asserting
 * kflt_freemem and waking the evict thread when free memory is out of range)
 * this is presumably the kflt freemem accounting hook (kflt_freemem_add or
 * similar). The tail of the condition and the wakeup call appear elided.
 */
void
{
ASSERT(kflt_freemem >= 0);
if (kflt_evict_ready &&
(kflt_freemem > kflt_lotsfree ||
}
}
/*
 * NOTE(review): the declarator line is missing — this wakes pcgs() waiters
 * and, when throttled threads need pages, presumably wakes the evict thread
 * (the condition tail and call are elided). Likely kflt_freemem_sub or a
 * similar accounting hook; confirm against the original source.
 */
void
{
wakeup_pcgs(); /* wakeup threads in pcgs() */
if (kflt_evict_ready && kflt_needfree &&
}
}
/*
 * NOTE(review): the declarator line is missing — per the once-per-second
 * comment this is the clock-tick hook (kflt_tick or similar). The broadcast
 * that wakes the throttled threads appears elided after the `if`.
 */
void
{
/*
 * Once per second we wake up all the threads throttled
 * waiting for kernel freelist memory, in case we've become stuck
 * and haven't made forward progress expanding the kernel freelist.
 */
if (kflt_on && kflt_evict_ready)
}