rcapd_scanner.c revision 0209230bf1261579beab4f55226bb509e6b850cb
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <libproc.h>
#include <limits.h>
#include <procfs.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <time.h>
#include <unistd.h>
#include "rcapd.h"
#include "rcapd_rfd.h"
#include "rcapd_mapping.h"
#include "utils.h"
static int lpc_xmap_update(lprocess_t *);
#ifdef DEBUG
#endif /* DEBUG */
/*
* The number of file descriptors required to grab a process and create an
* agent in it.
*/
#define PGRAB_FD_COUNT 10
/*
* Record a position in an address space as it corresponds to a prpageheader_t
* and affiliated structures.
*/
typedef struct prpageheader_cur {
int pr_nmap; /* number of mappings in address space */
int pr_map; /* number of this mapping */
void *pr_pdaddr; /* address of page's byte in pagedata */
int pr_nxmap; /* number of xmaps in array */
/* or -1 if xmap is out of sync */
typedef enum {
/*
* Output a scanning-related debug message.
*/
/*PRINTFLIKE3*/ /*ARGSUSED*/
static void
{
#ifdef DEBUG_MSG
char *buf;
: RCM_DEBUG))
return;
return;
#endif /* DEBUG_MSG */
}
/*
* Determine the collection's current victim, based on its last. The last will
* be returned, or, if invalid, any other valid process, if the collection has
* any.
*/
static lprocess_t *
{
/*
* Find the next scannable process, and make it the victim.
*/
return (lpc);
}
/*
* Get a process's combined current pagedata (per-page referenced and modified
* bits) and set the supplied pointer to it. The caller is responsible for
* freeing the data. If the pagedata is unreadable, a nonzero value is
* returned, and errno is set. Otherwise, 0 is returned.
*/
static int
{
int res;
redo:
errno = 0;
debug("cannot stat pagedata\n");
return (-1);
}
errno = 0;
return (-1);
}
errno = 0;
debug("pagedata changed size, retrying\n");
goto redo;
} else {
debug("cannot read pagedata");
return (-1);
}
}
return (0);
}
/*
* Return the count of kilobytes of pages represented by the given pagedata
* which meet the given criteria, having pages which are in all of the states
* specified by the mask, and in none of the states in the notmask. If the
* CP_CLEAR flag is set, the pagedata will also be cleared.
*/
#define CP_CLEAR 1
static uint64_t
{
int map;
*cur = 0;
cur++;
}
/*
* Skip to next 64-bit-aligned address to get the next
* prasmap_t.
*/
}
return (count);
}
/*
* Return the amount of memory (in kilobytes) that hasn't been referenced or
* modified, which is the memory that will be paged out first. Should be
* written to exclude nonresident pages when sufficient interfaces exist.
*/
static uint64_t
{
0, PG_MODIFIED | PG_REFERENCED));
}
/*
* Advance a prpageheader_cur_t to the address space's next mapping, returning
* its address, or NULL if there is none. Any known nonpageable or nonresident
* mappings will be skipped over.
*/
static uintptr_t
{
int i;
next:
return (NULL);
}
/*
* Skip to next 64-bit-aligned address to get the next prasmap_t.
*/
/*
* Skip any known nonpageable mappings. Currently, the only one
* detected is the schedctl page.
*/
debug("identified nonpageable schedctl mapping at %p\n",
goto next;
}
/*
* Skip mappings with no resident pages. If the xmap does not
* correspond to the pagedata for any reason, it will be ignored.
*/
/*
* Remove COW pages from the pageable RSS count.
*/
break;
}
}
debug("identified nonresident mapping at 0x%p\n",
goto next;
debug("identified unpageable mapping at 0x%p\n",
goto next;
}
}
/*
* Advance a prpageheader_cur_t to the mapping's next page, returning its
* address, or NULL if there is none.
*/
static void *
{
return (NULL);
}
/*
* Initialize a prpageheader_cur_t, positioned at the first page of the mapping
* of an address space.
*/
static void *
{
return ((void *)advance_prpageheader_cur_nextmapping(pcp));
}
/*
* Position a prpageheader_cur_t to the mapped address greater than or equal
* to the given value.
*/
static void *
{
break;
} else
addr =
(void *)advance_prpageheader_cur_nextmapping(pcp);
return (addr);
}
static void
{
}
#ifdef DEBUG
static void
{
void *addr;
}
}
static void
{
}
}
#endif /* DEBUG */
/*
* OR two prpagedata_t which are supposedly snapshots of the same address
* space. Intersecting mappings with different page sizes are tolerated but
* not normalized (not accurate). If the mappings of the two snapshots differ
* in any regard, the supplied mappings_changed flag will be set.
*/
static void
{
int mappings_changed = 0;
/*
* OR source pagedata with the destination, for pages of intersecting
* mappings.
*/
&src_cur);
&dst_cur);
}
mappings_changed = 1;
NULL) {
mappings_changed = 1;
&src_cur);
else
&dst_cur);
}
}
}
/*
* Merge the current pagedata with that on hand. If the pagedata is
* unretrievable for any reason, such as the process having exited or being a
* zombie, a nonzero value is returned, the process should be marked
* unscannable, and future attempts to scan it should be avoided, since the
* symptom is probably permanent. If the mappings of either pagedata
* differ in any respect, the supplied callback will be invoked once.
*/
static int
void(*mappings_changed_cb) (lprocess_t *))
{
int mappings_changed = 0;
0) {
char pathbuf[PROC_PATH_MAX];
return (-1);
}
/*
* OR the two snapshots.
*/
#ifdef DEBUG
#endif /* DEBUG */
#ifdef DEBUG
if (((mappings_changed != 0) ^
debug("lmapping_changed inconsistent with lmapping\n");
debug("old\n");
debug("new\n");
debug("ignored\n");
ASSERT(0);
}
lmapping_free(&new);
lmapping_free(&old);
#endif /* DEBUG */
} else
mappings_changed = 1;
if (mappings_changed != 0) {
if (mappings_changed_cb != NULL)
}
return (0);
}
/*
* Attempt to page out a region of the given process's address space. May
* return nonzero if not all of the pages are pageable, for any reason.
*/
static int
{
int res;
return (0);
errno = 0;
/*
* EBUSY indicates none of the pages have backing store allocated, or
* some pages were locked, which are less interesting than other
* conditions, which are noted.
*/
if (res != 0)
res = 0;
else
return (res);
}
/*
* Compute the delta of the victim process's RSS since the last call. If the
* psinfo cannot be obtained, no work is done, and no error is returned; it is
* up to the caller to detect the process' termination via other means.
*/
static int64_t
{
if (d_rss < 0)
(- d_rss);
*old_psinfo = *new_psinfo;
}
return (d_rss);
}
static void
{
}
static void
{
void *vicaddr;
!= 0) {
debug("removed mapping 0x%p+0t%llukB from"
vicaddr = (void *)advance_prpageheader_cur_nextmapping(
&cur);
vicaddr = (void *)advance_prpageheader_cur_nextmapping(
&cur);
}
}
/*
* Resume scanning, starting with the last victim, if it is still valid, or any
* other one, otherwise.
*/
void
{
(long long)excess);
/*
* Determine the address to start scanning at, depending on whether
* scanning can be resumed.
*/
resumed = 1;
} else {
resumed = 0;
}
scan_start = gethrtime();
/*
* Obtain the most current pagedata for the processes that might be
* scanned, and remove from the ignored set any mappings which have
* referenced or modified pages (in the hopes that the pageability of
* the mapping's pages may have changed). Determine if the
* unreferenced and unmodified portion is impossibly small to suffice
* to reduce the excess completely. If so, ignore these bits so that
* even working set will be paged out.
*/
col_unrm_size = 0;
" exited/temporarily unscannable",
goto next;
}
RCAPD_IGNORED_SET_FLUSH_IVAL) == 0) {
/*
* Periodically clear the set of ignored mappings.
* This will allow processes whose ignored segments'
* pageability have changed (without a corresponding
* reference or modification to a page) to be
* recognized.
*/
} else {
/*
* Ensure mappings with referenced or modified pages
* are not in the ignored set. Their usage might mean
* the condition which made them unpageable is gone.
*/
}
next:
}
if (col_unrm_size < excess) {
debug("will not reduce excess with only unreferenced pages\n");
CP_CLEAR, 0, 0);
if (lpc->lpc_pgdata_fd >= 0) {
debug("coud not close %d"
" lpc_pgdata_fd %d",
lpc->lpc_pgdata_fd);
}
}
}
}
/*
* Examine each process for pages to remove until the excess is
* reduced.
*/
/*
* Skip processes whose death was reported when the merging of
* pagedata was attempted.
*/
goto nextproc;
/*
* Obtain optional segment residency information.
*/
if (lpc_xmap_update(vic) != 0)
#ifdef DEBUG_MSG
{
#endif /* DEBUG_MSG */
#ifdef DEBUG_MSG
}
#endif /* DEBUG_MSG */
/*
* Take control of the victim.
*/
goto nextproc;
}
(void) rfd_reserve(PGRAB_FD_COUNT);
goto nextproc;
}
if (Pcreate_agent(scan_pr) != 0) {
goto nextproc;
}
/*
* Be very pessimistic about the state of the agent LWP --
* verify it's actually stopped.
*/
errno = 0;
goto nextproc;
}
/*
* Within the victim's address space, find contiguous ranges of
* unreferenced pages to page out.
*/
/*
* Skip mappings in the ignored set. Mappings get
* placed in the ignored set when all their resident
* pages are unreferenced and unmodified, yet unpageable
* -- such as when they are locked, or involved in
* asynchronous I/O. They will be scanned again when
* some page is referenced or modified.
*/
debug("ignored mapping at 0x%p\n",
/*
* Update statistics.
*/
vicaddr = (void *)
continue;
}
/*
* Determine a range of unreferenced pages to page out,
* and clear the R/M bits in the preceding referenced
* range.
*/
" npage %llu\n", vicaddr,
}
/*
* The end of mapping was reached before any
* unreferenced pages were seen.
*/
vicaddr = (void *)
continue;
}
do
1024) < excess);
/*
* Page out from vicaddr to the end of the mapping, or
* endaddr if set, then continue scanning after
* endaddr, or the next mapping, if not set.
*/
0) {
int willignore = 0;
/*
* If this pageout attempt was unsuccessful
* (the resident portion was not affected), and
* was for the whole mapping, put it in the
* ignored set, so it will not be scanned again
* until some page is referenced or modified.
*/
if (lmapping_insert(
&vic->lpc_ignore,
debug("not enough memory to add"
" mapping at %p to ignored"
" set\n",
willignore = 1;
}
/*
* Update statistics.
*/
1024);
"+0t(%llu/%llu)kB%s\n", vicaddr,
(unsigned long long)((d_rss <
} else {
"process %d: exited/unscannable\n",
goto nextproc;
}
/*
* Update the statistics file, if it's time.
*/
}
(long long)excess);
/*
* If a process was grabbed, release it, destroying its agent.
*/
}
/*
* Scan the collection at most once. Only if scanning was not
* aborted for any reason, and the end of lprocess has not been
* reached, determine the next victim and scan it.
*/
/*
* Determine the next process to be scanned.
*/
if (excess > 0) {
vicaddr = 0;
}
} else {
/*
* A complete scan of the collection was made,
* so tick the scan counter and stop scanning
* until the next request.
*/
/*
* If an excess still exists, tick the
* "ineffective scan" counter, signalling that
* the cap may be unenforceable.
*/
/*
* Scanning should start at the beginning of
* the process list at the next request.
*/
if (excess > 0)
}
}
}
(long long)excess);
}
}
/*
* Abort the scan in progress, and destroy the agent LWP of any grabbed
* processes.
*
* Takes no arguments and returns nothing.
*
* NOTE(review): no statements are visible in the body below, so the teardown
* described above cannot be confirmed from this view -- verify against the
* complete source before relying on it.
*/
void
scan_abort(void)
{
}
static void
{
}
/*
* Retrieve the process's current xmap, which is used to determine the size of
* the resident portion of its segments. Return zero if successful.
*/
static int
{
int res;
char pathbuf[PROC_PATH_MAX];
return (-1);
}
redo:
errno = 0;
debug("cannot stat xmap\n");
return (-1);
}
debug("xmap wrong size\n");
return (-1);
}
return (-1);
}
if (res > 0) {
debug("xmap changed size, retrying\n");
goto redo;
} else {
debug("cannot read xmap");
return (-1);
}
}
return (0);
}