/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* rcapd is a long-running daemon enforcing project-based resource caps (see
* rcapd(1M)). Each instance of a process aggregate (project or, generically,
* "collection") may have a memory cap. A single thread monitors the resource
* utilization of capped collections, enforces caps when they are exceeded (and
* other conditions are met), and incorporates changes in configuration or
* caps. Each of these actions occurs not more frequently than the rate
* specified with rcapadm(1M).
*/
#include <sys/priocntl.h>
#include <sys/resource.h>
#include <sys/sysmacros.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <kstat.h>
#include <libintl.h>
#include <limits.h>
#include <locale.h>
#include <priv.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <libscf.h>
#include <strings.h>
#include <time.h>
#include <unistd.h>
#include <zone.h>
#include <assert.h>
#include <sys/vm_usage.h>
#include "rcapd.h"
#include "rcapd_mapping.h"
#include "rcapd_rfd.h"
#include "rcapd_stat.h"
#include "utils.h"
/*
 * Evaluate to the smaller of x and y, disregarding an operand that is not
 * positive.  When x is not positive the result is y (whatever y's value);
 * when only y is not positive the result is x.  Operands may be evaluated
 * more than once, so they must not have side effects.
 */
#define	POSITIVE_MIN(x, y) \
	(((x) > 0) ? (((y) > 0) ? MIN(x, y) : (x)) : (y))
: (hrtime_t)0)
((rcfg.rcfg_stat_file[0] != 0) ? \
typedef struct soft_scan_arg {
typedef struct sample_col_arg {
/* scanned */
/*
* Updated when we re-read the collection configurations if this rcapd instance
* is running in the global zone and the global zone is capped.
*/
/*
 * Flags.
 *
 * ever_ran		cleared at startup, set on the first pass through the
 *			main loop, and consulted again once the loop exits.
 * should_run		nonzero while the main loop should keep running;
 *			cleared by the termination signal handler.
 * should_reconfigure	set by the SIGHUP handler, then cleared by the main
 *			loop when it acts on the request.
 *
 * should_reconfigure is written from an asynchronous signal handler and
 * polled by the main loop, so it is declared volatile sig_atomic_t, the only
 * object type the C standard guarantees can be safely assigned from a
 * handler.
 *
 * NOTE(review): should_run is also written from a signal handler, but it has
 * external linkage, so its type is left unchanged here; any extern
 * declaration would have to change in step.
 */
static int ever_ran;
int should_run;
static volatile sig_atomic_t should_reconfigure;

static int verify_statistics(void);
static int update_statistics(void);
/*
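* Checks if a process is marked 'system'. Returns B_FALSE only when it is
* known not to be; if its scheduling parameters cannot be read, it is assumed
* to be a system process.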
*/
static boolean_t
{
PC_KY_NULL) != -1) {
} else {
debug("cannot get class-specific scheduling parameters; "
"assuming system process\n");
return (B_TRUE);
}
}
static void
{
/* flag indicating whether the process should be scanned. */
/*
* Determine which collection to put this process into. We only have
* to worry about tracking both zone and project capped processes if
* this rcapd instance is running in the global zone, since we'll only
* see processes in our own projects in a non-global zone. In the
* global zone, if the process belongs to a non-global zone, we only
* need to track it for the capped non-global zone collection. For
* global zone processes, we first attempt to put the process into a
* capped project collection. On the second pass into this function
* the projid will be cleared so we will just track the process for the
* global zone collection as a whole.
*/
} else {
/* try to add to zone collection */
}
return;
/*
* If the process is already being tracked, update the unscannable flag,
* as determined by the caller, from the process's psinfo.
*/
debug("process %d: became unscannable\n",
}
return;
}
}
/*
* We've fallen off the list without finding our current process;
* insert it at the list head.
*/
else {
/*
* If the caller didn't flag this process as unscannable
* already, do some more checking.
*/
#ifdef DEBUG
/*
* Verify the sanity of lprocess. It should not contain the
* process we are about to prepend.
*/
debug("The collection %lld already has these members, "
"including me, %d!\n",
}
abort();
}
#endif /* DEBUG */
debug("tracking %s %ld %d %s%s\n",
}
}
static int
{
debug_high("list_walk_all aborted at lpc %d\n",
(int)pid);
return (1);
}
}
return (0);
}
/*
* Invoke the given callback for each process in each collection. Callbacks
* are allowed to change the linkage of the process on which they act.
*/
static void
{
}
static void
{
} else
debug("revoking psinfo fd for unknown process\n");
}
/*
* Retrieve a process's psinfo via an already-opened or new file descriptor.
* The supplied descriptor will be closed on failure. An optional callback
* will be invoked with the last descriptor tried, and a supplied callback
* argument, as its arguments, such that the new descriptor may be cached, or
* an old one may be invalidated. If the result of the callback is zero, the
* caller is to assume responsibility for the file descriptor, and must close
* it with rfd_close().
*
* On failure, a nonzero value is returned.
*/
int
{
int fd;
int can_try_uncached;
do {
if (cached_fd >= 0) {
can_try_uncached = 1;
debug_high("%d/psinfo, trying cached fd %d\n",
} else {
can_try_uncached = 0;
break;
} else
}
break;
else {
}
} while (can_try_uncached == 1);
if (fd >= 0) {
}
return ((fd >= 0) ? 0 : -1);
}
/*
* Retrieve the collection membership of all processes and update the psinfo of
* those non-system, non-zombie ones in collections. For global zone processes,
* we first attempt to put the process into a capped project collection. We
* also want to track the process for the global zone collection as a whole.
*/
static void
{
/*
* We also want to track this process for the global
* zone as a whole so add it to the global zone
* collection as well.
*/
}
}
}
/*
* Cache the process' psinfo fd, taking responsibility for freeing it.
*/
int
{
return (0);
}
/*
* Get the system pagesize.
*/
static void
get_page_size(void)
{
}
static void
{
else
}
/*
* Get the zone's & project's RSS from the kernel.
*/
static void
{
size_t i;
if (my_zone_only) {
} else {
flags = 0;
if (col_types & CAPPED_PROJECT)
}
if (flags == 0)
return;
/* try the current buffer to see if the list will fit */
nres = vmu_vals_len;
return;
}
}
if (nres > vmu_vals_len) {
/* array size is now too small, increase it and try again */
"kernel\n"));
vmu_vals_len = nvmu_vals = 0;
return;
}
vmu_vals_len = nres;
goto again;
}
debug("vmusage_sample\n");
for (i = 0; i < nvmu_vals; i++) {
debug("%d: id: %d, type: 0x%x, rss_all: %llu (%lluKB), "
(unsigned long long)vmu_vals[i].vmu_rss_all,
(unsigned long long)vmu_vals[i].vmu_swap_all);
}
}
static void
{
int i;
lcol->lcol_image_size = 0;
for (i = 0; i < nvmu_vals; i++) {
continue;
continue;
continue;
/* we found the right RSS entry, update the collection vals */
break;
}
}
/*
* Sample the collection RSS, updating the collection's statistics with the
* results. Also, sum the rss of all capped projects & return true if
* the collection is over cap.
*/
static int
{
if (excess > 0) {
}
return (0);
}
/*
* Determine if we have capped projects, capped zones or both.
*/
static int
{
/* skip uncapped collections */
if (lcol->lcol_rss_cap == 0)
return (1);
*col_type |= CAPPED_PROJECT;
else
*col_type |= CAPPED_ZONE;
/* once we know everything is capped, we can stop looking */
return (1);
return (0);
}
/*
* Open /proc and walk entries.
*/
static void
{
(void) rfd_reserve(1);
continue;
continue;
else
}
}
/*
* Clear unmarked callback.
*/
/*ARGSUSED*/
static int
{
} else {
}
return (0);
}
/*
* Print, for debugging purposes, a collection's recently-sampled RSS and
* excess.
*/
/*ARGSUSED*/
static int
{
(unsigned long long)lcol->lcol_rss_cap,
(long long)excess);
return (0);
}
/*
* Scan those collections which have exceeded their caps.
*
* If we're running in the global zone it might have a cap. We don't want to
* do any capping for the global zone yet since we might get under the cap by
* just capping the projects in the global zone.
*/
/*ARGSUSED*/
static int
{
/* skip over global zone collection for now but keep track for later */
return (0);
}
}
return (0);
}
/*
* Scan the global zone collection and see if it still exceeds its cap.
* We take into account the effects of capping any global zone projects here.
*/
static void
{
/*
* If we had projects over their cap and the global zone was also over
* its cap then we need to get the up-to-date global zone rss to
* determine if we are still over the global zone cap. We might have
* gone under while we scanned the capped projects. If there were no
* projects over cap then we can use the rss value we already have for
* the global zone.
*/
if (project_over_cap && excess > 0) {
}
if (excess > 0) {
}
}
/*
* Do a soft scan of those collections which have excesses. A soft scan is one
* in which the cap enforcement pressure is taken into account. The difference
* between the utilized physical memory and the cap enforcement pressure will
* be scanned-for, and each collection will be scanned proportionally by their
* present excesses.
*/
static int
{
/* skip over global zone collection for now but keep track for later */
return (0);
}
debug("%s %ld excess %lld scan_goal %lld sum_excess %llu, "
"scanning %lld\n",
"project" : "zone"),
(unsigned long long)arg->ssa_sum_excess,
(long long)adjusted_excess);
}
return (0);
}
static void
{
/*
* If we had projects over their cap and the global zone was also over
* its cap then we need to get the up-to-date global zone rss to
* determine if we are still over the global zone cap. We might have
* gone under while we scanned the capped projects. If there were no
* projects over cap then we can use the rss value we already have for
* the global zone.
*/
}
if (excess > 0) {
debug("%s %ld excess %lld scan_goal %lld sum_excess %llu, "
"scanning %lld\n",
"project" : "zone"),
(unsigned long long)arg->ssa_sum_excess,
(long long)adjusted_excess);
}
}
/*
* When a scan could happen, but caps aren't enforced, tick the
* lcols_unenforced_cap counter.
*/
/*ARGSUSED*/
static int
{
return (0);
}
/*
* Update the count of physically installed memory.
*/
static void
update_phys_total(void)
{
if (phys_total != old_phys_total)
}
/*
* Unlink a process from its collection, updating relevant statistics, and
* freeing its associated memory.
*/
void
{
if (lpc->lpc_psinfo_fd >= 0) {
debug("could not close %d lpc_psinfo_fd %d",
}
if (lpc->lpc_pgdata_fd >= 0) {
debug("could not close %d lpc_pgdata_fd %d",
}
if (lpc->lpc_xmap_fd >= 0) {
debug("could not close %d lpc_xmap_fd %d",
}
}
/*
* Collection clear callback.
*/
/*ARGSUSED*/
static int
{
return (0);
}
/*
* Respond to a terminating signal by setting a termination flag.
*/
/*ARGSUSED*/
static void
{
if (termination_signal == 0)
should_run = 0;
}
/*
* Handle any synchronous or asynchronous signals that would ordinarily cause a
* process to abort.
*/
/*ARGSUSED*/
static void
{
/*
* Allow the scanner to make a last-ditch effort to resume any stopped
* processes.
*/
scan_abort();
abort();
}
/*
* Clean up collections which have been removed due to configuration. Unlink
* the collection from lcollection and free it.
*/
/*ARGSUSED*/
static int
{
debug("freeing %s %s\n",
}
return (0);
}
/*
* Set those variables which depend on the global configuration.
*/
static void
finish_configuration(void)
{
/*
* Warn that any lnode (or non-project) mode specification (by an SRM
* 1.3 configuration file, for example) is ignored.
*/
}
}
/*
* Cause the configuration to be reread and applied.
*/
static void
reread_configuration(void)
{
} else {
/*
* Done reading configuration. Remove existing
* collections in case there is a change in collection type.
*/
}
/*
* Make the newly-read configuration the global one, and update
* any variables that depend on it.
*/
}
}
/*
* First, examine changes, additions, and deletions to cap definitions.
* Then, set the next event time.
*/
static void
{
debug("reconfigure...\n");
/*
* Walk the lcollection, marking active collections so inactive ones
* can be freed.
*/
/*
* Reset each event time to the shorter of the previous and new
* intervals.
*/
next_report = now;
else
*next_proc_walk = now;
else
*next_rss_sample = now;
else
}
/*
* Respond to SIGHUP by triggering a reread of the configuration and cap
* definitions.
*/
/*ARGSUSED*/
static void
{
should_reconfigure = 1;
}
/*
* Print, for debugging purposes, each collection's interval statistics.
*/
/*ARGSUSED*/
static int
{
(unsigned long long)( \
"ineffective/scans/unenforced/samplings: %llu/%llu/%llu/%llu, RSS "
"%llu scans over %llu ms\n",
(unsigned long long)lcol->lcol_rss_cap,
return (0);
}
/*
* Record each collection's interval statistics in the statistics file.
*/
static int
{
/*
* Copy the relevant fields to the collection's record.
*/
} else {
debug("can't write %s %s statistics",
"project" : "zone"),
}
return (0);
}
/*
* Determine the count of pages scanned by the global page scanner, obtained
* from the cpu_stat:*::scan kstats. Return zero on success.
*/
static int
{
return (0);
}
scanned += ((cpu_stat_t *)
} else {
return (-1);
}
}
}
return (0);
}
/*
* Determine if the global page scanner is running, during which no memory
* caps should be enforced, to prevent interference with the global page
* scanner.
*/
static boolean_t
{
/* measure delta in page scan count */
if (get_globally_scanned_pages(&new_sp) == 0) {
debug("global memory pressure detected (%llu "
"pages scanned since last interval)\n",
}
} else {
}
return (res);
}
/*
* If soft caps are in use, determine if global memory pressure exceeds the
* configured maximum above which soft caps are enforced.
*/
static boolean_t
{
/*
* Check for changes to the amount of installed physical memory, to
* compute the current memory pressure.
*/
* 100.0 / phys_total);
if (rcfg.rcfg_memory_cap_enforcement_pressure > 0 &&
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Update the shared statistics file with each collection's current statistics.
* Return zero on success.
*/
static int
update_statistics(void)
{
/*
* Try to create the directory regardless of whether it already exists.
* If it is not there, it will be created. Otherwise, any problem with
* the directory will cause the mkstemp call below to fail anyway.
*/
/*
* Create a temporary file.
*/
debug("temporary file template size too small\n");
return (-1);
}
(void) rfd_reserve(1);
/*
* Write the header and per-collection statistics.
*/
if (fd >= 0) {
sizeof (rs)) {
/*
* Replace the existing statistics file with this new
* one.
*/
} else
res = -1;
} else
res = -1;
return (res);
}
/*
* Verify the statistics file can be created and written to, and die if an
* existing file may be in use by another rcapd.
*
* Returns the result of update_statistics(), which is zero on success.
*/
static int
verify_statistics(void)
{
/*
* Warn if another instance of rcapd might be active.
*/
/*
* NOTE(review): presumably makes sure a file descriptor is available
* for the statistics update that follows -- confirm against
* rfd_reserve()'s definition.
*/
(void) rfd_reserve(1);
/* Writability is checked by performing an actual statistics update. */
return (update_statistics());
}
static int
{
lcol->lcol_rss_cap));
return (0);
}
/*
* Compute the quantity of memory (in kilobytes) above the cap enforcement
* pressure. Set the scan goal to that quantity (or at most the excess).
*/
static void
{
/*
* Compute the sum of the collections' excesses, which will be the
* denominator.
*/
argp->ssa_sum_excess = 0;
}
static void
rcapd_usage(void)
{
}
void
check_update_statistics(void)
{
debug("updating statistics...\n");
if (update_statistics() != 0)
debug("couldn't update statistics");
}
}
static void
{
/*
* Ensure the required privileges, suitable for controlling processes,
* are possessed.
*/
/*
*/
}
/*
* This function does the top-level work to determine if we should do any
* memory capping, and if so, it invokes the right call-backs to do the work.
*/
static void
{
/* soft cap enforcement flag, depending on memory pressure */
/* avoid interference with kernel's page scanner */
/* no capped collections, skip checking rss */
if (col_types == 0)
return;
/* Determine if soft caps are enforced. */
/* Determine if the global page scanner is running. */
/*
* Sample collections' member processes RSSes and recompute
* collections' excess.
*/
debug("any collection/project over cap = %d, %d\n",
if (enforce_soft_caps)
/*
* Cap enforcement is determined by the previous conditions.
*/
(rcfg.rcfg_memory_cap_enforcement_pressure == 0 ||
/*
* If soft caps are in use, determine the size of the portion from each
* collection to scan for.
*/
if (enforce_caps && enforce_soft_caps)
/*
* Victimize offending collections.
*/
if (enforce_caps && (!enforce_soft_caps ||
/*
* Since at least one collection is over its cap & needs
* enforcing, check if it is at least time for a process walk
* (we could be well past time since we only walk /proc when
* we need to) and if so, update each collection's process list
* in a single pass through /proc.
*/
debug("scanning process list...\n");
}
if (enforce_soft_caps) {
debug("scan goal is %lldKB\n",
(long long)arg.ssa_scan_goal);
/* process global zone */
}
} else {
/* process global zone */
}
}
} else if (col_arg.sca_any_over_cap) {
}
}
int
{
int res;
(void) set_message_priority(RCM_INFO);
(void) setpname("rcapd");
(void) chdir("/");
should_run = 1;
ever_ran = 0;
(void) textdomain(TEXT_DOMAIN);
/*
* Parse command-line options.
*/
switch (res) {
case 'd':
should_fork = 0;
if (debug_mode == 0) {
debug_mode = 1;
(void) set_message_priority(RCM_DEBUG);
} else
(void) set_message_priority(RCM_DEBUG_HIGH);
break;
case 'F':
should_fork = 0;
break;
default:
rcapd_usage();
return (E_USAGE);
/*NOTREACHED*/
}
/*
* Read the configuration.
*/
return (SMF_EXIT_ERR_CONFIG);
}
/*
* If not debugging, fork and continue operating, changing the
* destination of messages to syslog().
*/
if (should_fork == 1) {
debug("forking\n");
if (child == -1)
if (child > 0)
return (0);
else {
(void) set_message_destination(RCD_SYSLOG);
}
/*
* Start a new session and detach from the controlling tty.
*/
"terminal"));
}
should_reconfigure = 0;
/*
* Check that required privileges are possessed.
*/
/*
* Open the kstat chain.
*/
kctl = kstat_open();
/*
* Set RLIMIT_NOFILE as high as practical, so roughly 10K processes can
* be effectively managed without revoking descriptors (at 3 per
* process).
*/
}
else
debug("fd limit: unknown\n");
/*
* Handle those signals whose (default) exit disposition
* prevents rcapd from finishing scanning before terminating.
*/
/*
* Install a signal handler for reconfiguration processing.
*/
/*
* Determine which process collections to cap.
*/
/*
* Loop forever, monitoring collections' resident set sizes and
* enforcing their caps. Look for changes in caps as well as
* responding to requests to reread the configuration. Update
* per-collection statistics periodically.
*/
while (should_run != 0) {
/*
* Announce that rcapd is starting.
*/
if (ever_ran == 0) {
ever_ran = 1;
}
/*
* Check the configuration at every next_configuration interval.
* Update the rss data once every next_rss_sample interval.
* The condition of global memory pressure is also checked at
* the same frequency, if strict caps are in use.
*/
/*
* Detect configuration and cap changes only when SIGHUP
* is received. Call reconfigure to apply new configuration
* parameters.
*/
if (should_reconfigure == 1) {
should_reconfigure = 0;
}
}
/*
* Do the main work for enforcing caps.
*/
}
/*
* Update the statistics file, if it's time.
*/
/*
* Sleep for some time before repeating.
*/
}
}
if (termination_signal != 0)
if (ever_ran != 0)
/*
* Unlink the statistics file before exiting.
*/
if (rcfg.rcfg_stat_file[0] != 0)
return (E_SUCCESS);
}