zonestatd.c revision efd4c9b63ad77503c101fc6c2ed8ba96c9d52964
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
#include <alloca.h>
#include <assert.h>
#include <dirent.h>
#include <dlfcn.h>
#include <door.h>
#include <errno.h>
#include <exacct.h>
#include <ctype.h>
#include <fcntl.h>
#include <kstat.h>
#include <libcontract.h>
#include <libintl.h>
#include <libscf.h>
#include <zonestat.h>
#include <zonestat_impl.h>
#include <limits.h>
#include <pool.h>
#include <procfs.h>
#include <rctl.h>
#include <thread.h>
#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <synch.h>
#include <sys/priocntl.h>
#include <sys/fxpriocntl.h>
#include <sys/processor.h>
#include <sys/systeminfo.h>
#include <thread.h>
#include <sys/vm_usage.h>
#include <time.h>
#include <ucred.h>
#include <unistd.h>
#include <zone.h>
#include <zonestat.h>
#define ZSD_PSET_UNLIMITED UINT16_MAX
#define ZONESTAT_EXACCT_FILE "/var/adm/exacct/zonestat-process"
/*
* zonestatd implements gathering cpu and memory utilization data for
* running zones. It has these components:
*
* zsd_server:
* Door server to respond to client connections. Each client
* will connect using libzonestat.so, which will open
* a file descriptor to the stat server.
*
* The zsd_server also responds to zoneadmd, which reports when a
* new zone is booted. This is used to fattach the zsd_server door
* into the new zone.
*
* zsd_stat_server:
* Receives client requests for the current utilization data. Each
* client request will cause zonestatd to update the current utilization
* data by kicking the stat_thread.
*
* If the client is in a non-global zone, the utilization data will
* be filtered to only show the given zone. The usage by all other zones
* will be added to the system utilization.
*
* stat_thread:
* The stat thread implements querying the system to determine the
* current utilization data for each running zone. This includes
* inspecting the system's processor set configuration, as well as details
* of each zone, such as their configured limits, and which processor
* sets they are running in.
*
* The stat_thread will only update memory utilization data as often as
* the configured config/sample_interval on the zones-monitoring service.
*/
/*
* The private vmusage structure unfortunately uses size_t types, and assumes
* the caller's bitness matches the kernel's bitness. Since the getvmusage()
* system call is contracted, and zonestatd is 32 bit, the following structures
* are used to interact with a 32bit or 64 bit kernel.
*/
typedef struct zsd_vmusage32 {
typedef struct zsd_vmusage64 {
/*
* An amd64 kernel will align the following uint64_t members, but a
* 32bit i386 process will not without help.
*/
struct zsd_zone;
/* Used to store a zone's usage of a pset */
typedef struct zsd_pset_usage {
/* Used to store a pset's utilization */
typedef struct zsd_pset {
/* Name of the pset; the buffer holds ZS_PSETNAME_MAX bytes */
char zsp_name[ZS_PSETNAME_MAX];
/* Individual zone usages of pset */
/*
* Count of per-zone usage entries attached to this pset. It is
* incremented when a usage entry is allocated and decremented when one
* is removed, and bounds the loop that copies the usages into the
* client cache.
*/
int zsp_nusage;
/* Summed kstat values from individual cpus in pset */
} zsd_pset_t;
/* Used to track an individual cpu's utilization as reported by kstats */
typedef struct zsd_cpu {
/* kstats this interval */
/* kstats in most recent interval */
/* Total kstat increases since zonestatd started reading kstats */
} zsd_cpu_t;
/* Used to describe an individual zone and its utilization */
typedef struct zsd_zone {
/* Zone name; the buffer holds ZS_ZONENAME_MAX bytes */
char zsz_name[ZS_ZONENAME_MAX];
/*
* Pool name; the buffer holds ZS_POOLNAME_MAX bytes.
* NOTE(review): presumably the pool the zone is bound to -- confirm.
*/
char zsz_pool[ZS_POOLNAME_MAX];
/*
* Pset name; the buffer holds ZS_PSETNAME_MAX bytes.
* NOTE(review): presumably the pset backing zsz_pool -- confirm.
*/
char zsz_pset[ZS_PSETNAME_MAX];
/*
* NOTE(review): presumably the zone's default scheduling class, used
* for exited processes whose class cannot be deduced -- confirm.
*/
int zsz_default_sched;
/* These are deduced by inspecting processes */
} zsd_zone_t;
/*
* Used to track the cpu usage of an individual process.
*
* zonestatd sweeps /proc each interval and charges the cpu usage of processes
* to their zone. As processes exit, their extended accounting records are
* read and the difference of their total and known usage is charged to their
* zone.
*
* If a process is never seen in /proc, the total usage on its extended
* accounting record will be charged to its zone.
*/
typedef struct zsd_proc {
int zspr_sched;
} zsd_proc_t;
/* Used to track the overall resource usage of the system */
typedef struct zsd_system {
} zsd_system_t;
/*
* A dumping ground for various information and structures used to compute
* utilization.
*
* This structure is used to track the system while clients are connected.
* When the first client connects, a zsd_ctl is allocated and configured by
* zsd_open(). When all clients disconnect, the zsd_ctl is closed.
*/
typedef struct zsd_ctl {
/* To track extended accounting */
int zsctl_proc_fd; /* Log currently being used */
/* stat64 data recorded for the log currently being used */
struct stat64 zsctl_proc_stat;
/*
* NOTE(review): presumably nonzero while the current log is open, by
* analogy with zsctl_proc_open_next below -- confirm.
*/
int zsctl_proc_open;
int zsctl_proc_fd_next; /* Log file to use next */
/* stat64 data recorded for the log file to use next */
struct stat64 zsctl_proc_stat_next;
/*
* Nonzero while the next log is open; tested before the next log is
* closed and reset to 0 afterwards.
*/
int zsctl_proc_open_next;
/* pool configuration handle */
int zsctl_pool_status;
/*
* Reset to 0 before the pool configuration is updated, then passed by
* address to the update call.
*/
int zsctl_pool_changed;
/* The above usage tracking structures */
/* Various system info */
/* Used to track time available under a cpu cap. */
struct timeval zsctl_timeofday;
/* Caches for arrays allocated for use by various system calls */
struct swaptable *zsctl_swap_cache;
/* Info about procfs for scanning /proc */
struct dirent *zsctl_procfs_dent;
/* NOTE(review): presumably the allocated size of zsctl_procfs_dent */
long zsctl_procfs_dent_size;
/* Counts on tracked entities */
} zsd_ctl_t;
int g_hasclient; /* True if any clients are connected */
/*
* The usage cache is updated by the stat_thread, and copied to clients by
* the zsd_stat_server. Mutex and cond are to synchronize between the
* stat_thread and the stat_server.
*/
char *g_usage_cache_buf;
/* fds of door servers */
int g_server_door;
int g_stat_door;
/*
* Starting and current time. Used to throttle memory calculation, and to
* mark new zones and psets with their boot and creation time.
*/
/*
* main() thread.
*/
/* PRINTFLIKE1 */
static void
{
}
/* PRINTFLIKE1 */
static void
{
exit(1);
}
/* Turns on extended accounting if not configured externally */
int
{
char *path = ZONESTAT_EXACCT_FILE;
char oldfile[MAXPATHLEN];
/*
* Start a new accounting file if accounting not configured
* externally.
*/
return (-1);
}
/* Only set accounting file if none is configured */
if (ret < 0) {
== -1) {
return (-1);
}
}
return (-1);
}
return (0);
}
/* Turns off extended accounting if not configured externally */
int
{
char *path = ZONESTAT_EXACCT_FILE;
char oldfile[MAXPATHLEN];
/* If accounting file is externally configured, leave it alone */
return (0);
return (-1);
}
return (-1);
}
return (-1);
}
return (0);
}
/*
* If not configured externally, deletes the current extended accounting file
* and starts a new one.
*
* Since the stat_thread holds an open handle to the accounting file, it will
* read all remaining entries from the old file before switching to
* read the new one.
*
* Returns 0 when the accounting file is externally configured (and is
* therefore left alone), when the roll is deferred to the next call, and on
* the final success path. Returns -1 on the failure path.
*
* NOTE(review): the calls and conditions that select between the return
* statements below are not present in this block, so the exact failure that
* yields -1 cannot be confirmed from here.
*/
int
zsd_roll_exacct(void)
{
int ret;
/* zonestatd's own accounting file, ZONESTAT_EXACCT_FILE */
char *path = ZONESTAT_EXACCT_FILE;
/*
* MAXPATHLEN-byte path buffer.
* NOTE(review): presumably receives the name of the accounting file
* that is currently configured -- confirm.
*/
char oldfile[MAXPATHLEN];
/* If accounting file is externally configured, leave it alone */
return (0);
/* Roll it next time */
return (0);
return (-1);
}
return (0);
}
/*
* Contract stuff for zone_enter().
*
* Returns the descriptor held in fd on success. Returns -1 when fd is -1,
* and on the later failure path.
*
* NOTE(review): the statement that assigns fd and the calls that configure
* it are not present in this block; the comment in the body indicates the
* intended settings (no events delivered, no inheritance, orphaning allowed).
*/
int
init_template(void)
{
int fd;
/*
* Starts out as 0.
* NOTE(review): presumably collects the status of the setup calls.
*/
int err = 0;
if (fd == -1)
return (-1);
/*
* For now, zoneadmd doesn't do anything with the contract.
* Deliver no events, don't inherit, and allow it to be orphaned.
*/
return (-1);
}
return (fd);
}
/*
* Contract stuff for zone_enter()
*/
int
{
int cfd, r;
return (errno);
return (r);
}
return (0);
}
/*
 * Mark fd close-on-exec so that it is not inherited across exec().
 *
 * The descriptor flags are read first and FD_CLOEXEC is OR-ed into them,
 * so any other descriptor flags already set on fd are preserved.
 *
 * Returns 0 on success. Returns -1 if either fcntl() call fails (for
 * example when fd is not an open descriptor); errno is left as set by
 * fcntl().
 */
static int
close_on_exec(int fd)
{
	int flags = fcntl(fd, F_GETFD, 0);

	/* Cannot read the current flags, so there is nothing safe to set */
	if (flags == -1)
		return (-1);

	if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) == -1)
		return (-1);

	return (0);
}
int
{
int n, fd;
type = "all";
if (n >= sizeof (path)) {
return (-1);
}
if (fd != -1) {
return (-1);
}
}
return (fd);
}
int
{
if (fd == -1)
return (errno);
return (err);
}
/*
* Attach the zsd_server to a zone. Called for each zone when zonestatd
* starts, and for each newly booted zone when zoneadmd contacts the zsd_server.
*
* Zone_enter is used to avoid reaching into zone to fattach door.
*/
static void
{
char *path = ZS_DOOR_PATH;
zsd_warn("Unable to init template");
return;
}
if (pid < 0) {
(void) ct_tmpl_clear(tmpl_fd);
"Unable to fork to add zonestat to zoneid %d\n"), zid);
return;
}
if (pid == 0) {
(void) ct_tmpl_clear(tmpl_fd);
_exit(0);
}
_exit(1);
}
if (detach_only)
_exit(0);
if (fd < 0)
_exit(2);
_exit(3);
_exit(0);
}
ct = -1;
(void) ct_tmpl_clear(tmpl_fd);
(void) contract_abandon_id(ct);
;
return;
}
/*
* Zone lookup and allocation functions to manage list of currently running
* zones.
*/
static zsd_zone_t *
{
if (zoneid != -1)
return (zone);
}
}
return (NULL);
}
static zsd_zone_t *
{
return (zone);
}
return (NULL);
}
static zsd_zone_t *
{
return (NULL);
/*
* Allocate as deleted so if not found in first pass, zone is deleted
* from list. This can happen if zone is returned by zone_list, but
* exits before first attempt to fetch zone details.
*/
ctl->zsctl_nzones++;
return (zone);
}
static zsd_zone_t *
{
return (zone);
return (NULL);
/* Insert sorted by zonename */
return (zone);
}
/*
* Mark all zones as not existing. As zones are found, they will
* be marked as existing. If a zone is not found, then it must have
* halted.
*/
static void
{
}
}
/*
* Mark each zone as not using pset. If processes are found using the
* pset, the zone will remain bound to the pset. If none of a zone's
* processes are bound to the pset, the zone's usage of the pset will
* be deleted.
*
*/
static void
{
}
}
/*
* Mark each pset as not existing. If a pset is found, it will be marked
* as existing. If a pset is not found, it will be deleted.
*/
static void
{
}
}
/*
* A pset was found. Update its information
*/
static void
{
/* update pset flags */
/* pset not seen on previous interval. It is new. */
else
pset->zsp_cpu_shares = 0;
pset->zsp_scheds = 0;
}
/*
* A zone's process was found using a pset. Charge the process to the pset and
* the per-zone data for the pset.
*/
static void
{
/* Nothing to do if already found */
goto add_stats;
/* update usage flags */
else
usage->zsu_scheds = 0;
/* Detect zone's pset id, and if it is bound to multiple psets */
/* Record if FSS is co-habitating with conflicting scheduler */
usage->zsu_scheds & (
}
}
/* Add cpu time for a process to a pset, zone, and system totals */
static void
{
}
/* Determine which processor sets have been deleted */
static void
{
/*
* Mark pset as not exists, and deleted if it existed
* previous interval.
*/
ctl->zsctl_npsets--;
continue;
} else {
/* Pset vanished during this interval */
}
}
}
}
/* Determine which zones are no longer bound to processor sets */
static void
{
/*
* Mark pset as not exists, and deleted if it existed previous
* interval.
*/
/*
* Mark pset as not exists, and deleted if it existed
* previous interval.
*/
usage);
pset->zsp_nusage--;
continue;
} else {
}
/* Add cpu shares for usages that are in FSS */
zone->zsz_cpu_shares != 0) {
}
usage);
}
}
}
/* A zone has been found. Update its information */
static void
{
/*
* Mark zone as exists, and new if it did not exist in previous
* interval.
*/
/*
* Zone is new. Assume zone's properties are the same over entire
* interval.
*/
else
/* Schedulers updated later as processes are found */
zone->zsz_scheds = 0;
/* Cpus updated later as psets bound are identified */
zone->zsz_cpus_online = 0;
}
/* Determine which zones have halted */
static void
{
/*
* Mark zone as not existing, or delete if it did not exist in
* previous interval.
*/
/*
* Zone deleted in prior interval,
* so it no longer exists.
*/
ctl->zsctl_nzones--;
continue;
} else {
}
}
}
}
/*
* Mark cpus as not existing. If a cpu is found, it will be updated. If
* a cpu is not found, then it must have gone offline, so it will be
* deleted.
*
* The kstat tracking data is rolled so that the usage since the previous
* interval can be determined.
*/
static void
{
/*
* Mark all cpus as not existing. As cpus are found, they will
* be marked as existing.
*/
}
}
}
/*
* An array the size of the maximum number of cpus is kept. Within this array
* a list of the online cpus is maintained.
*/
{
}
return (cpu);
}
/* A cpu has been found. Update its information */
static void
{
/*
* With legacy processor sets, the cpu may move while zonestatd is
* legacy processor sets, the cpu may move while zonestatd is
* inspecting, causing it to be found twice. In this case, just
* leave cpu in the first processor set in which it was found.
*/
return;
/* Mark cpu as online */
/*
* cpu is newly online.
*/
/*
* Cpu is newly online.
*/
} else {
/*
* cpu online during previous interval. Save properties at
* start of interval
*/
}
}
/* Remove all offlined cpus from the list of tracked cpus */
static void
{
int id;
/* Mark cpu as online or offline */
/*
* cpu offlined in prior interval. It is gone.
*/
/* Clear structure for future use */
} else {
/*
* cpu online at start of interval. Treat
* as still online, since it was online for
* some portion of the interval.
*/
}
}
}
}
/* Some utility functions for managing the list of processor sets */
static zsd_pset_t *
{
return (pset);
}
return (NULL);
}
static zsd_pset_t *
{
if (psetid != -1)
return (pset);
}
}
return (NULL);
}
static zsd_pset_t *
{
return (NULL);
/*
* Allocate as deleted so if not found in first pass, pset is deleted
* from list. This can happen if pset is returned by pset_list, but
* is destroyed before first attempt to fetch pset details.
*/
ctl->zsctl_npsets++;
return (pset);
}
static zsd_pset_t *
{
return (pset);
return (NULL);
/* Insert sorted by psetname */
return (pset);
}
/* Some utility functions for managing the list of zones using each pset */
static zsd_pset_usage_t *
{
return (usage);
return (NULL);
}
static zsd_pset_usage_t *
{
== NULL)
return (NULL);
/*
* Allocate as not deleted. If a process is found in a pset for
* a zone, the usage will not be deleted until at least the next
* interval.
*/
usage->zsu_scheds = 0;
pset->zsp_nusage++;
return (usage);
}
static zsd_pset_usage_t *
{
!= NULL)
return (usage);
return (NULL);
> 0)
return (usage);
}
static void
{
/* Re-count these values each interval */
system->zss_processes = 0;
system->zss_shmids = 0;
system->zss_semids = 0;
system->zss_msgids = 0;
}
/* Reads each cpu's kstats, and adds the usage to the cpu's pset */
static void
{
kid = -1;
/* Get the cpu time totals for this cpu */
return;
if (kid == -1)
return;
return;
return;
return;
/*
* Tracking intr time per cpu just exists for future enhancements.
* The value is presently always zero.
*/
intr = 0;
/*
* cpu is newly online. There is no reference value,
* so just record its current stats for comparison
* on next stat read.
*/
return;
}
/*
* Calculate relative time since previous refresh.
* Paranoia. Don't let time go backwards.
*/
/* Update totals for cpu usage */
/*
* Add cpu's stats to its pset if it is known to be in
* the pset since previous read.
*/
} else {
/*
* Last pset was different than current pset.
* Best guess is to split usage between the two.
*/
}
}
/* Determine the details of a processor set by pset_id */
static int
{
const char *string;
/*
* Inspect legacy psets
*/
for (;;) {
if (ret < 0) {
/* pset is gone. Tell caller to retry */
return (-1);
}
/* Success */
break;
}
sizeof (processorid_t))) != NULL) {
} else {
/*
* Could not allocate to get new cpu list.
*/
"Could not allocate for cpu list"));
return (-1);
}
}
/*
* Old school pset. Just make min and max equal
* to its size
*/
if (psetid == ZS_PSET_DEFAULT) {
} else {
"SUNWlegacy_pset_%d", psetid);
}
/*
* Just treat legacy pset as a simple pool pset
*/
*importance = 1;
return (0);
}
/* Look up the pool pset using the pset id */
!= PO_SUCCESS)
goto err;
goto err;
goto err;
goto err;
if (num != 1)
goto err;
goto err;
else if (psetid == ZS_PSET_DEFAULT)
else
/* Get size, min, max, and importance */
else
*size = 0;
/* Get size, min, max, and importance */
else
*min = 0;
if (*min >= ZSD_PSET_UNLIMITED)
*min = ZS_LIMIT_NONE;
else
*max = ZS_LIMIT_NONE;
if (*max >= ZSD_PSET_UNLIMITED)
*max = ZS_LIMIT_NONE;
*importance = int64;
else
*online = 0;
if (*size == 0)
return (0);
/* get cpus */
goto err;
/* Make sure there is space for cpu id list */
sizeof (processorid_t))) != NULL) {
} else {
/*
* Could not allocate to get new cpu list.
*/
"Could not allocate for cpu list"));
goto err;
}
}
/* count the online cpus */
for (i = 0; i < num; i++) {
goto err;
continue;
goto err;
(*online)++;
}
return (0);
err:
/*
* The pools operations should succeed since the conf is a consistent
* snapshot. Tell caller there is no need to retry.
*/
return (-1);
}
/*
* Update the current list of processor sets.
* This also updates the list of online cpus, and each cpu's pset membership.
*/
static void
{
char psetname[ZS_PSETNAME_MAX];
pool_value_t **vals;
/* Zero cpu counters to recount them */
system->zss_ncpus_online = 0;
pool_dynamic_location(), PO_RDONLY) == 0) {
}
} else {
ctl->zsctl_pool_changed = 0;
&(ctl->zsctl_pool_changed));
if (ret < 0) {
/* Pools must have become disabled */
goto retry;
"Unable to update pool configuration"));
/* Not able to get pool info. Don't update. */
goto err;
}
}
/* Get the list of psets using libpool */
goto err;
goto err;
== NULL)
goto err;
goto err;
}
}
/* Save the pset id of each pset */
for (i = 0; i < num; i++) {
!= PO_SUCCESS)
goto err;
}
} else {
}
/* Get the pset list using legacy psets */
for (;;) {
break;
}
} else {
/*
* Could not allocate to get new pset list.
* Give up
*/
return;
}
}
/* Add the default pset to list */
num++;
}
/* Refresh cpu membership of all psets */
for (i = 0; i < num; i++) {
/* Get pool pset information */
!= 0) {
goto psets_changed;
sys_id);
continue;
}
ctl->zsctl_pset_cache[i]);
/* update pset info */
max, importance);
/* update each cpu in pset */
for (j = 0; j < pset->zsp_online; j++) {
}
}
err:
}
/*
* Fetch the current pool and pset name for the given zone.
*/
static void
{
char poolname[ZS_POOLNAME_MAX];
char psetname[ZS_PSETNAME_MAX];
const char *string;
int ret;
if (ret < 0)
goto lookup_done;
/* Default values if lookup fails */
/* no dedicated cpu if pools are disabled */
goto lookup_done;
/* Get the pool name using the id */
goto lookup_done;
goto lookup_done;
if (num != 1)
goto lookup_done;
goto lookup_done;
/* Get the name of the pset for the pool */
goto lookup_done;
goto lookup_done;
== NULL)
goto lookup_done;
if (num != 1)
goto lookup_done;
goto lookup_done;
if (int64 == ZS_PSET_DEFAULT)
goto lookup_done;
else
if (res_list)
if (pools)
}
/* Convert scheduler names to ZS_* scheduler flags */
static uint_t
{
sched = ZS_SCHED_TS;
sched = ZS_SCHED_IA;
if (pri > 59) {
} else {
sched = ZS_SCHED_FX;
}
sched = ZS_SCHED_RT;
}
return (sched);
}
/*
* Return the limit of a zone resource control.
*
* name: identifies the resource control (NOTE(review): presumably the rctl
* name handed to getrctl(2) -- the lookup call is not present in this block).
*
* Returns ZS_LIMIT_NONE when the lookup reports a non-zero result, so a
* failed lookup is treated as "no limit". Otherwise returns the value stored
* in the rctl block rblk, as reported by rctlblk_get_value().
*/
static uint64_t
zsd_get_zone_rctl_limit(char *name)
{
!= 0) {
return (ZS_LIMIT_NONE);
}
return (rctlblk_get_value(rblk));
}
/*
* Return the current usage of a zone resource control.
*
* name: identifies the resource control (NOTE(review): presumably the rctl
* name handed to getrctl(2) -- the lookup call is not present in this block).
*
* Returns 0 when the lookup reports a non-zero result, so a failed lookup is
* treated as "no usage". Otherwise returns the value stored in the rctl
* block rblk, as reported by rctlblk_get_value().
*/
static uint64_t
zsd_get_zone_rctl_usage(char *name)
{
!= 0) {
return (0);
}
return (rctlblk_get_value(rblk));
}
#define ZSD_NUM_RCTL_VALS 19
/*
* Fetch the limit information for a zone. This uses zone_enter() as the
* getrctl(2) system call only returns rctl information for the zone of
* the caller.
*/
static int
{
int i = 0;
int res = 0;
/* Treat all caps as no cap on error */
*cpu_cap = ZS_LIMIT_NONE;
*ram_cap = ZS_LIMIT_NONE;
*vm_cap = ZS_LIMIT_NONE;
*shm_cap = ZS_LIMIT_NONE;
*processes = 0;
*lwps = 0;
*shm = 0;
*shmids = 0;
*semids = 0;
*msgids = 0;
*lofi = 0;
/* Get the ram cap first since it is a zone attr */
*ram_cap = ZS_LIMIT_NONE;
/* Get the zone's default scheduling class */
if (ret < 0)
return (-1);
/* rctl caps must be fetched from within the zone */
if (pipe(p) != 0)
return (-1);
(void) close(p[0]);
(void) close(p[1]);
return (-1);
}
if (pid < 0) {
(void) ct_tmpl_clear(tmpl_fd);
(void) close(p[0]);
(void) close(p[1]);
return (-1);
}
if (pid == 0) {
(void) ct_tmpl_clear(tmpl_fd);
(void) close(p[0]);
(void) close(p[1]);
_exit(0);
}
}
/* Get caps for zone, and write them to zonestatd parent. */
ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
(void) close(p[1]);
_exit(1);
}
(void) close(p[1]);
_exit(0);
}
ct = -1;
(void) ct_tmpl_clear(tmpl_fd);
(void) close(p[1]);
;
/* Read cap from child in zone */
ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
res = -1;
goto cleanup;
}
i = 0;
*cpu_shares = vals[i++];
*locked_cap = vals[i++];
*processes_cap = vals[i++];
*shmids_cap = vals[i++];
*semids_cap = vals[i++];
*msgids_cap = vals[i++];
/* Interpret maximum values as no cap */
*cpu_cap = ZS_LIMIT_NONE;
*shm_cap = ZS_LIMIT_NONE;
(void) close(p[0]);
(void) ct_tmpl_clear(tmpl_fd);
(void) contract_abandon_id(ct);
return (res);
}
/* Update the current list of running zones */
static void
{
int i, ret;
char zonename[ZS_ZONENAME_MAX];
char poolname[ZS_POOLNAME_MAX];
char psetname[ZS_PSETNAME_MAX];
/* Get the current list of running zones */
for (;;) {
break;
} else {
/* Could not allocate to get new zone list. Give up */
return;
}
}
for (i = 0; i < num; i++) {
if (ret < 0)
continue;
ctl->zsctl_zone_cache[i]);
if (ret < 0)
continue;
if (flags & ZF_NET_EXCL)
else
continue;
}
}
/* Fetch the details of a process from its psinfo_t */
static void
{
timestruc_t d;
/* Get cached data for proc */
*prev_psetid = *psetid;
else
*prev_zoneid = *zoneid;
else
*delta = d;
/* Update cached data for proc */
}
/*
* Reset the known cpu usage of a process. This is done after a process
* exits so that if the pid is recycled, data from its previous life is
* not reused
*/
static void
{
}
/*
* Open the current extended accounting file. On initialization, open the
* file as the current file to be used. Otherwise, open the file as the
* next file to use if the current file reaches EOF.
*/
static int
{
char path[MAXPATHLEN];
/*
* The accounting file is first opened at the tail. Following
* opens to new accounting files are opened at the head.
*/
} else {
}
*fd = -1;
*open = 0;
/* open accounting files for cpu consumption */
if (ret != 0) {
goto err;
}
if (trys > 0) {
"Unable to enable process accounting"));
goto err;
}
(void) zsd_enable_cpu_stats();
trys++;
goto retry;
}
if (ret != 0) {
goto err;
}
/*
* It is possible the accounting file is momentarily unavailable
* because it is being rolled. Try for up to half a second.
*
* If failure to open accounting file persists, give up.
*/
if (oret == 0)
else if (*fd >= 0)
if (trys > 500) {
"Unable to open process accounting file"));
goto err;
}
/* wait one millisecond */
goto retry;
}
*open = 1;
return (0);
err:
if (*fd >= 0)
*open = 0;
*fd = -1;
return (-1);
}
/*
* Walk /proc and charge each process to its zone and processor set.
* Then read exacct data for exited processes, and charge them as well.
*/
static void
{
char path[MAXPATHLEN];
struct timeval interval_end;
/*
* Get the current accounting file. The current accounting file
* may be different than the file in use, as the accounting file
* may have been rolled, or manually changed by an admin.
*/
if (ret != 0) {
return;
}
/*
* Mark the current time as the interval end time. Don't track
* processes that exit after this time.
*/
return;
}
/* Walk all processes and compute each zone's usage on each pset. */
continue;
if (fd < 0)
continue;
continue;
}
/* Get the zone and pset this process is running in */
continue;
continue;
continue;
/*
* Get the usage of the previous zone and pset if they were
* different.
*/
if (zoneid != prev_zoneid)
else
if (psetid != prev_psetid)
else
prev_usage = NULL;
}
/* Update the usage with the processes info */
if (prev_usage == NULL) {
} else {
}
/*
* First time around is just to get a starting point. All
* usages will be zero.
*/
continue;
if (prev_usage == NULL) {
} else {
}
}
/*
* No need to collect exited proc data on initialization. Just
* caching the usage of the known processes to get a zero starting
* point.
*/
return;
/*
* Add accounting records to account for processes which have
* exited.
*/
for (;;) {
int numfound = 0;
/*
* See if the next accounting file is the
* same as the current accounting file.
*/
/*
* End of current accounting file is
* reached, so finished. Clear EOF
* bit for next time around.
*/
break;
} else {
/*
* Accounting file has changed. Move
* to current accounting file.
*/
ctl->zsctl_proc_fd =
ctl->zsctl_proc_open_next = 0;
continue;
}
} else {
/*
* Other accounting error. Give up on
* accounting.
*/
goto ea_err;
}
}
/* Skip if not a process group */
continue;
}
/* The process group entry should be complete */
while (numfound < 9) {
&pobject);
if (ret < 0) {
"unable to get process accounting data");
goto ea_err;
}
/* Next entries should be process data */
EXT_GROUP) {
"process data of wrong type");
goto ea_err;
}
case EXD_PROC_PID:
/*
* This process should not be currently in
* the list of processes to process.
*/
numfound++;
break;
case EXD_PROC_ANCPID:
numfound++;
break;
case EXD_PROC_ZONENAME:
numfound++;
break;
case EXD_PROC_CPU_USER_SEC:
numfound++;
break;
case EXD_PROC_CPU_USER_NSEC:
numfound++;
break;
case EXD_PROC_CPU_SYS_SEC:
numfound++;
break;
case EXD_PROC_CPU_SYS_NSEC:
numfound++;
break;
case EXD_PROC_FINISH_SEC:
numfound++;
break;
case EXD_PROC_FINISH_NSEC:
numfound++;
break;
}
}
if (numfound != 9) {
"Malformed process accounting entry found"));
goto proc_done;
}
/*
* Try to identify the zone and pset to which this
* exited process belongs.
*/
goto proc_done;
/* Save proc info */
sched = 0;
/*
* The following tries to deduce the process's pset.
*
* First choose pset and sched using cached value from the
* most recent time the process has been seen.
*
* pset and sched can change across zone_enter, so make sure
* most recent sighting of this process was in the same
* zone before using most recent known value.
*
* If there is no known value, use the value of the process's
* parent. If parent is unknown, walk parents until a known
* parent is found.
*
* If no parent in the zone is found, use the zone's default
* pset and scheduling class.
*/
}
/*
* Process or process's parent has never been seen.
* Save to deduce a known parent later.
*/
proc_usage = sys;
proc->zspr_usage);
continue;
}
/* Add the zone's usage to the pset */
goto proc_done;
/* compute the usage to add for the exited proc */
proc_usage = sys;
proc->zspr_usage);
if (hrtime_expired == B_TRUE)
break;
}
/*
* close next accounting file.
*/
if (ctl->zsctl_proc_open_next) {
(void) ea_close(
ctl->zsctl_proc_open_next = 0;
}
/* For the remaining processes, use pset and sched of a known parent */
for (;;) {
/*
* Kernel process, or parent is unknown, skip
* process, remove from process list.
*/
break;
}
/*
* Parent in different zone. Save process and
* use zone's default pset and sched below
*/
break;
}
/* Parent has unknown pset. Search parent's parent */
continue;
}
/* Found parent with known pset. Use its info */
proc->zspr_zoneid);
break;
}
proc->zspr_psetid);
break;
}
/* Add the zone's usage to the pset */
break;
}
break;
}
}
/*
* Process has never been seen. Using zone info to
* determine pset and scheduling class.
*/
goto next;
} else {
}
goto next;
/*
* Ignore FX high scheduling class if it is not the
* only scheduling class in the zone.
*/
if (sched != ZS_SCHED_FX_60)
sched &= (~ZS_SCHED_FX_60);
/*
* If more than one scheduling class has been found
* in the zone, use zone's default scheduling class for
* this process.
*/
/* Add the zone's usage to the pset */
goto next;
next:
}
return;
/*
* Close the next accounting file if we have not transitioned to it
* yet.
*/
if (ctl->zsctl_proc_open_next) {
ctl->zsctl_proc_open_next = 0;
}
}
/*
* getvmusage(2) uses size_t's in the passed data structure, which differ
* in size for 32bit and 64 bit kernels. Since this is a contracted interface,
* and zonestatd does not necessarily match the kernel's bitness, marshal
* results appropriately.
*/
static int
{
int i;
int ret;
/*
* An array of vmusage32_t's has been returned.
* Convert it to an array of vmusage64_t's.
*/
for (i = nres32 - 1; i >= 0; i--) {
vmu64[i].vmu_rss_private =
vmu32[i].vmu_rss_private;
vmu64[i].vmu_rss_shared =
vmu32[i].vmu_rss_shared;
vmu64[i].vmu_swap_private =
vmu64[i].vmu_swap_shared =
vmu32[i].vmu_swap_shared;
}
}
return (ret);
} else {
/*
* kernel is 64 bit, so use 64 bit structures as zonestat
* expects.
*/
}
}
/*
* Update the current physical, virtual, and locked memory usage of the
* running zones.
*/
static void
{
int num_swap_devices;
char *path;
int i, ret;
int vmu_nzones;
char kstat_name[KSTAT_STRLEN];
if (init)
return;
/* interrogate swap devices to find the amount of disk swap */
if (num_swap_devices == 0) {
/* No disk swap */
goto disk_swap_done;
}
/* see if swap table needs to be larger */
swt_size = sizeof (int) +
(num_swap_devices * sizeof (struct swapent)) +
/*
* Could not allocate to get list of swap devices.
* Just use data from the most recent read, which will
* be zero if this is the first read.
*/
"virtual memory"));
goto disk_swap_done;
}
num_swap_devices * sizeof (swapent_t));
for (i = 0; i < num_swap_devices; i++, swent++) {
path += MAXPATHLEN;
}
}
if (num_swap_devices < 0) {
/* More swap devices have arrived */
goto disk_swap_again;
/* Unexpected error. Use existing data */
goto disk_swap_done;
}
/* add up the disk swap */
disk_swap_total = 0;
disk_swap_used = 0;
for (i = 0; i < num_swap_devices; i++, swent++) {
}
/* get system pages kstat */
kid = -1;
else
if (kid == -1) {
return;
} else {
} else {
else
return;
}
} else {
else
return;
}
}
/* get the zfs arc size if available */
arc_size = 0;
kid = -1;
if (kid != -1) {
}
/* Try to get swap information */
return;
}
/* getvmusage to get physical memory usage */
vmusage, &num_vmusage);
if (ret != 0) {
/* Unexpected error. Use existing data */
"Unable to read physical memory usage"));
goto vmusage_done;
}
}
/* vmusage results cache too small */
"physical memory usage"));
goto vmusage_done;
}
goto vmusage_again;
}
phys_zones_overcount = 0;
vmu_nzones = 0;
for (i = 0; i < num_vmusage; i++) {
case VMUSAGE_SYSTEM:
/* total pages backing user process mappings */
vmusage[i].vmu_rss_all;
break;
case VMUSAGE_ZONE:
vmu_nzones++;
break;
default:
break;
}
}
/*
* Figure how much memory was double counted due to text sharing
* between zones. Credit this back so that the sum of the zones
* equals the total zone ram usage.
*/
/* walk the zones to get swap and locked kstats. Fetch ram cap. */
sys->zss_locked_zones = 0;
sys->zss_vm_zones = 0;
/* If zone halted during interval, show memory usage as none */
zone->zsz_usage_ram = 0;
zone->zsz_usage_vm = 0;
zone->zsz_usage_locked = 0;
continue;
}
if (phys_zones_credit > 0) {
}
}
/*
* Get zone's swap usage. Since zone could have halted,
* treat it as zero if it cannot be read
*/
zone->zsz_usage_vm = 0;
kid = -1;
if (kid != -1) {
}
}
/*
* Get zone's locked usage. Since zone could have halted,
* treat it as zero if it cannot be read
*/
zone->zsz_usage_locked = 0;
kid = -1;
if (kid != -1) {
/*
* Since locked memory accounting for zones
* can double count ddi locked memory, cap each
* zone's locked usage at its ram usage.
*/
if (zone->zsz_usage_locked >
sys->zss_locked_zones +=
}
}
}
* ctl->zsctl_pagesize;
/* Compute remaining statistics */
/*
* The total for kernel locked memory should include
* segkp locked pages, but oh well. The arc size is subtracted,
* as that physical memory is reclaimable.
*/
/* Add memory used by kernel startup and obp to kernel locked */
if ((phys_total - physmem) > 0)
/*
* Add in the portion of (RAM+DISK) that is not available as swap,
* and consider it swap used by the kernel.
*/
}
/*
* Charge each cpu's usage to its processor sets. Also add the cpu's total
* time to each zone using the processor set. This tracks the maximum
* amount of cpu time that a zone could have used.
*/
static void
{
/*
* init is tested twice below: when it is set, the current hrtime is
* stored as the baseline and the function returns before any of the
* per-pset totals are computed.
*/
/* Update the per-cpu kstat data */
}
/* Update the elapsed real time */
if (init) {
/* first time around, store hrtime for future comparison */
} else {
/* Compute increase in hrtime since the most recent read */
}
/* On initialization, all psets have zero time */
if (init)
return;
continue;
}
/* sum total used time for pset */
/* kernel time in pset is total time minus zone time */
}
/* Total pset elapsed time is used time plus idle time */
/* Share time is only handed out when the usage has a nonzero share count */
usage->zsu_cpu_shares != 0) {
/*
* Figure out how many nanoseconds of share time
* to give to the zone
*/
hrtime);
}
/* Add pset time to each zone using pset */
}
}
/* update cpu cap tracking if the zone has a cpu cap */
}
}
/* kernel time in pset is total time minus zone time */
}
/* Total pset elapsed time is used time plus idle time */
}
/*
* Saves current usage data to a cache that is read by libzonestat when
* calling zs_usage_read().
*
* All pointers in the cached data structure are set to NULL. When
* libzonestat reads the cached data, it will set the pointers relative to
* its address space.
*/
static void
{
/*
* next is a byte cursor: it is advanced by the size of each record
* (cache header, usage, system, then each pset zone entry) as the
* records are laid out one after another.
*/
char *next;
/* The total size starts from the three fixed-size records */
size =
sizeof (zs_usage_cache_t) +
sizeof (zs_usage_t) +
sizeof (zs_system_t) +
return;
}
next += sizeof (zs_usage_cache_t);
/* LINTED */
next += sizeof (zs_usage_t);
/* LINTED */
next += sizeof (zs_system_t);
i < ctl->zsctl_nzones;
/* LINTED */
}
i < ctl->zsctl_npsets;
/* LINTED */
/* Add pset usages for pset */
j < dpset->zsp_nusage;
/* LINTED */
next += sizeof (zs_pset_zone_t);
/* pointers are computed by client */
}
}
/*
* Everything from here to the unlock runs with g_usage_cache_lock
* held: the previous cache pointer is remembered in old, the
* generation counter is bumped, and waiters are woken.
*/
/* Update the current cache pointer */
(void) mutex_lock(&g_usage_cache_lock);
old = g_usage_cache;
}
g_gen_next++;
/* Wake up any clients that are waiting for this calculation */
/* The broadcast is skipped when no kicker is registered */
if (g_usage_cache_kickers > 0) {
(void) cond_broadcast(&g_usage_cache_wait);
}
(void) mutex_unlock(&g_usage_cache_lock);
}
/*
* Returns whatever g_usage_cache currently points to.
*
* NOTE(review): this body takes no lock itself, while the cache update
* path reads g_usage_cache with g_usage_cache_lock held; presumably
* callers must already hold that lock - confirm.
*/
static zs_usage_cache_t *
{
ret = g_usage_cache;
return (ret);
}
/*
* Acquires and then releases g_usage_cache_lock.
*
* NOTE(review): no statement runs between the lock and the unlock in
* this body; confirm that the work meant to be done under the lock has
* not been lost.
*/
void
{
(void) mutex_lock(&g_usage_cache_lock);
(void) mutex_unlock(&g_usage_cache_lock);
}
/* Close the handles held by zsd_open() */
void
{
int id;
/*
* Each handle is guarded by a test of its own field in ctl, so only
* handles that are set get touched.
*/
if (ctl->zsctl_kstat_ctl) {
}
if (ctl->zsctl_proc_open) {
/* Clear the flag so the accounting file is no longer seen as open */
ctl->zsctl_proc_open = 0;
}
if (ctl->zsctl_pool_conf) {
}
/* The zone count is decremented as part of tearing down the zones */
ctl->zsctl_nzones--;
}
!= NULL) {
}
/* Likewise the pset count */
ctl->zsctl_npsets--;
}
/* Release all cpus being tracked */
}
/* Return value deliberately ignored */
(void) zsd_disable_cpu_stats();
}
/*
* Update the utilization data for all zones and processor sets.
*/
static int
{
/*
* Memory calculation is expensive. Only update it on sample
* intervals.
*/
/*
* Delete objects that no longer exist.
* Pset usages must be deleted first as they point to zone and
* pset objects.
*/
/*
* Save results for clients.
*/
/*
* Roll process accounting file.
*/
/* A failure to roll the file is deliberately ignored */
(void) zsd_roll_exacct();
/* The visible path always reports success */
return (0);
}
/*
* Get the system rctl, which is the uppermost limit
*/
/*
* name: presumably the name of the resource control to look up - confirm,
* it is not referenced in the body below.
*
* Returns either ZS_LIMIT_NONE or the value held in the rctl block
* rblk_last.
*/
static uint64_t
zsd_get_system_rctl(char *name)
{
return (ZS_LIMIT_NONE);
/*
* NOTE(review): the return above has no guarding condition, so this
* statement cannot be reached as written. Confirm that the test which
* selects between the two results has not been lost.
*/
return (rctlblk_get_value(rblk_last));
}
/*
* Open any necessary subsystems for collecting utilization data,
* allocate and initialize data structures, and get initial utilization.
*
* Errors:
* ENOMEM out of memory
* EINVAL other error
*/
static zsd_ctl_t *
{
/*
* Every failure below jumps to the err label at the bottom, which is
* the only place NULL is returned. Success returns ctl.
*/
char path[MAXPATHLEN];
long pathmax;
int ret;
int i;
/* Note: err is both this variable and the label name used below */
int err;
goto err;
}
/* open kstats */
goto err;
}
/*
* These are set when the accounting file is opened by
* zsd_update_procs()
*/
ctl->zsctl_proc_open = 0;
ctl->zsctl_proc_open_next = 0;
/* Return value deliberately ignored */
(void) zsd_enable_cpu_stats();
/* Create structures to track usage */
ret = -1;
goto err;
}
/* get the kernel bitness to know structure layout for getvmusage */
if (ret < 0)
else
goto err;
}
/* Bound is inclusive: ids 0 through zsctl_maxcpuid are visited */
for (i = 0; i <= ctl->zsctl_maxcpuid; i++) {
}
goto err;
}
sizeof (zsd_proc_t))) == NULL) {
goto err;
}
/* Bound is inclusive: ids 0 through zsctl_maxproc are visited */
for (i = 0; i <= ctl->zsctl_maxproc; i++) {
}
/* A negative pathmax is treated as a failure */
if (pathmax < 0) {
goto err;
}
== NULL) {
goto err;
}
goto err;
}
ctl->zsctl_pool_changed = 0;
goto err;
}
goto err;
}
/*
* get system limits
*/
/* Generation numbering starts at 1 */
g_gen_next = 1;
return (ctl);
err:
if (ctl)
return (NULL);
}
/* Copy utilization data to buffer, filtering data if non-global zone. */
static void
{
/*
* size and csize are advanced by the size of each record type as the
* records are walked.
* NOTE(review): presumably size is the offset into the filtered output
* and csize the offset into the cached input - confirm. In the inner
* pset loop only csize is advanced for every entry, while size is
* advanced separately further down.
*/
int i, j;
/* Privileged users in the global zone get everything */
if (is_gz) {
return;
}
/* Zones just get their own usage */
size += sizeof (zs_usage_t);
csize += sizeof (zs_usage_t);
/* Start from zero psets; the count is bumped again in the pset loop */
usage->zsu_npsets = 0;
/* LINTED */
/* LINTED */
size += sizeof (zs_system_t);
csize += sizeof (zs_system_t);
/* Save system limits but not usage */
sys->zss_ncpus_online = 0;
/* LINTED */
/* LINTED */
/* Find the matching zone */
for (i = 0; i < cusage->zsu_nzones; i++) {
}
/* LINTED */
}
/* LINTED */
/* LINTED */
for (i = 0; i < cusage->zsu_npsets; i++) {
/* LINTED */
for (j = 0; j < cpset->zsp_nusage; j++) {
csize += sizeof (zs_pset_zone_t);
/* LINTED */
}
/* LINTED */
size += sizeof (zs_pset_zone_t);
pz->zspz_cpu_usage);
usage->zsu_npsets++;
}
/* LINTED */
}
}
/*
* Respond to new connections from libzonestat.so. Also respond to zoneadmd,
* which reports new zones.
*/
/* ARGSUSED */
static void
{
/*
* Two commands are recognised in this body: ZSD_CMD_CONNECT and
* ZSD_CMD_NEW_ZONE.
*/
const priv_set_t *eset;
/*
* An argp of DOOR_UNREF_DATA marks an unreferenced-door notification
* rather than a request from a client.
*/
if (argp == DOOR_UNREF_DATA) {
}
}
/* LINTED */
/* If connection, return door to stat server */
if (cmd == ZSD_CMD_CONNECT) {
/* Verify client compilation version */
}
/* Verify client permission */
/* A nonzero result means the caller credentials could not be read */
if (door_ucred(&ucred) != 0) {
}
}
}
/* Return stat server door */
}
/* Respond to zoneadmd informing zonestatd of a new zone */
if (cmd == ZSD_CMD_NEW_ZONE) {
}
}
/*
* Respond to libzonestat.so clients with the current utilization data.
*/
/* ARGSUSED */
static void
{
int ret;
char *rvalp;
const priv_set_t *eset;
/* Tell stat thread there are no more clients */
/*
* An argp of DOOR_UNREF_DATA marks an unreferenced-door notification
* rather than a client request. The kick condition is signalled with
* g_usage_cache_lock held.
*/
if (argp == DOOR_UNREF_DATA) {
(void) mutex_lock(&g_usage_cache_lock);
(void) cond_signal(&g_usage_cache_kick);
(void) mutex_unlock(&g_usage_cache_lock);
}
}
/* LINTED */
if (cmd != ZSD_CMD_READ) {
}
/* A nonzero result means the caller credentials could not be read */
if (door_ucred(&ucred) != 0) {
}
/* Callers in the global zone are distinguished from all others */
if (zoneid == GLOBAL_ZONEID)
}
}
(void) mutex_lock(&g_usage_cache_lock);
/*
* Force a new cpu calculation for client. This will force a
* new memory calculation if the memory data is older than the
* sample period.
*/
(void) cond_signal(&g_usage_cache_kick);
/* Error path: the lock is dropped before the message is reported */
(void) mutex_unlock(&g_usage_cache_lock);
"Interrupted before writing usage size to client\n"));
}
}
(void) mutex_unlock(&g_usage_cache_lock);
/* Copy current usage data to stack to send to client */
/* Filter out results if caller is non-global zone */
}
/* ARGSUSED */
/*
* Handler with an empty body; sig is unused.
*
* NOTE(review): presumably installed as the handler for the signals that
* main waits for in pause(). Catching a signal and returning from its
* handler is enough to make pause() return - confirm where it is
* installed.
*/
static void
zonestat_quithandler(int sig)
{
}
/*
* The stat thread generates new utilization data when clients request
* it. It also manages opening and closing the subsystems used to gather
* data depending on if clients exist.
*/
/* ARGSUSED */
/*
* Thread entry point; arg is unused. Every visible exit returns NULL,
* either directly or through the quit label at the bottom.
*/
void *
stat_thread(void *arg)
{
/* A negative start time is treated as a failure */
if (start < 0) {
goto quit;
goto quit;
}
/*
* The first memory calculation is due at once: next_memory begins at
* the start time and is compared against now further down.
*/
next_memory = start;
for (;;) {
/*
* These are used to decide if the most recent memory
* calculation was within a sample interval,
* and whether or not the usage collection needs to
* be opened or closed.
*/
/*
* If all clients have gone, close usage collecting
*/
(void) mutex_lock(&g_usage_cache_lock);
(void) mutex_unlock(&g_usage_cache_lock);
break;
}
(void) mutex_unlock(
break;
}
/*
* Wait for a usage data request
*/
/* Only block when no kicker is registered */
if (g_usage_cache_kickers == 0) {
(void) cond_wait(&g_usage_cache_kick,
}
/* A negative current time is treated as a failure */
if (now < 0) {
(void) mutex_unlock(
goto quit;
}
(void) mutex_unlock(&g_usage_cache_lock);
"Unable to fetch current time"));
goto quit;
}
if (g_hasclient) {
/* Memory data is due again once now reaches next_memory */
if (now >= next_memory) {
}
} else {
}
(void) mutex_unlock(&g_usage_cache_lock);
break;
}
"Unable to open zone statistics"));
else
}
"Unable to read zone statistics"));
return (NULL);
}
}
(void) mutex_lock(&g_usage_cache_lock);
(void) mutex_unlock(&g_usage_cache_lock);
} else {
(void) mutex_unlock(&g_usage_cache_lock);
}
}
/* Common exit path; g_open is checked before returning */
quit:
if (g_open)
return (NULL);
}
/*
* NOTE(review): only an early return is left in this body, and the line
* carrying the function name is not present. Presumably this is the
* zsd_set_fx() routine that main calls under its high priority comment -
* confirm.
*/
void
{
return;
}
}
/*
* NOTE(review): presumably the descriptor the daemon uses to report
* readiness to its waiting parent (see daemonize_start and
* daemonize_ready) - confirm.
*/
static int pipe_fd;
/*
* Tell the waiting parent process how startup went.
*
* status: presumably the ready code that the parent forwards as its exit
* status (see the parent branch of daemonize_start) - confirm. main
* passes 0.
*/
static void
daemonize_ready(char status)
{
/*
* wake the parent with a clue
*/
}
/*
* Begin turning the process into a daemon.
*
* Returns -1 on the visible failure paths and 0 at the end of the child
* branch. The parent branch (pid != 0) does not fall through to the
* child setup.
*/
static int
daemonize_start(void)
{
char data;
int status;
int filedes[2];
/* Descriptor 0 (standard input) is closed first */
(void) close(0);
return (-1);
return (-1);
if (pid != 0) {
/*
* parent
*/
/* forward ready code via exit status */
}
status = -1;
/* daemon process exited before becoming ready */
/* assume daemon process printed useful message */
} else {
exit(1);
}
}
/*
* child
*/
/*
* generic Unix setup
*/
/* New session, and a file mode creation mask of zero */
(void) setsid();
(void) umask(0000);
return (0);
}
static void
{
int i;
/* Remember the count so that growth can be detected below */
nzids_last = nzids;
/*
* The number of zone ids grew past the remembered count, so start
* over from the again label.
* NOTE(review): the again label itself is not present in this body -
* confirm.
*/
if (nzids > nzids_last) {
goto again;
}
/* Visit every zone id */
for (i = 0; i < nzids; i++)
}
int
{
int arg;
/* (void) sigignore(SIGCHLD); */
/* Running anywhere but the global zone is singled out */
if (getzoneid() != GLOBAL_ZONEID)
!= EOF) {
switch (arg) {
case 'c':
break;
default:
}
}
/*
* Cleanup mode: disable cpu stats and exit immediately, with status 1
* if that failed and 0 if it succeeded.
*/
if (opt_cleanup) {
if (zsd_disable_cpu_stats() != 0)
exit(1);
else
exit(0);
}
/* Get the configured sample interval */
"config", "sample_interval");
"\"config/sample_interval\""));
"\"config/sample_interval\". Must be of type \"count\""));
g_interval = *intervalp;
/* A zero interval is rejected */
if (g_interval == 0)
"\"config/sample_interval\". Must be greater than zero"));
if (daemonize_start() < 0)
/* Run at high priority */
zsd_set_fx();
/* A negative door descriptor means the door could not be set up */
if (g_server_door < 0)
if (g_stat_door < 0)
/* Report a ready code of 0 */
daemonize_ready(0);
/* Wait for signal to quit */
(void) pause();
/* detach doors */
(void) door_revoke(g_server_door);
(void) door_revoke(g_stat_door);
/* kick stat thread and wait for it to close the statistics */
(void) mutex_lock(&g_usage_cache_lock);
(void) cond_signal(&g_usage_cache_kick);
(void) mutex_unlock(&g_usage_cache_lock);
end:
return (0);
}