rds.c revision 004388ebfdfe2ed7dfd2d153a876dfcc22d2c006
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <strings.h>
#include <limits.h>
#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <fcntl.h>
#include <pthread.h>
#include "rdimpl.h"
#include "rdprot.h"
#include "rdutil.h"
#include "rdlist.h"
#include "rdfile.h"
/* message strings used by the rds server */
#define	RDS_VERSION		"RDS Version 1.0\n"
#define	TIMEOUT_MSG		"Timeout"
#define	NOTREADY_RESPONSE	"BUSY"
extern char errmsg[];	/* global message buffer */
/* banner text: product name, copyright and version, one line each */
static char greeting[] =
	"Resource Data Server\n"
	"Copyright 2001 SMI.\n"
	"Version 1.0\n";
/* command timeout: ms allowed between successive cmds */
static int timeout = DEFAULT_CMD_TIMEOUT;
/* scan period: ms between successive scans */
static int interval = DEFAULT_SCAN_INTERVAL;
/*
 * Global shutdown flag.
 *
 * It is set from the signal handler (on SIGINT/SIGTERM) and polled by the
 * loops in this file, so it is declared volatile sig_atomic_t: that is the
 * only object type a signal handler may portably store to, and volatile
 * keeps the polling loops from caching the value.
 */
static volatile sig_atomic_t sigterm = 0;
/* print all cmd data on stdout in server mode flag */
static int Po = 0;
/* count of scans performed in server mode */
static long scans_done = 0;
/* name of rds logging file */
/* enable microstate accounting flag */
int mo = 0;
/* name of stored data file */
/* mutex lock for data lists */
/* mutex lock for log */
/* identifiers for the various threads */
/*
* Clean up calling thread's state.
*
* Judging by the log messages below there is one branch each for the
* server, scanner and master threads; the scanner branch also calls
* monitor_stop(). A caller that matches none of them is reported as an
* unknown thread id.
*/
static void
{
/* shut down the command protocol */
"cleanup_state: server thread shutdown\n");
log_msg("server thread shutdown init\n");
log_msg("server thread shutdown complete\n");
/* shut down the scanner */
"cleanup_state: scanner thread shutdown\n");
log_msg("scanner thread shutdown init\n");
log_msg("Waiting for server thread %d join from %d\n",
int e = errno;
/* e holds errno from before perror()/log_msg() run; they may change it */
perror("server join (cleanup)");
log_msg("server join (cleanup) failed with %d\n", e);
}
/* stop the monitor before reporting the scanner shutdown as complete */
monitor_stop();
log_msg("scanner thread shutdown complete\n");
"cleanup_state: master thread shutdown\n");
log_msg("master thread shutdown\n");
} else {
/* caller is none of the known threads */
"cleanup_state: unknown thread id %d\n", (int)this);
}
}
/*
* Called by any of the threads, this should set state
* that the other threads will pick up so they will (eventually)
* shut themselves down cleanly, then call pthread_exit
* to properly shut down the calling thread.
*
* msg is the text announced together with the calling thread's id.
* The calling thread's exit code is the status handed in by the caller
* (ok_exit() passes 0), not a fixed value.
*/
static void
{
char wb[256];
/* cannot be on the stack since thread terminates with pthread_exit */
static int retcode = 0;
/* worker-specific cleanup */
/* announce the calling thread's demise */
(int)pthread_self(), msg);
/* everybody checks this periodically */
sigterm = 1;
/* return status as the calling thread's exit code */
}
/*
 * Error exit, callable from any of the threads.
 *
 * Hands the text in the global errmsg buffer to generic_exit() together
 * with status 1, which should set state that the other threads will pick
 * up so they will (eventually) shut themselves down cleanly, and then
 * shut down the calling thread.
 * The calling thread will exit with its code set to 1.
 *
 * Callers use this as "if (failed) err_exit();" and rely on it not
 * returning, so the body must not be empty.
 */
void
err_exit()
{
	/* file-scope names, redeclared here so the dependency is explicit */
	extern char errmsg[];
	extern void generic_exit();

	generic_exit(errmsg, 1);
}
/*
 * Normal exit, callable from any of the threads.
 *
 * Passes a fixed "Normal exit." message and status 0 to generic_exit(),
 * which should set state that the other threads will pick up so they
 * will (eventually) shut themselves down cleanly, and then shut down
 * the calling thread.
 * The calling thread will exit with its code set to 0.
 */
static void
ok_exit()
{
	const int exit_status = 0;	/* success */

	generic_exit("Normal exit.\n", exit_status);
}
/*
 * Print the command line synopsis on stdout, one option per line.
 *
 * The text is written with fputs() so that it is never interpreted as a
 * printf format string.
 */
static void
usage()
{
	static const char *const help[] = {
		"rds [ options ]\n",
		"-u\t\t- print stats for all users\n",
		"-U<uid>\t\t- print stats for <uid>\n",
		"-j\t\t- print stats for all projects\n",
		"-J<projid>\t- print stats for <projid>\n",
		"-p\t\t- print stats for all processes\n",
		"-P <pid>\t- print stats for <pid>\n",
		"-m\t\t- enable microstate accounting\n",
		"-a\t\t- run in server mode\n",
		"-t<time>\t- set command timeout to <time>\n",
		/* the placeholder now matches the option's own argument name */
		"-i<interval>\t- set interval between scans to <interval>\n",
		"-d\t\t- in server mode print stats on stdout\n",
		"-L<file>|stderr - write log messages into <file> or stderr\n",
		"-v\t\t- print rds version\n"
	};
	size_t i;

	for (i = 0; i < sizeof (help) / sizeof (help[0]); i++)
		(void) fputs(help[i], stdout);
}
/*
* Initiate the rds command protocol from the server side.
* Emits the header and version strings.
*
* A failure to emit is treated as fatal: the visible checks all end in
* err_exit().
*/
static void
{
/* emit version and header strings */
err_exit();
/* a non-zero return from wr_phead() is handled as an error */
if (wr_phead() != 0)
err_exit();
}
/*
 * Answer a request that cannot be served yet: writes NOTREADY_RESPONSE
 * and then a newline through wr_string(). Write errors are ignored.
 */
static void
notready()
{
	char *pieces[] = { NOTREADY_RESPONSE, "\n" };
	size_t k;

	for (k = 0; k < sizeof (pieces) / sizeof (pieces[0]); k++)
		(void) wr_string(pieces[k]);
}
/*
* process_cmds() implements the rds server running in threaded mode.
*
* It assumes that the /proc scanner is running in another thread and
* guarding access to critical sections.
*
* This function writes version and header to the output stream and waits
* for commands on the input stream.
*
* Each received command may block on a mutex while the scanner thread is
* updating.
*
* If the timeout expires without receiving a command, it will write an
* error message and terminate. A received command resets the timeout.
*
* Each command is acknowledged with a prompt.
*
* The argument p is unused. The function finishes through ok_exit() or
* err_exit(); the trailing return (NULL) presumably only satisfies the
* thread start-routine signature.
*/
/*ARGSUSED*/
static void *
process_cmds(void *p)
{
int ret;
char *cmd;
double d;
int cmd_is_noop;
/* start the protocol so the client knows we're alive */
/* establish timeout value */
/* initialize stdin object */
/* emit initial prompt */
/* interval_cnt counts down the timeout intervals that are still left */
while (interval_cnt > 0) {
/* time to shut down, exit gracefully */
if (sigterm == 1) {
break; /* ok_exit(); */
}
/* check for stdin status */
/* block on stdin, max timeout */
/* timed out waiting for a command */
--interval_cnt;
continue;
}
/* if interrupted system call then exit gracefully */
log_msg("select() interrupted\n");
ok_exit();
}
/* weird error condition */
if (ret != 1) {
perror("RDS Select error");
continue;
}
/* process whatever is waiting on stdin */
cmd_is_noop = 0;
/* try to parse out a valid command */
err_exit();
}
/* handle the various commands */
/* exit now */
ok_exit();
/* null command */
++cmd_is_noop;
/* keepalive, another null command */
++cmd_is_noop;
/*
* If the first scan has not yet
* completed, notify the requester and
* wait for a new command. The
* command timeout counter is
* suspended until the next command
* arrives.
*/
if (scans_done == 0) {
notready();
continue;
}
/* grab the mutex */
if ((ret = pthread_mutex_lock(
&listLock)) == 0) {
d = (double)
log_msg("Server lock wait"
" was %1.5f sec\n", d);
if (wr_lshead(5) != 0)
err_exit();
break;
break;
break;
break;
break;
/* release the mutex */
/*
* NOTE(review): the adjacent string literals in the lock/unlock
* failure messages of this function are joined without a space
* (e.g. "pthread_mutex_unlock" "failed with ..."), so the logged
* text runs the two words together - confirm and fix the literals.
*/
if ((ret = pthread_mutex_unlock(
&listLock)) != 0) {
log_msg("pthread_mutex_unlock" \
"failed with %d\n", ret);
}
} else {
log_msg("pthread_mutex_lock failed" \
"with %d\n", ret);
}
/* get all process data (deprecated?) */
if (scans_done == 0) {
notready();
continue;
}
/* grab the mutex */
if ((ret = pthread_mutex_lock(
&listLock)) == 0) {
if (wr_lshead(1) != 0)
err_exit();
break;
/* release the mutex */
if ((ret = pthread_mutex_unlock(
&listLock)) != 0) {
log_msg("pthread_mutex_unlock"\
"failed with %d\n", ret);
}
} else {
log_msg("pthread_mutex_lock"\
"failed with %d\n", ret);
}
/* get the active user list */
if (scans_done == 0) {
notready();
continue;
}
/* grab the mutex */
if ((ret = pthread_mutex_lock(
&listLock)) == 0) {
if (wr_lshead(1) != 0)
err_exit();
break;
/* release the mutex */
if ((ret = pthread_mutex_unlock(
&listLock)) != 0) {
log_msg("pthread_mutex_unlock"\
"failed with %d\n", ret);
}
} else {
log_msg("pthread_mutex_lock" \
"failed with %d\n", ret);
}
/* get data for a particular user */
if (scans_done == 0) {
notready();
continue;
}
/* grab the mutex */
if ((ret = pthread_mutex_lock(
&listLock)) == 0) {
if (wr_lshead(1) != 0)
err_exit();
break;
/* release the mutex */
if ((ret = pthread_mutex_unlock(
&listLock)) != 0) {
log_msg("pthread_mutex_unlock" \
"failed with %d\n", ret);
}
} else {
log_msg("pthread_mutex_lock" \
"failed with %d\n", ret);
}
if (scans_done == 0) {
notready();
continue;
}
/* grab the mutex */
if ((ret = pthread_mutex_lock(
&listLock)) == 0) {
if (wr_lshead(1) != 0)
err_exit();
/* grab the mutex here */
break;
/* release the mutex */
if ((ret = pthread_mutex_unlock(
&listLock)) != 0) {
log_msg("pthread_mutex_unlock" \
"failed with %d\n", ret);
}
} else {
log_msg("pthread_mutex_lock" \
"failed with %d\n", ret);
}
if (scans_done == 0) {
notready();
continue;
}
/* grab the mutex */
if ((ret = pthread_mutex_lock(
&listLock)) == 0) {
if (wr_lshead(1) != 0)
err_exit();
break;
/* release the mutex */
if ((ret = pthread_mutex_unlock(
&listLock)) != 0) {
log_msg("pthread_mutex_unlock" \
"failed with %d\n", ret);
}
} else {
log_msg("pthread_mutex_lock" \
"failed with %d\n", ret);
}
if (scans_done == 0) {
notready();
continue;
}
/* grab the mutex */
if ((ret = pthread_mutex_lock(
&listLock)) == 0) {
if (wr_lshead(1) != 0)
err_exit();
break;
/* release the mutex */
if ((ret = pthread_mutex_unlock(
&listLock)) != 0) {
log_msg("pthread_mutex_unlock"
"failed with %d\n", ret);
}
} else {
log_msg("pthread_mutex_lock"
"failed with %d\n", ret);
}
} else {
/* bad command */
(void) wr_prompt(PROMPT_WHAT);
format_err("RDS protocol error:"
"unknown command");
++cmd_is_noop;
}
/* only commands that did real work get their duration logged */
if (!cmd_is_noop) {
log_msg("Command took %2.3f sec"
" (%ld scans done)\n",
d, scans_done);
}
/* reset the interval counter for timeout */
continue;
}
/* timed out, one less interval to wait */
--interval_cnt;
}
/* timed out, print message */
if (interval_cnt == 0) {
err_exit();
}
/* clean exit */
log_msg("process_cmds exits\n");
ok_exit(); /* calls pthread_exit() */
return (NULL);
}
/*
* The thread procedure for the /proc scanner.
* Does a full scan of /proc, then sleeps for a specified time.
*
* The specified time ('interval') is adjusted according to
* the average of the last three scan times.
* The sleep time is increased if the average scan duration
* exceeds a threshold. The threshold is set to 50% of the current
* sleep time.
* The sleep time is decreased in a similar way.
*
* The update of the project and user lists is guarded by aggregate_list_mutex.
* The update of the process list is guarded by process_list_mutex.
* NOTE(review): neither mutex name appears in the visible code of this
* file (the command thread locks listLock) - confirm these names.
*
* The argument p is unused. The loop runs until sigterm is set, and
* every completed scan increments scans_done.
*/
/*ARGSUSED*/
static void *
scanprocfs(void *p)
{
double d0; /* duration of the scan before the last one */
double d1; /* duration of the last scan */
double d2; /* duration of current scan */
double ad; /* average duration of the last three scans */
double threshold_up; /* threshold for increasing scan duration */
double threshold_down; /* threshold for decreasing scan duration */
int new_interval = interval;
int time_to_sleep;
threshold_down = 0;
while (sigterm != 1) {
/* a failed scan is fatal for this thread */
if (monitor_update() != 0)
err_exit();
++scans_done;
/* make sure we're sleeping a reasonable amount of time */
if (threshold_up < ad) {
/* increase the new_interval in 1000 ms steps */
/* NOTE(review): the visible statement adds 500 ms, not 1000 - confirm */
new_interval += 500;
/* pull up the thresholds */
}
if (threshold_down > ad) {
/* decrease the new_interval in 1000 ms steps */
/* NOTE(review): the visible statement adds 500 ms here too - confirm */
new_interval += 500;
/* pull down the thresholds */
if (new_interval < interval) {
/* just as at the beginning */
threshold_down = 0;
} else {
}
}
log_msg("scan %.0f ms, ad %.0f ms, thold_up %.0f ms,"
" thold_down %.0f ms, interval %d ms\n",
/* sleep in one-second slices so that a shutdown request is noticed */
while (time_to_sleep > 0) {
napms(1000);
time_to_sleep -= 1000;
if (sigterm == 1)
break;
}
}
log_msg("scanprocfs exits\n");
ok_exit();
return (NULL);
}
/*
* Signal handling: SIGINT and SIGTERM both set the global sigterm flag;
* any other signal number is ignored.
*/
static void
{
switch (sig) {
case SIGINT:
case SIGTERM:
sigterm = 1;
break;
}
}
/*
* Run the command processor, with the /proc scanner and rds command processor
* in separate threads.
*
* Initializes the mutex as a side effect.
*
* Returns on exit of the command process or as a result of a signal.
*
* The calling thread records itself as the master thread, then polls
* sigterm once a second. After sigterm is set it waits for the scanner
* thread and finally destroys listLock. Every visible setup failure
* ends in err_exit().
*/
static void
{
int rv;
/* keep track of main()'s thread */
master = pthread_self();
/* initialize the mutexes for later use */
if (rv != 0) {
err_exit();
}
if (rv != 0) {
err_exit();
}
/* launch the command processor in its thread */
if (rv != 0) {
"Server thread create failed with %d", rv);
err_exit();
}
log_msg("Server pthread_create = %d returns %d\n",
/* launch the scanner in its thread */
if (rv != 0) {
"Scanner thread create failed with %d", rv);
err_exit();
}
log_msg("Scanner pthread_create = %d returns %d\n",
/* nothing much else to do here */
while (sigterm != 1)
(void) sleep(1);
/* wait for the scanner & server threads to shut down */
log_msg("Waiting for scanner thread %d join from %d\n",
(int)scanner, (int)pthread_self());
int e = errno;
/* e holds errno from before perror()/log_msg() run; they may change it */
perror("scanner join");
log_msg("scanner join failed with %d\n", e);
}
log_msg("Scanner thread joined.\n");
/* finish cleaning up global state */
(void) pthread_mutex_destroy(&listLock);
log_msg("Global cleanup completed.\n");
}
/*
* Program entry: parses the command line options, then either runs the
* threaded server (do_server_mode == 1) or performs lo_arg monitor
* updates directly in this thread. The log is closed and logLock is
* destroyed at the very end. Returns 0; the usage/error paths visible
* here leave through exit(1) or err_exit().
*/
int
{
selected = 0;
int lo_arg = 1;
int rv;
/* parse args */
switch (i) {
case 'U':
break;
case 'u':
break;
case 'J':
break;
case 'j':
break;
case 'P':
break;
case 'p':
break;
case 'a':
do_server_mode = 1;
break;
case 'l':
usage();
exit(1);
}
break;
case 'd':
Po = 1;
break;
case 't':
usage();
exit(1);
}
break;
case 'i':
usage();
exit(1);
}
break;
case 'f':
break;
case 'L':
break;
case 'm':
mo = 1;
break;
exit(1);
break;
case '?':
usage();
exit(1);
default:
usage();
exit(1);
}
/* set handlers */
/* initialize the log mutex */
if (rv != 0) {
err_exit();
}
if (do_server_mode == 1) {
/*
* Initialize list data structures, possibly
* reading saved data.
*
* As a side effect this messes with the protocol
* state since the list reader pretends it's reading
* the protocol.
*
* A problem here is that we cannot start the server
* thread until this has completed because it will try to
* use the same state hidden inside the protocol code.
*
* The consequence is that this may occupy the main
* thread for an arbitrarily long time *before* the server
* thread is started and the app becomes able to respond
* to commands.
*/
if (monitor_start() != 0)
err_exit();
/* Open pipes in and out for the command protocol */
err_exit();
}
err_exit();
}
/* Waits for the child threads to end */
runserver();
/* Close command I/O pipes */
close_prot();
} else {
/* one-shot mode: scan lo_arg times unless a signal asks us to stop */
if (monitor_start() != 0)
err_exit();
for (i = 0; i < lo_arg; i ++) {
if (sigterm == 1)
break;
if (monitor_update() != 0)
err_exit();
}
}
}
if (i < lo_arg - 1)
}
}
/* clean up the log stuff at the very end */
log_close();
(void) pthread_mutex_destroy(&logLock);
return (0);
}