msacct.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Mega-theory block comment:
*
* Microstate accounting uses finite states and the transitions between these
* states to measure timing and accounting information. The state information
* is presently tracked for threads (via microstate accounting) and cpus (via
* cpu microstate accounting). In each case, these accounting mechanisms use
* states and transitions to measure time spent in each state instead of
* clock-based sampling methodologies.
*
* For microstate accounting:
* state transitions are accomplished by calling new_mstate() to switch between
* states. Transitions from a sleeping state (LMS_SLEEP and LMS_STOPPED) occur
* by calling restore_mstate() which restores a thread to its previously running
* state. This code is primarily executed by the dispatcher in disp() before
* running a process that was put to sleep. If the thread was not in a sleeping
* state, this call has little effect other than to update the count of time the
* thread has spent waiting on run-queues in its lifetime.
*
* For cpu microstate accounting:
* Cpu microstate accounting is similar to the microstate accounting for threads
* but it tracks user, system, and idle time for cpus. Cpu microstate
* accounting does not track interrupt times as there is a pre-existing
* interrupt accounting mechanism for this purpose. Cpu microstate accounting
* tracks time that user threads have spent active, idle, or in the system on a
* given cpu. Cpu microstate accounting has fewer states which allows it to
* have better defined transitions. The states transition in the following
* order:
*
* CMS_USER <-> CMS_SYSTEM <-> CMS_IDLE
*
* In order to get to the idle state, the cpu microstate must first go through
* the system state, and vice-versa for the user state from idle. The switching
* of the microstates from user to system is done as part of the regular thread
* microstate accounting code, except for the idle state which is switched by
* the dispatcher before it runs the idle loop.
*
* Cpu percentages:
* Cpu percentages are now handled by and based upon microstate accounting
* information (the same is true for load averages). The routines which handle
* the growing/shrinking and exponentiation of cpu percentages have been moved
* here as it now makes more sense for them to be generated from the microstate
* code. Cpu percentages are generated similarly to the way they were before;
* however, now they are based upon high-resolution timestamps and the
* timestamps are modified at various state changes instead of during a clock()
* interrupt. This allows us to generate more accurate cpu percentages which
* are also in-sync with microstate data.
*/
/*
* Initialize the microstate level and the
* associated accounting information for an LWP.
*
* t is the thread being set up; init_state is the microstate stored as
* its current state.
*/
void
kthread_t *t,
int init_state)
{
/* record the starting microstate */
t->t_mstate = init_state;
/* clear the thread's waitrq value */
t->t_waitrq = 0;
/* set TP_MSACCT only when it is not already set */
if ((t->t_proc_flag & TP_MSACCT) == 0)
t->t_proc_flag |= TP_MSACCT;
}
}
/*
* Initialize the microstate level and associated accounting information
* for the specified cpu.
*
* NOTE(review): the only statement visible here clears cpu->cpu_waitrq;
* no visible statement uses init_state -- confirm against the full
* definition.
*/
void
int init_state)
{
/* clear the cpu's waitrq value */
cpu->cpu_waitrq = 0;
}
/*
* Sets the cpu state to OFFLINE. We don't actually track this time,
* but it serves as a useful placeholder state for when we're not
* doing anything.
*
* NOTE(review): the state assignment itself is not visible here; the
* visible statement only zeroes cpu->cpu_mstate_start.
*/
void
{
/* reset the start value of the current cpu microstate */
cpu->cpu_mstate_start = 0;
}
/*
* NOTE(review): the signature, the computation of newtime and the loop
* condition are not visible here. What is visible is a do-loop that
* jumps to its condition via 'continue' whenever newtime is negative;
* per the comment on that branch, the intent is to make the
* compare-and-swap fail so that the iteration is repeated.
*/
void
{
do {
/* a negative newtime is treated as invalid and triggers a retry */
if (newtime < 0) {
/* force CAS to fail */
continue;
}
}
/*
* Return an aggregation of microstate times in scaled nanoseconds (high-res
* time). This keeps in mind that p_acct is already scaled, and ms_acct is
* not.
*
* The result is accumulated in aggr_time and returned. The thread list is
* entered through p->p_tlist; when there is no thread, the value gathered
* so far is returned immediately. Inside the loop, threads with
* TP_LWPEXIT set are skipped, and when a_state is LMS_SYSTEM a second
* scaledtime contribution is added.
*
* NOTE(review): the signature, the initialization of aggr_time, the
* computation of scaledtime and the loop advance/condition are not visible
* here.
*/
{
kthread_t *t;
if (a_state == LMS_SYSTEM)
t = p->p_tlist;
/* no threads to visit: return what has been accumulated so far */
if (t == NULL)
return (aggr_time);
do {
/* skip threads flagged TP_LWPEXIT */
if (t->t_proc_flag & TP_LWPEXIT)
continue;
aggr_time += scaledtime;
/* LMS_SYSTEM gets an additional scaled contribution */
if (a_state == LMS_SYSTEM) {
aggr_time += scaledtime;
}
return (aggr_time);
}
/*
* NOTE(review): the signature and most statements of this routine are not
* visible here. The visible skeleton shows an early return, several loops
* that repeat while newtime is negative, and an inlined cpu microstate
* switch performed after kpreempt_disable().
*/
void
{
return;
/* repeat while the computed newtime is negative */
while (newtime < 0) {
}
/*
* Here, you could call new_cpu_mstate() to switch the cpu
* microstate. However, in the interest of making things
* as expeditious as possible, the relevant work has been inlined.
*/
kpreempt_disable(); /* MUST disable kpreempt before touching t->cpu */
while (newtime < 0) {
}
while (newtime < 0) {
}
}
}
/*
* The following is for computing the percentage of cpu time used recently
* by an lwp. The function cpu_decay() is also called from /proc code.
*
* exp_x(x):
* Given x as a 64-bit non-negative scaled integer of arbitrary magnitude,
* Return exp(-x) as a 64-bit scaled integer in the range [0 .. 1].
*
* Scaling for 64-bit scaled integer:
* The binary point is to the right of the high-order bit
* of the low-order 32-bit word.
*/
/*
* Shift used to rescale the product of two scaled integers (see the
* squaring step at the end of the routine below).
*/
#define LSHIFT 31
#ifdef DEBUG
#endif
static uint64_t
{
/* number of times x is halved, and later the number of squarings */
int i;
#ifdef DEBUG
expx_cnt++;
#endif
/*
* By the formula:
* exp(-x) = exp(-x/2) * exp(-x/2)
* we keep halving x until it becomes small enough for
* the following approximation to be accurate enough:
* exp(-x) = 1 - x
* We reduce x until it is less than 1/4 (the 2 in LSHIFT-2 below).
* Our final error will be smaller than 4%.
*/
/*
* Use a uint64_t for the initial shift calculation.
*/
/*
* Short circuit:
* A number this large produces effectively 0 (actually .005).
* This way, we will never do more than 5 multiplies.
*/
return (0);
/* count how many right shifts it takes to reduce ui to zero */
for (i = 0; ui != 0; i++)
ui >>= 1;
if (i != 0) {
#ifdef DEBUG
expx_mul += i; /* seldom happens */
#endif
/* halve x i times */
x >>= i;
}
/*
* Now we compute 1 - x and square it the number of times
* that we halved x above to produce the final result:
*/
x = LSI_ONE - x;
/* each squaring is shifted right by LSHIFT to keep the scaling */
while (i--)
x = (x * x) >> LSHIFT;
return (x);
}
/*
* Given the old percent cpu and a time delta in nanoseconds,
* return the new decayed percent cpu: pct * exp(-tau),
* where 'tau' is the time delta multiplied by a decay factor.
* We have chosen the decay factor (cpu_decay_factor in param.c)
* to make the decay over five seconds be approximately 20%.
*
* 'pct' is a 32-bit scaled integer <= 1
* The binary point is to the right of the high-order bit
* of the 32-bit word.
*
* NOTE(review): the function name, parameter list and body are not
* visible here; only the uint32_t return type is. The result is
* presumably in the same scaled format as 'pct' -- confirm.
*/
static uint32_t
{
}
/*
* Given the old percent cpu and a time delta in nanoseconds,
* return the new grown percent cpu: 1 - ( 1 - pct ) * exp(-tau)
*
* NOTE(review): the function name, parameter list and body are not
* visible here; only the uint32_t return type is.
*/
static uint32_t
{
}
/*
* Defined to determine whether a lwp is still on a processor.
*/
{
/*
* NOTE(review): the signature, the declarations and the loop condition
* of this routine are not visible here. The visible code has two
* branches that each test delta for a negative value and then pass it
* to scalehrtime(); the routine returns npctcpu.
*/
/*
* This routine can get called at PIL > 0, this *has* to be
* done atomically. Holding locks here causes bad things to happen.
* (read: deadlock).
*/
do {
/* first branch: check for a negative delta, then scale it */
if (delta < 0) {
}
scalehrtime(&delta);
} else {
/* second branch: same negative-delta check and scaling */
if (delta < 0) {
}
scalehrtime(&delta);
}
return (npctcpu);
}
/*
* Change the microstate level for the LWP and update the
* associated accounting information. Return the previous
* LWP state.
*
* NOTE(review): the signature, the condition guarding the early
* 'return (LMS_SYSTEM)', the loop condition and the final return are
* not visible here.
*/
int
{
/* microstate value examined by the switch below */
unsigned state;
return (LMS_SYSTEM);
/* adjust cpu percentages before we go any further */
(void) cpu_update_pct(t, curtime);
do {
/* the fault and user-lock states share one case group */
switch (state) {
case LMS_TFAULT:
case LMS_DFAULT:
case LMS_KFAULT:
case LMS_USER_LOCK:
break;
default:
break;
}
/* a negative newtime restarts the loop iteration */
if (newtime < 0) {
continue;
}
/*
* Remember the previous running microstate.
*/
/*
* Switch CPU microstate if appropriate
*/
kpreempt_disable(); /* MUST disable kpreempt before touching t->cpu */
}
}
/*
* Counter incremented each time a zero waitrq value is encountered in the
* routine that follows; the comment at the increment site says this
* should only happen during boot.
*/
static long waitrqis0 = 0;
/*
* Restore the LWP microstate to the previous runnable state.
* Called from disp() with the newly selected lwp.
*
* NOTE(review): the signature, the condition guarding the early return,
* the declarations and the loop condition are not visible here. The
* visible code updates cpu percentages through cpu_update_pct(), switches
* on t->t_mstate (with LMS_SLEEP and LMS_STOPPED cases), clears
* t->t_waitrq, counts zero waitrq values in waitrqis0, and restarts the
* loop iteration when newtime is negative.
*/
void
{
return;
/* refresh cpu percentages first; the return value is ignored */
(void) cpu_update_pct(t, curtime);
do {
switch (t->t_mstate) {
case LMS_SLEEP:
/*
* Update the timer for the current sleep state.
*/
case LMS_TFAULT:
case LMS_DFAULT:
case LMS_KFAULT:
case LMS_USER_LOCK:
break;
default:
break;
}
/*
* Return to the previous run state.
*/
break;
case LMS_STOPPED:
/*
* Return to the previous run state.
*/
break;
case LMS_TFAULT:
case LMS_DFAULT:
case LMS_KFAULT:
case LMS_USER_LOCK:
break;
default:
break;
}
/* clear the thread's waitrq value */
t->t_waitrq = 0;
if (waitrq == 0) { /* should only happen during boot */
waitrqis0++;
}
/* a negative newtime restarts the loop iteration */
if (newtime < 0) {
continue;
}
/*
* Update the WAIT_CPU timer and per-cpu waitrq total.
*/
}
/*
* Copy lwp microstate accounting and resource usage information
* to the process. (lwp is terminating)
*
* t is the terminating thread. The thread is first moved to LMS_STOPPED
* through new_mstate(); the loop then visits every microstate index
* below NMSTATES, and finally p->p_defunct is incremented.
*
* NOTE(review): the declarations of p and tmp and the statements that
* compute and store tmp are not visible here.
*/
void
term_mstate(kthread_t *t)
{
/* index over the microstates */
int i;
/* switch to LMS_STOPPED; the previous state it returns is not needed */
(void) new_mstate(t, LMS_STOPPED);
scalehrtime(&tmp);
for (i = 0; i < NMSTATES; i++) {
scalehrtime(&tmp);
}
/* bump the process's p_defunct count */
p->p_defunct++;
}