clock_tick.c revision 1b7f7204f8aa806a57b84cdeba100d819be85a61
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/clock_tick.h>
#include <sys/clock_impl.h>
#include <sys/sysmacros.h>
/*
* This file contains the implementation of clock tick accounting for threads.
* Every tick, user threads running on various CPUs are located and charged
* with a tick to account for their use of CPU time.
*
* Every tick, the clock() handler calls clock_tick_schedule() to perform tick
* accounting for all the threads in the system. Tick accounting is done in
* two phases:
*
* Tick scheduling: Done in clock_tick_schedule(). In this phase, cross
* calls are scheduled to multiple CPUs to perform
* multi-threaded tick accounting. The CPUs are chosen
* on a rotational basis so as to distribute the tick
* accounting load evenly across all CPUs.
*
* Tick execution: Done in clock_tick_execute(). In this phase, tick
* accounting is actually performed by softint handlers
* on multiple CPUs.
*
* This implementation gives us a multi-threaded tick processing facility that
* is suitable for configurations with a large number of CPUs. On smaller
* configurations it may be desirable to let the processing be single-threaded
* and just allow clock() to do it as it has been done traditionally. To
* facilitate this, a variable, clock_tick_threshold, is defined. Platforms
* that desire multi-threading should set this variable to something
* appropriate. A recommended value may be found in clock_tick.h. At boot time,
* if the number of CPUs is greater than clock_tick_threshold, multi-threading
* kicks in. Note that this is a decision made at boot time. If more CPUs
* are dynamically added later on to exceed the threshold, no attempt is made
* to switch to multi-threaded. Similarly, if CPUs are removed dynamically
* no attempt is made to switch to single-threaded. This is to keep the
* implementation simple. Also note that the threshold can be changed for a
* specific customer configuration via /etc/system.
*
* The boot time decision is reflected in clock_tick_single_threaded.
*/
/*
* clock_tick_threshold
* If the number of CPUs at boot time exceeds this threshold,
* multi-threaded tick accounting kicks in.
*
* clock_tick_ncpus
* The number of CPUs in a set. Each set is scheduled for tick execution
* on a separate processor.
*
* clock_tick_single_threaded
* Indicates whether or not tick accounting is single threaded.
*
* clock_tick_total_cpus
* Total number of online CPUs.
*
* clock_tick_cpus
* Array of online CPU pointers.
*
* clock_tick_cpu
* Per-CPU, cache-aligned data structures to facilitate multi-threading.
*
* clock_tick_active
* Counter that indicates the number of active tick processing softints
* in the system.
*
* clock_tick_pending
* Number of pending ticks that need to be accounted by the softint
* handlers.
*
* clock_tick_lock
* Mutex to synchronize between clock_tick_schedule() and
* CPU online/offline.
*
* clock_cpu_id
* CPU id of the clock() CPU. Used to detect when the clock CPU
* is offlined.
*
* clock_tick_online_cpuset
* CPU set of all online processors that can be X-called.
*
* clock_tick_proc_max
* Each process is allowed to accumulate a few ticks before checking
* for the task CPU time resource limit. We lower the number of calls
* to rctl_test() to make tick accounting more scalable. The tradeoff
* is that the limit may not get enforced in a timely manner. This is
* typically not a problem.
*
* clock_tick_set
* Per-set structures. Each structure contains the range of CPUs
* to be processed for the set.
*
* clock_tick_nsets
* Number of sets.
*
* clock_tick_scan
* Where to begin the scan for single-threaded mode. In multi-threaded mode,
* the clock_tick_set itself contains a field for this.
*/
/*
* If the number of CPUs at boot time exceeds this threshold,
* multi-threaded tick accounting kicks in.
*/
int clock_tick_threshold;
/*
* Number of CPUs in a set. Each set is scheduled for tick execution
* on a separate processor.
*/
int clock_tick_ncpus;
/*
* Number of pending ticks that still need to be accounted by the
* softint handlers.
*/
int clock_tick_pending;
/* Number of tick sets. */
int clock_tick_nsets;
/* Where to begin the scan in single-threaded mode. */
int clock_tick_scan;
/*
* Forward declaration of the common tick execution routine. The
* parameter names are not given in this prototype.
*/
static void clock_tick_execute_common(int, int, int, clock_t, int);
/*
* Clock tick initialization is done in two phases:
*
* 1. Before clock_init() is called, clock_tick_init_pre() is called to set
* up single-threading so the clock() can begin to do its job.
*
* 2. After the slave CPUs are initialized at boot time, we know the number
* of CPUs. clock_tick_init_post() is called to set up multi-threading if
* required.
*/
/*
* clock_tick_init_pre()
*
* First phase of clock tick initialization. It is called before
* clock_init() so that clock() can start out doing single-threaded tick
* accounting, and it also prepares the per-CPU and per-set structures in
* case multi-threading is chosen later.
*
* Takes no arguments and returns nothing.
*
* NOTE(review): several statements appear to be missing from this text
* (empty conditional bodies, `ctp`, `csp` and `n` used without a visible
* declaration or assignment, unbalanced braces). Confirm against the
* complete source before relying on the comments below.
*/
void
clock_tick_init_pre(void)
{
int i, n;
/*
* Perform initialization in case multi-threading is chosen later.
*/
/*
* A platform may not provide create_softint(); comparing its address
* against NULL tests whether it is present (presumably a weak symbol;
* verify).
*/
if (&create_softint != NULL) {
}
/* Record the per-CPU tick structure for index i. */
clock_tick_cpu[i] = ctp;
if (&create_softint != NULL) {
}
/* No ticks are pending for this CPU yet. */
ctp->ct_pending = 0;
}
/*
* Compute clock_tick_ncpus here. We need it to compute the
* maximum number of tick sets we need to support.
*/
/* A negative set size is never valid. */
ASSERT(clock_tick_ncpus >= 0);
/*
* NOTE(review): the bodies of the next two conditionals are not visible
* here; presumably they apply a default set size and clamp it to
* max_ncpus. Confirm.
*/
if (clock_tick_ncpus == 0)
if (clock_tick_ncpus > max_ncpus)
/*
* Allocate and initialize the tick sets.
*/
/* Walk the n tick sets; csp points at the set being initialized. */
for (i = 0; i < n; i++) {
csp = &clock_tick_set[i];
}
}
/*
* clock_tick_init_post()
*
* Second phase of clock tick initialization, run after the slave CPUs
* have been initialized at boot time and the number of CPUs is known.
* It decides whether tick accounting stays single-threaded or becomes
* multi-threaded:
*
* - If the platform does not provide invoke_softint(), the threshold is
* forced to 0.
* - If ncpus does not exceed clock_tick_threshold, clock_tick_proc_max is
* set to 1 and the function returns.
* - Otherwise, when clock_tick_proc_max has not been set (is 0), it is
* scaled by 10 if hires_tick is set.
*
* Takes no arguments and returns nothing.
*
* NOTE(review): some statements appear to be missing from this text (see
* the notes below). Confirm against the complete source.
*/
void
clock_tick_init_post(void)
{
/*
* If a platform does not provide create_softint() and invoke_softint(),
* then we assume single threaded.
*/
if (&invoke_softint == NULL)
clock_tick_threshold = 0;
ASSERT(clock_tick_threshold >= 0);
/*
* NOTE(review): no body is visible for this conditional, so as written
* the `if (ncpus <= ...)` statement below becomes its body. A statement
* is presumably missing here. Confirm.
*/
if (clock_tick_threshold == 0)
/*
* If a platform does not specify a threshold or if the number of CPUs
* at boot time does not exceed the threshold, tick accounting remains
* single-threaded.
*/
if (ncpus <= clock_tick_threshold) {
/* Single-threaded: check the process limit on every tick. */
clock_tick_proc_max = 1;
return;
}
/*
* OK. Multi-thread tick processing. If a platform has not specified
* the CPU set size for multi-threading, then use the default value.
* This value has been arrived through measurements on large
* configuration systems.
*/
/*
* NOTE(review): no default is assigned to clock_tick_proc_max before it
* is multiplied, so as written 0 * 10 stays 0. An assignment is
* presumably missing here. Confirm.
*/
if (clock_tick_proc_max == 0) {
if (hires_tick)
clock_tick_proc_max *= 10;
}
}
/*
* Schedules tick accounting for one set of CPUs and returns without
* waiting for the softint to finish.
*
* NOTE(review): the function name, its parameter list and all of its
* statements are missing from this text; only the original comments
* remain. Confirm against the complete source.
*/
static void
{
/*
* Schedule tick accounting for a set of CPUs.
*/
/*
* Return without waiting for the softint to finish.
*/
}
/*
* Performs tick accounting for the thread running on a single CPU.
*
* The visible logic is a chain of bail-out checks followed by the actual
* accounting call:
* 1. Pick the target thread, following t_intr to reach a pinned thread.
* 2. Return if the CPU is offline.
* 3. Return if the thread is no longer on the CPU.
* 4. Return if the thread is exiting or uninteresting.
* 5. Re-check CPU offline and TP_LWPEXIT after the process lock step.
* 6. Call clock_tick(t, pending).
*
* NOTE(review): the function name and parameter list are missing from
* this text; `cp` and `pending` are presumably parameters. Several
* statements (lock acquisition/release, thread_free_prevent/allow calls,
* some conditions) are also not visible. Confirm against the complete
* source.
*/
static void
{
kthread_t *t;
/*
* The locking here is rather tricky. thread_free_prevent()
* prevents the thread returned from being freed while we
* are looking at it. We can then check if the thread
* is exiting and get the appropriate p_lock if it
* is not. We have to be careful, though, because
* the _process_ can still be freed while we've
* prevented thread free. To avoid touching the
* proc structure we put a pointer to the p_lock in the
* thread structure. The p_lock is persistent so we
* can acquire it even if the process is gone. At that
* point we can check (again) if the thread is exiting
* and either drop the lock or do the tick processing.
*/
/*
* 't' will be the tick processing thread on this
* CPU. Use the pinned thread (if any) on this CPU
* as the target of the clock tick.
*/
/* Step down to the thread pinned beneath the current one. */
t = t->t_intr;
}
/*
* We use thread_free_prevent to keep the currently running
* thread from being freed or recycled while we're
* looking at it.
*/
/*
* We cannot hold the cpu_lock to prevent the
* cpu_active from changing in the clock interrupt.
* As long as we don't block (or don't get pre-empted)
* the cpu_list will not change (all threads are paused
* before list modification).
*/
if (CLOCK_TICK_CPU_OFFLINE(cp)) {
return;
}
/*
* Make sure the thread is still on the CPU.
*/
if ((t != cp->cpu_thread) &&
/*
* We could not locate the thread. Skip this CPU. Race
* conditions while performing these checks are benign.
* These checks are not perfect and they don't need
* to be.
*/
return;
}
/*
* Thread is exiting (or uninteresting) so don't
* do tick processing.
*/
return;
}
/*
* OK, try to grab the process lock. See
* comments above for why we're not using
* ttoproc(t)->p_lockp here.
*/
/* See above comment. */
if (CLOCK_TICK_CPU_OFFLINE(cp)) {
return;
}
/*
* The thread may have exited between when we
* checked above, and when we got the p_lock.
*/
if (t->t_proc_flag & TP_LWPEXIT) {
return;
}
/*
* Either we have the p_lock for the thread's process,
* or we don't care about the thread structure any more.
* Either way we can allow thread free.
*/
/*
* If we haven't done tick processing for this
* lwp, then do it now. Since we don't hold the
* lwp down on a CPU it can migrate and show up
* more than once, hence the lbolt check. mylbolt
* is copied at the time of tick scheduling to prevent
* lbolt mismatches.
*
* Also, make sure that it's okay to perform the
* tick processing before calling clock_tick.
* Setting notick to a TRUE value (ie. not 0)
* results in tick processing not being performed for
* that thread.
*/
/* Charge the thread with the pending tick count. */
clock_tick(t, pending);
}
}
/*
* Tick scheduling entry point, called by the clock() handler every tick.
*
* In single-threaded mode the scan start (clock_tick_scan) is rotated,
* the accounting is done directly, and the function returns. In
* multi-threaded mode it returns early if the previous handlers are still
* active. Otherwise it schedules each tick set on a separate processor
* and resets clock_tick_pending to 0.
*
* NOTE(review): the function name and parameter list are missing from
* this text; `one_sec` is presumably a parameter, and `active`, `cp` and
* `csp` are used without a visible declaration. Many statements are also
* not visible (for example the call whose trailing arguments appear as
* "LBOLT_NO_ACCOUNT, 1);"). Confirm against the complete source.
*/
void
{
int i, end;
if (clock_tick_single_threaded) {
/*
* Each tick cycle, start the scan from a different
* CPU for the sake of fairness.
*/
/* Wrap the scan start back to the first CPU. */
if (clock_tick_scan >= end)
clock_tick_scan = 0;
LBOLT_NO_ACCOUNT, 1);
return;
}
/*
* If the previous invocation of handlers is not yet finished, then
* simply increment a pending count and return. Eventually when they
* finish, the pending count is passed down to the next set of
* handlers to process. This way, ticks that have already elapsed
* in the past are handled as quickly as possible to minimize the
* chances of threads getting away before their pending ticks are
* accounted. The other benefit is that if the pending count is
* more than one, it can be handled by a single invocation of
* clock_tick(). This is a good optimization for large configuration
* busy systems where tick accounting can get backed up for various
* reasons.
*/
if (active)
return;
/*
* We want to handle the clock CPU here. If we
* scheduled the accounting for the clock CPU to another
* processor, that processor will find only the clock() thread
* running and not account for any user thread below it. Also,
* we want to handle this before we block on anything and allow
* the pinned thread below the current thread to escape.
*/
/*
* Schedule each set on a separate processor.
*/
cp = clock_cpu_list;
for (i = 0; i < clock_tick_nsets; i++) {
csp = &clock_tick_set[i];
/*
* Pick the next online CPU in list for scheduling tick
* accounting. The clock_tick_lock is held by the caller,
* so CPU online/offline cannot interfere while
* we are picking our CPU to X-call.
*/
/*
* Each tick cycle, start the scan from a different
* CPU for the sake of fairness.
*/
}
if (one_sec) {
/*
* Move the CPU pointer around every second. This is so
* all the CPUs can be X-called in a round-robin fashion
* to evenly distribute the X-calls. We don't do this
* at a faster rate than this because we don't want
* to affect cache performance negatively.
*/
}
/* All pending ticks have been handed to the handlers. */
clock_tick_pending = 0;
}
/*
* Common tick execution routine: handles the thread on the current CPU
* first and then performs tick accounting for the threads running on the
* scheduled CPUs, taken from the clock_tick_cpus[] array.
*
* pending - the last parameter; presumably the number of ticks to charge
* to each thread (confirm).
*
* NOTE(review): the function name and all parameters except `pending`
* are missing from this text, as are the loop headers and the conditions
* guarding each `continue`. This is presumably
* clock_tick_execute_common(), whose prototype earlier in the file takes
* (int, int, int, clock_t, int). Confirm against the complete source.
*/
static void
int pending)
{
int i;
/*
* Handle the thread on current CPU first. This is to prevent a
* pinned thread from escaping if we ever block on something.
* Note that in the single-threaded mode, this handles the clock
* CPU.
*/
/*
* Perform tick accounting for the threads running on
* the scheduled CPUs.
*/
/* Look up the CPU at index i in the online CPU array. */
cp = clock_tick_cpus[i];
continue;
}
cp = clock_tick_cpus[i];
continue;
}
}
/*
* Softint handler that performs the tick execution phase on a CPU.
*
* It does nothing if the current CPU is no longer safe to cross-call or
* if there are no pending ticks; otherwise it clears the per-CPU pending
* count (ct_pending). In every case it falls through to `out` and
* returns 1.
*
* NOTE(review): the function name, its parameter list, the declarations
* of `ctp` and `pending`, and several statements are missing from this
* text. Confirm against the complete source.
*/
/*ARGSUSED*/
static uint_t
{
/*
* We could have raced with cpu offline. We don't want to
* process anything on an offlined CPU. If we got blocked
* on anything, we may not get scheduled when we wakeup
* later on.
*/
if (!CLOCK_TICK_XCALL_SAFE(CPU))
goto out;
if (pending == 0) {
/*
* If a CPU is busy at LOCK_LEVEL, then an invocation
* of this softint may be queued for some time. In that case,
* clock_tick_active will not be incremented.
* clock_tick_schedule() will then assume that the previous
* invocation is done and post a new softint. The first one
* that gets in will reset the pending count so the
* second one is a noop.
*/
goto out;
}
/* Reset the per-CPU pending count; see the comment above. */
ctp->ct_pending = 0;
out:
/*
* Signal completion to the clock handler.
*/
return (1);
}
/*
* CPU online/offline callback that recomputes the data used by tick
* scheduling and execution.
*
* what - the CPU event being handled; CPU_ON and CPU_OFF are acted on,
* anything else is ignored.
*
* Always returns 0.
*
* NOTE(review): the function name, its parameter list and a number of
* statements (including the loop condition of the `do` loop and the
* condition guarding `continue`) are missing from this text. Confirm
* against the complete source.
*/
/*ARGSUSED*/
static int
{
int i, set;
/*
* This function performs some computations at CPU online/offline
* time. The computed values are used during tick scheduling and
* execution phases. This avoids having to compute things on
* an every tick basis. The other benefit is that we perform the
* computations only for onlined CPUs (not offlined ones). As a
* result, no tick processing is attempted for offlined CPUs.
*
* Also, cpu_offline() calls this function before checking for
* active interrupt threads. This allows us to avoid posting
* cross calls to CPUs that are being offlined.
*/
switch (what) {
case CPU_ON:
/* Make the updates visible to other CPUs. */
membar_sync();
break;
case CPU_OFF:
/*
* NOTE(review): no body is visible for this conditional, so as
* written `i = 0;` becomes its body. A statement is presumably
* missing here. Confirm.
*/
if (&sync_softint != NULL)
i = 0;
/* Rebuild clock_tick_cpus[] from the active CPU list. */
ncp = cpu_active;
do {
continue;
clock_tick_cpus[i] = ncp;
i++;
/* Every online CPU must have been recorded. */
ASSERT(i == clock_tick_total_cpus);
membar_sync();
break;
default:
break;
}
return (0);
}
void
clock_tick_mp_init(void)
{
cp = cpu_active;
do {
}