callout.c revision 454ab20244cd84c2b93aa273b462eab1166cf539
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/callo.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/kmem.h>
#include <sys/kmem_impl.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>
/*
* Callout tables. See timeout(9F) for details.
*/
static hrtime_t callout_debug_hrtime;	/* debugger entry time */
static int callout_min_resolution;	/* Minimum resolution */
static callout_table_t *callout_boot_ct;	/* Boot CPU's callout tables */
static clock_t callout_max_ticks;	/* max interval */
static hrtime_t callout_longterm;	/* longterm nanoseconds */
static callout_cache_t *callout_caches;	/* linked list of caches */
static callout_table_t *callout_table;	/* global callout table array */
static char *callout_kstat_names[] = {
"callout_timeouts",
"callout_timeouts_pending",
"callout_untimeouts_unexpired",
"callout_untimeouts_executing",
"callout_untimeouts_expired",
"callout_expirations",
"callout_allocations",
};
/*
 * The CALLOUT_HASH_INSERT, CALLOUT_HASH_APPEND and CALLOUT_HASH_DELETE
 * macros belong here: they manipulate the doubly-linked hash queues,
 * inserting an element at the head, appending it at the tail and
 * unlinking it from wherever it is, respectively. CALLOUT_LIST_INSERT,
 * CALLOUT_LIST_APPEND and CALLOUT_LIST_DELETE are thin wrappers that
 * apply them to callout lists via the cl_next/cl_prev fields. A sketch
 * of these operations follows the queueing rationale below.
 */
/*
* These definitions help us queue callouts and callout lists. Here is
* the queueing rationale:
*
* - callouts are queued in a FIFO manner in the ID hash table.
* TCP timers are typically cancelled in the same order that they
* were issued. The FIFO queueing shortens the search for a callout
* during untimeout().
*
* - callouts are queued in a FIFO manner in their callout lists.
* This ensures that the callouts are executed in the same order that
* they were queued. This is fair. Plus, it helps to make each
* callout expiration timely. It also favors cancellations.
*
* - callout lists are queued in a LIFO manner in the callout list hash
* table. This ensures that long term timers stay at the rear of the
* hash lists.
*
* - callout lists are queued in a FIFO manner in the expired callouts
* list. This ensures that callout lists are executed in the order
* of expiration.
*/
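/*
 * Illustrative sketch of the queueing operations described above. The
 * type and field names below are simplified stand-ins, not the original
 * macros: each hash bucket keeps head and tail pointers, so both LIFO
 * insertion at the head and FIFO insertion at the tail are O(1).
 */
typedef struct eq_node {
	struct eq_node *next;
	struct eq_node *prev;
} eq_node_t;

typedef struct eq_hash {
	eq_node_t *head;
	eq_node_t *tail;
} eq_hash_t;

/* LIFO: insert at the head, as done for callout lists in the hash table. */
static void
eq_insert(eq_hash_t *hashp, eq_node_t *np)
{
	np->next = hashp->head;
	np->prev = NULL;
	if (hashp->head == NULL)
		hashp->tail = np;
	else
		hashp->head->prev = np;
	hashp->head = np;
}

/* FIFO: append at the tail, as done for callouts in their callout list. */
static void
eq_append(eq_hash_t *hashp, eq_node_t *np)
{
	np->next = NULL;
	np->prev = hashp->tail;
	if (hashp->tail == NULL)
		hashp->head = np;
	else
		hashp->tail->next = np;
	hashp->tail = np;
}

/* Unlink a node from anywhere in its queue. */
static void
eq_delete(eq_hash_t *hashp, eq_node_t *np)
{
	if (np->next == NULL)
		hashp->tail = np->prev;
	else
		np->next->prev = np->prev;
	if (np->prev == NULL)
		hashp->head = np->next;
	else
		np->prev->next = np->next;
}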
/*
* Allocate a callout structure. We try quite hard because we
* can't sleep, and if we can't do the allocation, we're toast.
* Failing all, we try a KM_PANIC allocation. Note that we never
* deallocate a callout. See untimeout() for the reasoning.
*/
static callout_t *
callout_alloc(callout_table_t *ct)
{
	size_t size;
	callout_t *cp;

	/* Try the per-table kmem cache first; fall back to a KM_PANIC alloc. */
	cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP);
	if (cp == NULL) {
		size = sizeof (callout_t);
		cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
	}

	ct->ct_allocations++;
	return (cp);
}
/*
* Allocate a callout list structure. We try quite hard because we
* can't sleep, and if we can't do the allocation, we're toast.
* Failing all, we try a KM_PANIC allocation. Note that we never
* deallocate a callout list.
*/
static void
callout_list_alloc(callout_table_t *ct)
{
	size_t size;
	callout_list_t *cl;

	cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP);
	if (cl == NULL) {
		size = sizeof (callout_list_t);
		cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
	}
	bzero(cl, sizeof (callout_list_t));

	/* Prepend the new callout list to the table's free list. */
	cl->cl_next = ct->ct_lfree;
	ct->ct_lfree = cl;
}
/*
* Find the callout list that corresponds to an expiration. There can
* be only one.
*/
static callout_list_t *
callout_list_get(callout_table_t *ct, hrtime_t expiration, int hash)
{
	callout_list_t *cl;

	for (cl = ct->ct_clhash[hash].ch_head; cl != NULL; cl = cl->cl_next) {
		if (cl->cl_expiration == expiration)
			return (cl);
	}

	return (NULL);
}
/*
 * Find the callout list that corresponds to an expiration. There can
 * be only one. If the callout list is empty, free it and return NULL.
 * Else, return the callout list.
 */
static callout_list_t *
callout_list_check(callout_table_t *ct, hrtime_t expiration, int hash)
{
	callout_list_t *cl;
/*
* There is exactly one callout list for every
* unique expiration. So, we are done.
*/
return (cl);
}
}
return (NULL);
}
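/*
 * Illustrative sketch of the lookup-and-prune pattern used by
 * callout_list_check() above (simplified stand-in types; the real code
 * operates on callout_list_t and the table's callout list hash). An
 * empty list found during the lookup is unlinked from its bucket and
 * cached on a free list instead of being returned.
 */
typedef struct xcl {
	struct xcl *next;
	struct xcl *prev;
	long long expiration;
	int ncallouts;		/* number of callouts still queued */
} xcl_t;

static xcl_t *
xcl_check(xcl_t **bucket, xcl_t **freelist, long long expiration)
{
	xcl_t *cl;

	for (cl = *bucket; cl != NULL; cl = cl->next) {
		if (cl->expiration != expiration)
			continue;
		if (cl->ncallouts == 0) {
			/* Empty: unlink from the bucket, cache on freelist. */
			if (cl->prev != NULL)
				cl->prev->next = cl->next;
			else
				*bucket = cl->next;
			if (cl->next != NULL)
				cl->next->prev = cl->prev;
			cl->next = *freelist;
			*freelist = cl;
			return (NULL);
		}
		/* There is exactly one callout list per unique expiration. */
		return (cl);
	}
	return (NULL);
}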
/*
* Initialize a callout table's heap, if necessary. Preallocate some free
* entries so we don't have to check for NULL elsewhere.
*/
static void
callout_heap_init(callout_table_t *ct)
{
	size_t size;

	ct->ct_heap_num = 0;
	ct->ct_heap_max = CALLOUT_CHUNK;
	size = sizeof (hrtime_t) * CALLOUT_CHUNK;
	ct->ct_heap = kmem_alloc(size, KM_SLEEP);
}
/*
* Reallocate the heap. We try quite hard because we can't sleep, and if
* we can't do the allocation, we're toast. Failing all, we try a KM_PANIC
* allocation. Note that the heap only expands, it never contracts.
*/
static void
callout_heap_expand(callout_table_t *ct)
{
	size_t size, new_size;
	hrtime_t *heap, *new_heap;
/*
* Someone beat us to the allocation. Free what we
* just allocated and proceed.
*/
continue;
}
}
}
/*
* Move an expiration from the bottom of the heap to its correct place
* in the heap. If we reached the root doing this, return 1. Else,
* return 0.
*/
static int
callout_upheap(callout_table_t *ct)
{
	int current, parent;
	hrtime_t *heap;
	hrtime_t current_expiration, parent_expiration;

	if (ct->ct_heap_num == 1) {
		return (1);
	}

	heap = ct->ct_heap;
	current = ct->ct_heap_num - 1;

	for (;;) {
		parent = (current - 1) / 2;
		current_expiration = heap[current];
		parent_expiration = heap[parent];

		/*
		 * We have an expiration later than our parent; we're done.
		 */
		if (current_expiration >= parent_expiration) {
			return (0);
		}

		/*
		 * We need to swap with our parent, and continue up the heap.
		 */
		heap[parent] = current_expiration;
		heap[current] = parent_expiration;
		current = parent;

		/*
		 * If we just reached the root, we're done.
		 */
		if (parent == 0) {
			return (1);
		}
	}
	/*NOTREACHED*/
}
/*
* Insert a new, unique expiration into a callout table's heap.
*/
static void
callout_heap_insert(callout_table_t *ct, hrtime_t expiration)
{
/*
* First, copy the expiration to the bottom of the heap.
*/
	ct->ct_heap[ct->ct_heap_num] = expiration;
	ct->ct_heap_num++;
/*
* Now, perform an upheap operation. If we reached the root, then
* the cyclic needs to be reprogrammed as we have an earlier
* expiration.
*
* Also, during the CPR suspend phase, do not reprogram the cyclic.
* We don't want any callout activity. When the CPR resume phase is
* entered, the cyclic will be programmed for the earliest expiration
* in the heap.
*/
	if (callout_upheap(ct) && (ct->ct_suspend == 0))
		(void) cyclic_reprogram(ct->ct_cyclic, expiration);
}
/*
* Move an expiration from the top of the heap to its correct place
* in the heap.
*/
static void
callout_downheap(callout_table_t *ct)
{
	int current, left, right, nelems;
	hrtime_t *heap;
	hrtime_t current_expiration, left_expiration, right_expiration;

	heap = ct->ct_heap;
	nelems = ct->ct_heap_num;

	current = 0;
	for (;;) {
		left = (current * 2) + 1;

		/*
		 * If we don't have a left child (i.e., we're a leaf), we're
		 * done.
		 */
		if (left >= nelems)
			return;

		current_expiration = heap[current];
		left_expiration = heap[left];

		/*
		 * Even if we don't have a right child, we still need to
		 * compare our expiration against that of our left child.
		 */
		right = left + 1;
		if (right >= nelems)
			goto comp_left;

		/*
		 * We have both a left and a right child. We need to compare
		 * the expiration of the children to determine which
		 * expires earlier.
		 */
		right_expiration = heap[right];
		if (right_expiration < left_expiration) {
			/*
			 * Our right child is the earlier of our children.
			 * We'll now compare our expiration to its expiration.
			 * If ours is the earlier one, we're done.
			 */
			if (current_expiration <= right_expiration)
				return;

			/*
			 * Our right child expires earlier than we do; swap
			 * with our right child, and descend right.
			 */
			heap[right] = current_expiration;
			heap[current] = right_expiration;
			current = right;
			continue;
		}

comp_left:
		/*
		 * Our left child is the earlier of our children (or we have
		 * no right child). We'll now compare our expiration
		 * to its expiration. If ours is the earlier one, we're done.
		 */
		if (current_expiration <= left_expiration)
			return;

		/*
		 * Our left child expires earlier than we do; swap with our
		 * left child, and descend left.
		 */
		heap[left] = current_expiration;
		heap[current] = left_expiration;
		current = left;
	}
}
/*
* Delete and handle all past expirations in a callout table's heap.
*/
static void
callout_heap_delete(callout_table_t *ct)
{
	hrtime_t now, expiration;
	callout_list_t *cl;
	int hash;

	now = gethrtime();

	while (ct->ct_heap_num > 0) {
		expiration = ct->ct_heap[0];
/*
* Find the callout list that corresponds to the expiration.
* If the callout list is empty, callout_list_check()
* will free the callout list and return NULL.
		 */
		hash = CALLOUT_CLHASH(expiration);
		cl = callout_list_check(ct, expiration, hash);
/*
* If the root of the heap expires in the future, we are
* done. We are doing this check here instead of at the
* beginning because we want to first free all the
* empty callout lists at the top of the heap.
*/
if (expiration > now)
break;
/*
* Move the callout list for this expiration to the
* list of expired callout lists. It will be processed
* by the callout executor.
*/
		if (cl != NULL) {
			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
		}
/*
* Now delete the root. This is done by swapping the root with
* the last item in the heap and downheaping the item.
*/
		ct->ct_heap_num--;
		if (ct->ct_heap_num > 0) {
			ct->ct_heap[0] = ct->ct_heap[ct->ct_heap_num];
			callout_downheap(ct);
		}
}
	/*
	 * If this callout table is empty or callouts have been suspended
	 * by CPR, just return. The cyclic has already been programmed to
	 * infinity by the cyclic subsystem.
	 */
	if ((ct->ct_heap_num == 0) || (ct->ct_suspend > 0))
		return;

	(void) cyclic_reprogram(ct->ct_cyclic, ct->ct_heap[0]);
}
/*
* Common function used to create normal and realtime callouts.
*
* Realtime callouts are handled at CY_LOW_PIL by a cyclic handler. So,
* there is one restriction on a realtime callout handler - it should not
* directly or indirectly acquire cpu_lock. CPU offline waits for pending
* cyclic handlers to complete while holding cpu_lock. So, if a realtime
* callout handler were to try to get cpu_lock, there would be a deadlock
* during CPU offline.
*/
callout_id_t
timeout_generic(int type, void (*func)(void *), void *arg,
	hrtime_t expiration, hrtime_t resolution, int flags)
{
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t id;
	callout_list_t *cl;
	hrtime_t now, interval;
	int hash;

	ASSERT(resolution > 0);
	ASSERT(func != NULL);
/*
* Please see comment about minimum resolution in callout_init().
*/
	if (resolution < callout_min_resolution)
		resolution = callout_min_resolution;
/*
* We disable kernel preemption so that we remain on the same CPU
* throughout. If we needed to reprogram the callout table's cyclic,
* we can avoid X-calls if we are on the same CPU.
*
* Note that callout_alloc() releases and reacquires the callout
* table mutex. While reacquiring the mutex, it is possible for us
* to go to sleep and later migrate to another CPU. This should be
* pretty rare, though.
*/
/*
* The callout table has not yet been initialized fully.
* So, put this one on the boot callout table which is
* always initialized.
*/
}
else
	now = gethrtime();

	/*
	 * Compute the expiration hrtime.
	 */
	if (flags & CALLOUT_FLAG_ABSOLUTE) {
		interval = expiration - now;
		ASSERT(expiration > 0);
	} else {
		interval = expiration;
		expiration += now;
		ASSERT(expiration > 0);
	}
	if (flags & CALLOUT_FLAG_ROUNDUP)
		expiration += resolution - 1;
	expiration = (expiration / resolution) * resolution;
	if (expiration <= 0) {
		/*
		 * expiration hrtime overflow has occurred. Just set the
		 * expiration to infinity.
		 */
		expiration = CY_INFINITY;
	}
/*
* Assign an ID to this callout
*/
if (flags & CALLOUT_FLAG_32BIT) {
if (interval > callout_longterm) {
} else {
}
} else {
if ((id & CALLOUT_COUNTER_HIGH) == 0) {
}
}
if (flags & CALLOUT_FLAG_HRESTIME)
/*
* Try to see if a callout list already exists for this expiration.
* Most of the time, this will be the case.
*/
/*
* Check if we have enough space in the heap to insert one
* expiration. If not, expand the heap.
*/
/*
* In the above call, we drop the lock, allocate and
* reacquire the lock. So, we could have been away
* for a while. In the meantime, someone could have
* inserted a callout list with the same expiration.
* So, the best course is to repeat the steps. This
* should be an infrequent event.
*/
goto again;
}
/*
* Check the free list. If we don't find one, we have to
* take the slow path and allocate from kmem.
*/
/*
* In the above call, we drop the lock, allocate and
* reacquire the lock. So, we could have been away
* for a while. In the meantime, someone could have
* inserted a callout list with the same expiration.
* Plus, the heap could have become full. So, the best
* course is to repeat the steps. This should be an
* infrequent event.
*/
goto again;
}
/*
* This is a new expiration. So, insert it into the heap.
* This will also reprogram the cyclic, if the expiration
* propagated to the root of the heap.
*/
		callout_heap_insert(ct, expiration);
	}
ct->ct_timeouts++;
	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
	    "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration,
	    cp);
return (id);
}
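/*
 * Illustrative sketch of the generation-based IDs assigned in
 * timeout_generic() above. The bit positions below are hypothetical;
 * the real layout (CALLOUT_COUNTER_HIGH, CALLOUT_EXECUTING, etc.) lives
 * in <sys/callo.h>. The counter occupies the low bits of the ID; when
 * it wraps, the carry bumps the generation field, so a recycled callout
 * structure never reports a stale ID as still pending.
 */
#define	XID_COUNTER_BITS	32		/* hypothetical */
#define	XID_COUNTER_MASK	((1ULL << XID_COUNTER_BITS) - 1)

static unsigned long long
xid_next(unsigned long long id)
{
	unsigned long long counter, generation;

	counter = (id + 1) & XID_COUNTER_MASK;
	generation = id >> XID_COUNTER_BITS;
	if (counter == 0)
		generation++;	/* counter wrapped; bump the generation */
	return ((generation << XID_COUNTER_BITS) | counter);
}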
timeout_id_t
timeout(void (*func)(void *), void *arg, clock_t delta)
{
	ulong_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg,
	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_FLAG_32BIT);

	return ((timeout_id_t)id);
}
/*
* Convenience function that creates a normal callout with default parameters
* and returns a full ID.
*/
callout_id_t
timeout_default(void (*func)(void *), void *arg, clock_t delta)
{
	callout_id_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta),
	    nsec_per_tick, 0);

	return (id);
}
timeout_id_t
realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
{
	ulong_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg,
	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_FLAG_32BIT);

	return ((timeout_id_t)id);
}
/*
* Convenience function that creates a realtime callout with default parameters
* and returns a full ID.
*/
callout_id_t
realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta)
{
	callout_id_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta),
	    nsec_per_tick, 0);

	return (id);
}
static hrtime_t
untimeout_generic(callout_id_t id, int nowait)
{
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t xid;
	callout_list_t *cl;
	hrtime_t expiration;
	int hash;
/*
* Search the ID hash table for the callout.
*/
/*
* Match the ID and generation number.
*/
continue;
if ((xid & CALLOUT_EXECUTING) == 0) {
/*
* Delete the callout. If the callout list becomes
* NULL, we don't remove it from the table. This is
* so it can be reused. If the empty callout list
			 * corresponds to the top of the callout heap, we
* don't reprogram the table cyclic here. This is in
* order to avoid lots of X-calls to the CPU associated
* with the callout table.
*/
expiration -= gethrtime();
"untimeout:ID %lx hrtime left %llx", id,
return (expiration < 0 ? 0 : expiration);
}
/*
* The callout we want to delete is currently executing.
* The DDI states that we must wait until the callout
* completes before returning, so we block on cl_done until the
* callout ID changes (to the old ID if it's on the freelist,
* or to a new callout ID if it's in use). This implicitly
* assumes that callout structures are persistent (they are).
*/
/*
* The timeout handler called untimeout() on itself.
* Stupid, but legal. We can't wait for the timeout
* to complete without deadlocking, so we just return.
*/
"untimeout_self:ID %x", id);
return (-1);
}
		if (nowait == 0) {
			/*
			 * We need to wait. Indicate that we are waiting by
			 * incrementing cl_waiting. This prevents the executor
			 * from doing a wakeup on cl_done if there are no
			 * waiters.
			 */
			while (cp->c_xid == xid) {
				cl->cl_waiting = 1;
				cv_wait(&cl->cl_done, &ct->ct_mutex);
			}
		}
}
"untimeout_executing:ID %lx", id);
return (-1);
}
"untimeout_bogus_id:ID %lx", id);
/*
* We didn't find the specified callout ID. This means either
* (1) the callout already fired, or (2) the caller passed us
* a bogus value. Perform a sanity check to detect case (2).
*/
panic("untimeout: impossible timeout id %llx",
(unsigned long long)id);
return (-1);
}
clock_t
untimeout(timeout_id_t id_arg)
{
	hrtime_t hleft;
	clock_t tleft;

	hleft = untimeout_generic((callout_id_t)(ulong_t)id_arg, 0);
	if (hleft < 0)
		tleft = -1;
	else if (hleft == 0)
		tleft = 0;
	else
		tleft = NSEC_TO_TICK(hleft);

	return (tleft);
}
/*
* Convenience function to untimeout a timeout with a full ID with default
* parameters.
*/
clock_t
untimeout_default(callout_id_t id, int nowait)
{
	hrtime_t hleft;
	clock_t tleft;

	hleft = untimeout_generic(id, nowait);
	if (hleft < 0)
		tleft = -1;
	else if (hleft == 0)
		tleft = 0;
	else
		tleft = NSEC_TO_TICK(hleft);

	return (tleft);
}
/*
* Expire all the callouts queued in the specified callout list.
*/
static void
callout_list_expire(callout_table_t *ct, callout_list_t *cl)
{
	callout_t *cp;
/*
* Indicate to untimeout() that a callout is
* being expired by the executor.
		 */
		cp->c_xid |= CALLOUT_EXECUTING;
		mutex_exit(&ct->ct_mutex);
		(*cp->c_func)(cp->c_arg);
		mutex_enter(&ct->ct_mutex);
		ct->ct_expirations++;
/*
* Indicate completion for cl_done.
		 */
		cp->c_xid &= ~CALLOUT_EXECUTING;
/*
* Delete callout from ID hash table and the callout
* list, return to freelist, and tell any untimeout() that
* cares that we're done.
*/
		if (cl->cl_waiting) {
			cl->cl_waiting = 0;
			cv_broadcast(&cl->cl_done);
		}
}
}
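/*
 * Illustrative sketch of the executor-side handshake performed by
 * callout_list_expire() above (simplified stand-in types; the real code
 * uses ct_mutex, cl_done and cl_waiting). The handler runs with the
 * table lock dropped, and any cancellers blocked in untimeout() are
 * woken once it returns.
 */
typedef struct xexec {
	kmutex_t	xe_lock;	/* stands in for ct_mutex */
	kcondvar_t	xe_done;	/* stands in for cl_done */
	int		xe_waiting;	/* stands in for cl_waiting */
	void		(*xe_func)(void *);
	void		*xe_arg;
} xexec_t;

static void
xexec_one(xexec_t *xe)
{
	mutex_enter(&xe->xe_lock);

	/* Drop the lock across the handler so it can block or rearm. */
	mutex_exit(&xe->xe_lock);
	xe->xe_func(xe->xe_arg);
	mutex_enter(&xe->xe_lock);

	/* Tell any untimeout() that cares that we're done. */
	if (xe->xe_waiting) {
		xe->xe_waiting = 0;
		cv_broadcast(&xe->xe_done);
	}
	mutex_exit(&xe->xe_lock);
}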
/*
* Execute all expired callout lists for a callout table.
*/
static void
callout_expire(callout_table_t *ct)
{
	callout_list_t *cl, *clnext;
/*
* Multiple executor threads could be running at the same
* time. Each callout list is processed by only one thread.
* If this callout list is already being processed by another
* executor, go on to the next one.
*/
continue;
}
/*
* Expire all the callouts in this callout list.
		 */
		callout_list_expire(ct, cl);

		/*
		 * Free the callout list.
		 */
		CALLOUT_LIST_DELETE(ct->ct_expired, cl);
		cl->cl_next = ct->ct_lfree;
		ct->ct_lfree = cl;
	}
}
/*
* The cyclic handlers below process callouts in two steps:
*
* 1. Find all expired callout lists and queue them in a separate
* list of expired callouts.
* 2. Execute the expired callout lists.
*
* This is done for two reasons:
*
* 1. We want to quickly find the next earliest expiration to program
* the cyclic to and reprogram it. We can do this right at the end
* of step 1.
* 2. The realtime cyclic handler expires callouts in place. However,
* for normal callouts, callouts are expired by a taskq thread.
* So, it is simpler and more robust to have the taskq thread just
* do step 2.
*/
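/*
 * Illustrative sketch of the two-step scheme above (simplified types;
 * this assumes the pending work is kept sorted by expiration, whereas
 * the real code uses the heap). Step 1 runs in the cyclic handler and
 * just moves expired entries onto the expired list; step 2 walks that
 * list and invokes the handlers, possibly in taskq context.
 */
typedef struct xent {
	struct xent *next;
	long long expiration;
	void (*func)(void *);
	void *arg;
} xent_t;

/* Step 1: drain everything expired as of "now" onto the expired list. */
static void
xsweep(xent_t **pending, xent_t **expired, long long now)
{
	xent_t *e;

	/* (The real code appends at the tail to preserve expiration order.) */
	while ((e = *pending) != NULL && e->expiration <= now) {
		*pending = e->next;
		e->next = *expired;
		*expired = e;
	}
}

/* Step 2: execute the expired entries. */
static void
xrun(xent_t **expired)
{
	xent_t *e;

	while ((e = *expired) != NULL) {
		*expired = e->next;
		e->func(e->arg);
	}
}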
/*
* Realtime callout cyclic handler.
*/
void
callout_realtime(callout_table_t *ct)
{
	mutex_enter(&ct->ct_mutex);
	callout_heap_delete(ct);
	callout_expire(ct);
	mutex_exit(&ct->ct_mutex);
}

/*
 * Taskq function to execute normal callouts. See callout_cyclic_init().
 */
void
callout_execute(callout_table_t *ct)
{
	mutex_enter(&ct->ct_mutex);
	callout_expire(ct);
	mutex_exit(&ct->ct_mutex);
}
/*
* Normal callout cyclic handler.
*/
void
callout_normal(callout_table_t *ct)
{
	int exec;

	mutex_enter(&ct->ct_mutex);
	callout_heap_delete(ct);
	exec = (ct->ct_expired.ch_head != NULL);
	mutex_exit(&ct->ct_mutex);

	if (exec) {
		ASSERT(ct->ct_taskq != NULL);
		(void) taskq_dispatch(ct->ct_taskq,
		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
	}
}
/*
* Suspend callout processing.
*/
static void
callout_suspend(void)
{
	callout_table_t *ct;
	int t, f;
/*
* Traverse every callout table in the system and suspend callout
* processing.
*
* We need to suspend all the tables (including the inactive ones)
* so that if a table is made active while the suspend is still on,
* the table remains suspended.
*/
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];
			mutex_enter(&ct->ct_mutex);
			ct->ct_suspend++;
			if (ct->ct_cyclic == CYCLIC_NONE) {
				mutex_exit(&ct->ct_mutex);
				continue;
			}
			if (ct->ct_suspend == 1)
				(void) cyclic_reprogram(ct->ct_cyclic,
				    CY_INFINITY);
			mutex_exit(&ct->ct_mutex);
		}
	}
}
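/*
 * Illustrative sketch of the suspend/resume counting used above
 * (simplified stand-ins; the real state lives in ct_suspend and the
 * cyclic is reprogrammed via cyclic_reprogram()). ct_suspend is a
 * counter, not a flag, so nested suspends balance out and only the
 * last resume reprograms the cyclic.
 */
static int xsuspend_count;		/* stand-in for ct->ct_suspend */

static void
xsuspend(void)
{
	xsuspend_count++;
}

/*
 * Returns the expiration to program on the last resume: "now" if
 * expired work is pending, the heap root if the heap is non-empty,
 * and 0 (meaning "leave the cyclic alone") otherwise.
 */
static long long
xresume(long long now, int expired_pending, int heap_num, long long heap_root)
{
	xsuspend_count--;
	if (xsuspend_count != 0)
		return (0);
	if (expired_pending)
		return (now);
	else if (heap_num > 0)
		return (heap_root);
	else
		return (0);
}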
static void
callout_adjust(callout_table_t *ct, hrtime_t delta)
{
	callout_list_t *cl, *clnext;
	hrtime_t expiration;
	int hash;
/*
* In order to adjust the expirations, we null out the heap. Then,
* we reinsert adjusted expirations in the heap. Keeps it simple.
* Note that since the CALLOUT_TABLE_SUSPENDED flag is set by the
* caller, the heap insert does not result in cyclic reprogramming.
*/
ct->ct_heap_num = 0;
/*
* First, remove all the callout lists from the table and string them
* in a list.
*/
}
}
/*
* Now, traverse the callout lists and adjust their expirations.
*/
/*
* Set the new expiration and reinsert in the right
* hash bucket.
*/
expiration += delta;
}
}
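/*
 * Illustrative sketch of the adjustment pass described in
 * callout_adjust() above (hypothetical names and bucket count). The
 * lists are unlinked onto one chain, each expiration is shifted by
 * delta and each list is rehashed on its new expiration; the real code
 * additionally reinserts the expirations into the heap.
 */
#define	XADJ_BUCKETS	512		/* hypothetical bucket count */
#define	XADJ_HASH(exp)	((unsigned long long)(exp) % XADJ_BUCKETS)

typedef struct xadj {
	struct xadj *next;
	long long expiration;
} xadj_t;

static void
xadjust(xadj_t *buckets[XADJ_BUCKETS], long long delta)
{
	xadj_t *list = NULL, *cl;
	int i;

	/* Remove all the callout lists and string them on one chain. */
	for (i = 0; i < XADJ_BUCKETS; i++) {
		while ((cl = buckets[i]) != NULL) {
			buckets[i] = cl->next;
			cl->next = list;
			list = cl;
		}
	}

	/* Shift each expiration and rehash on the new value. */
	while ((cl = list) != NULL) {
		list = cl->next;
		cl->expiration += delta;
		i = (int)XADJ_HASH(cl->expiration);
		cl->next = buckets[i];
		buckets[i] = cl;
	}
}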
/*
* Resume callout processing.
*/
static void
callout_resume(hrtime_t delta)
{
	callout_table_t *ct;
	hrtime_t exp;
	int t, f;
/*
* Traverse every callout table in the system and resume callout
* processing. For active tables, perform any hrtime adjustments
* necessary.
*/
for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];
			mutex_enter(&ct->ct_mutex);
			if (ct->ct_cyclic == CYCLIC_NONE) {
				ct->ct_suspend--;
				mutex_exit(&ct->ct_mutex);
				continue;
			}
			if (delta)
				callout_adjust(ct, delta);
			ct->ct_suspend--;
			if (ct->ct_suspend == 0) {
				/*
				 * If the expired list is non-empty, then have
				 * the cyclic expire immediately. Else, program
				 * the cyclic based on the heap.
				 */
				if (ct->ct_expired.ch_head != NULL)
					exp = gethrtime();
				else if (ct->ct_heap_num > 0)
					exp = ct->ct_heap[0];
				else
					exp = 0;
				if (exp != 0)
					(void) cyclic_reprogram(ct->ct_cyclic,
					    exp);
			}
			mutex_exit(&ct->ct_mutex);
}
}
}
/*
* Callback handler used by CPR to stop and resume callouts.
*/
/*ARGSUSED*/
static boolean_t
callout_cpr_callb(void *arg, int code)
{
	if (code == CB_CODE_CPR_CHKPT)
		callout_suspend();
	else
callout_resume(0);
return (B_TRUE);
}
/*
* Callback handler invoked when the debugger is entered or exited.
*/
/*ARGSUSED*/
static boolean_t
callout_debug_callb(void *arg, int code)
{
	hrtime_t delta;

	/*
	 * When the system enters the debugger, make a note of the hrtime.
	 * When it is resumed, compute how long the system was in the
	 * debugger. This interval should not be counted for callouts.
	 */
	if (code == 0) {
		callout_suspend();
		callout_debug_hrtime = gethrtime();
	} else {
		delta = gethrtime() - callout_debug_hrtime;
		callout_resume(delta);
	}
return (B_TRUE);
}
/*
* Move the hrestime callouts to the expired list. Then program the table's
* cyclic to expire immediately so that the callouts can be executed
* immediately.
*/
static void
callout_hrestime_one(callout_table_t *ct)
{
	callout_list_t *cl, *clnext;
	int hash;
	mutex_enter(&ct->ct_mutex);
	if (ct->ct_heap_num == 0) {
		mutex_exit(&ct->ct_mutex);
		return;
	}
continue;
}
}
}
if (ct->ct_suspend == 0)
} else {
}
}
/*
* This function is called whenever system time (hrestime) is changed
* explicitly. All the HRESTIME callouts must be expired at once.
*/
/*ARGSUSED*/
void
callout_hrestime(void)
{
	callout_table_t *ct;
	int t, f;
/*
* Traverse every callout table in the system and process the hrestime
* callouts therein.
*
* We look at all the tables because we don't know which ones were
* onlined and offlined in the past. The offlined tables may still
* have active cyclics processing timers somewhere.
*/
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];
			callout_hrestime_one(ct);
		}
}
}
/*
* Create the hash tables for this callout table.
*/
static void
callout_hash_init(callout_table_t *ct)
{
	size_t size = sizeof (callout_hash_t) * CALLOUT_BUCKETS;

	ct->ct_idhash = kmem_zalloc(size, KM_SLEEP);
	ct->ct_clhash = kmem_zalloc(size, KM_SLEEP);
}
/*
* Create per-callout table kstats.
*/
static void
callout_kstat_init(callout_table_t *ct)
{
	int ndx;
(void *)ct);
} else {
}
}
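/*
 * Illustrative sketch of the kstat creation outlined above (argument
 * values are representative, and CALLOUT_NUM_STATS is assumed to count
 * the callout_kstat_names[] entries). kstat_create(), kstat_named_init()
 * and kstat_install() are the standard kernel kstat interfaces.
 */
static kstat_t *
xcallout_kstat_create(int instance)
{
	kstat_t *ksp;
	kstat_named_t *knp;
	int i;

	ksp = kstat_create("unix", instance, "callout", "misc",
	    KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, 0);
	if (ksp == NULL)
		return (NULL);		/* caller warns via cmn_err() */

	knp = KSTAT_NAMED_PTR(ksp);
	for (i = 0; i < CALLOUT_NUM_STATS; i++)
		kstat_named_init(&knp[i], callout_kstat_names[i],
		    KSTAT_DATA_INT64);
	kstat_install(ksp);

	return (ksp);
}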
static void
callout_cyclic_init(callout_table_t *ct)
{
	int t;
t = CALLOUT_TABLE_TYPE(ct);
/*
* Create the taskq thread if the table type is normal.
* Realtime tables are handled at PIL1 by a softint
* handler.
*/
if (t == CALLOUT_NORMAL) {
/*
* Each callout thread consumes exactly one
* task structure while active. Therefore,
* prepopulating with 2 * CALLOUT_THREADS tasks
* ensures that there's at least one task per
* thread that's either scheduled or on the
* freelist. In turn, this guarantees that
* taskq_dispatch() will always either succeed
			 * (because there's a free task structure) or
			 * be unnecessary (because "callout_execute(ct)"
			 * has already been scheduled). A sketch of such a
			 * prepopulated taskq follows this function.
*/
}
/*
* callouts can only be created in a table whose
* cyclic has been initialized.
*/
/*
* Create the callout table cyclics.
*/
}
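/*
 * Illustrative sketch of the prepopulated taskq described in
 * callout_cyclic_init() above (parameter values are representative).
 * TASKQ_PREPOPULATE pre-allocates task entries so that a TQ_NOSLEEP
 * dispatch from the cyclic handler cannot fail for lack of memory.
 * A handler would then dispatch work with something like
 * taskq_dispatch(tq, (task_func_t *)callout_execute, ct, TQ_NOSLEEP).
 */
static taskq_t *
xcallout_taskq_create(void)
{
	return (taskq_create("callout_taskq", CALLOUT_THREADS, maxclsyspri,
	    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS, TASKQ_PREPOPULATE));
}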
void
callout_cpu_online(cpu_t *cp)
{
	lgrp_handle_t hand;
	callout_cache_t *cache;
	char s[KMEM_CACHE_NAMELEN];
	callout_table_t *ct;
	int t;
/*
* Locate the cache corresponding to the onlined CPU's lgroup.
* Note that access to callout_caches is protected by cpu_lock.
*/
break;
}
/*
* If not found, create one. The caches are never destroyed.
*/
(long)hand);
(long)hand);
}
for (t = 0; t < CALLOUT_NTYPES; t++) {
/*
		 * Store convenience pointers to the kmem caches
* in the callout table. These assignments should always be
* done as callout tables can map to different physical
* CPUs each time.
*/
/*
* We use the heap pointer to check if stuff has been
* initialized for this callout table.
*/
}
/*
* Move the cyclic to this CPU by doing a bind.
*/
}
}
void
callout_cpu_offline(cpu_t *cp)
{
	callout_table_t *ct;
	int t;
for (t = 0; t < CALLOUT_NTYPES; t++) {
/*
* Unbind the cyclic. This will allow the cyclic subsystem
* to juggle the cyclic during CPU offline.
*/
}
}
/*
* This is called to perform per-CPU initialization for slave CPUs at
* boot time.
*/
void
callout_mp_init(void)
{
	cpu_t *cp;

	mutex_enter(&cpu_lock);

	cp = cpu_active;
	do {
		callout_cpu_online(cp);
	} while ((cp = cp->cpu_next_onln) != cpu_active);

	mutex_exit(&cpu_lock);
}
/*
* Initialize all callout tables. Called at boot time just before clkstart().
*/
void
callout_init(void)
{
	int f, t;
	int table_id;
	int bits;
	size_t size;
	char *buf;
/*
* Initialize callout globals.
*/
	for (bits = 0; (1 << bits) < max_ncpus; bits++)
		continue;
/*
* Because of the variability in timing behavior across systems with
* different architectures, we cannot allow arbitrarily low
* resolutions. The minimum resolution has to be determined in a
* platform-specific way. Until then, we define a blanket minimum
* resolution for callouts of CALLOUT_MIN_RESOLUTION.
*
* If, in the future, someone requires lower resolution timers, they
* can do one of two things:
*
	 *	- Define a lower value for callout_min_resolution. This would
	 *	  affect all clients of the callout subsystem. If this is done
	 *	  on a per-system basis, it would affect only that customer.
*
* - Define a flag to be passed to timeout creation that allows
* the lower resolution. This involves code changes. But it
* would affect only the calling module. It is the developer's
* responsibility to test on all systems and make sure that
* everything works.
*/
	if (callout_min_resolution <= 0)
		callout_min_resolution = CALLOUT_MIN_RESOLUTION;
/*
* Allocate all the callout tables based on max_ncpus. We have chosen
* to do boot-time allocation instead of dynamic allocation because:
*
* - the size of the callout tables is not too large.
* - there are race conditions involved in making this dynamic.
* - the hash tables that go with the callout tables consume
* most of the memory and they are only allocated in
* callout_cpu_online().
*
* Each CPU has two tables that are consecutive in the array. The first
* one is for realtime callouts and the second one is for normal ones.
*
* We do this alignment dance to make sure that callout table
* structures will always be on a cache line boundary.
*/
	 */
	size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus;
	size += CALLOUT_ALIGN;
	buf = kmem_zalloc(size, KM_SLEEP);
	callout_table = (callout_table_t *)P2ROUNDUP((uintptr_t)buf,
	    CALLOUT_ALIGN);
/*
* Now, initialize the tables for all the CPUs.
*/
for (f = 0; f < max_ncpus; f++) {
for (t = 0; t < CALLOUT_NTYPES; t++) {
table_id = CALLOUT_TABLE(t, f);
/*
* Precompute the base IDs for long and short-term
* legacy IDs. This makes ID generation during
* timeout() fast.
*/
/*
* Precompute the base ID for generation-based IDs.
* Note that when the first ID gets allocated, the
* ID will wrap. This will cause the generation
* number to be incremented to 1.
*/
/*
* Initialize the cyclic as NONE. This will get set
* during CPU online. This is so that partially
* populated systems will only have the required
* number of cyclics, not more.
*/
}
}
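/*
 * Illustrative sketch of the per-CPU table layout described above. The
 * macro below is hypothetical; the real CALLOUT_TABLE() lives in
 * <sys/callo.h>. With CALLOUT_NTYPES == 2, CPU f's realtime table is at
 * index 2f and its normal table at index 2f + 1.
 */
#define	XCALLOUT_TABLE(t, f)	((f) * CALLOUT_NTYPES + (t))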
/*
* Add the callback for CPR. This is called during checkpoint
* resume to suspend and resume callouts.
*/
"callout_cpr");
"callout_debug");
	/*
	 * Call the per-CPU initialization function for the boot CPU. This
	 * is done here because the function is not called automatically for
	 * the boot CPU. cpu_lock is taken here by convention.
	 */
	mutex_enter(&cpu_lock);
	callout_cpu_online(CPU);
	mutex_exit(&cpu_lock);
}