callo.h revision 07247649496653b0ece32ce7ea710057f85fa9da
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_CALLO_H
#define _SYS_CALLO_H
#include <sys/t_lock.h>
#include <sys/taskq.h>
#include <sys/lgrp.h>
#include <sys/processor.h>
#include <sys/cyclic.h>
#include <sys/kstat.h>
#include <sys/systm.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _KERNEL
typedef struct callout_list callout_list_t;
/*
* The callout mechanism provides general-purpose event scheduling:
* an arbitrary function is called in a specified amount of time.
* The expiration time for a callout is kept in its callout list
* structure.
*/
typedef struct callout {
struct callout *c_idnext; /* next in ID hash, or on freelist */
struct callout *c_idprev; /* prev in ID hash */
struct callout *c_clnext; /* next in callout list */
struct callout *c_clprev; /* prev in callout list */
callout_id_t c_xid; /* extended callout ID; see below */
callout_list_t *c_list; /* callout list */
void (*c_func)(void *); /* function to call */
void *c_arg; /* argument to function */
kthread_t *c_executor; /* executing thread */
kcondvar_t c_done; /* signal callout completion */
ushort_t c_waiting; /* untimeout waiting flag */
} callout_t;
/*
* The callout ID (callout_id_t) uniquely identifies a callout. The callout
* ID is always 64 bits internally. The lower 32 bits contain an ID value.
* The upper 32 bits contain a generation number and flags. When the ID value
* wraps the generation number is incremented during ID generation. This
* protects callers from ID collisions that can happen as a result of the wrap.
*
* The kernel internal interface, timeout_generic(), always returns a
* callout_id_t. But the legacy interfaces, timeout() and realtime_timeout()
* return a timeout_id_t. On a 64-bit system, timeout_id_t is also 64 bits.
* So, the full 64-bit ID (sans the flags) can be returned. However, on 32-bit
* systems, timeout_id_t is 32 bits. So, only the lower 32 bits can be
* returned. In such cases, a default generation number of 0 is assigned to
* the legacy IDs.
*
* The lower 32-bit ID space is partitioned into two spaces - one for 32-bit
* IDs and the other for 64-bit IDs. The 32-bit ID space is further divided
* into two spaces - one for short-term callouts and one for long-term.
*
* Here is the bit layout for the callout ID:
*
* 63 62 ... 32 31 30 29 .. X+1 X ... 1 0
* ----------------------------------------------------------------
* | Exec | Generation | Long | Counter | ID bits | Table | Type |
* | | number | term | High | | number | |
* ----------------------------------------------------------------
*
* Exec(uting):
* This is the executing bit which is only set in the extended callout
* ID. This bit indicates that the callout handler is currently being
* executed.
*
* Generation number:
* This is the generation part of the ID.
*
* Long term:
* This bit indicates whether this is a short-term or a long-term callout.
* The long-term bit exists to address the problem of callout ID collision
* on 32-bit systems. This is an issue because the system typically
* generates a large number of timeout() requests, which means that callout
* IDs eventually get recycled. Most timeouts are very short-lived, so that
* ID recycling isn't a problem; but there are a handful of timeouts which
* are sufficiently long-lived to see their own IDs reused. We use the
* long-term bit to partition the ID namespace into pieces; the short-term
* space gets all the heavy traffic and can wrap frequently (i.e., on the
* order of a day) with no ill effects; the long-term space gets very little
* traffic and thus never wraps. That said, we need to future proof callouts
* in case 32-bit systems grow in size and are able to consume callout IDs
* at faster rates. So, we should make all the kernel clients that use
* callouts to use the internal interface so that they can use IDs outside
* of the legacy space with a proper generation number.
*
* Counter High + ID counter bits:
* These bits represent the actual ID bits in the callout ID.
* The highest bit of the running counter is always set; this ensures that
* the callout ID is always non-zero, thus eliminating the need for an
* explicit wrap-around test during ID generation.
*
* Table number:
* These bits carry the table number for the callout table where the callout
* is queued. Each CPU has its own callout table. So, the callout tables are
* numbered from 0 - (max_ncpus - 1). Because max_ncpus is different on
* different systems, the actual number of table number bits will vary
* accordingly. And so will the ID counter bits.
*
* Type:
* This bit represents the callout (table) type. Each CPU has one realtime
* and one normal callout table.
*/
#define CALLOUT_EXECUTING 0x8000000000000000ULL
#define CALLOUT_ID_MASK ~(CALLOUT_EXECUTING)
#define CALLOUT_GENERATION_LOW 0x100000000ULL
#define CALLOUT_LONGTERM 0x80000000
#define CALLOUT_COUNTER_HIGH 0x40000000
#define CALLOUT_TYPE_BITS 1
#define CALLOUT_NTYPES (1 << CALLOUT_TYPE_BITS)
#define CALLOUT_TYPE_MASK (CALLOUT_NTYPES - 1)
#define CALLOUT_COUNTER_SHIFT callout_table_bits
#define CALLOUT_TABLE(t, f) (((f) << CALLOUT_TYPE_BITS) | (t))
#define CALLOUT_TABLE_NUM(ct) ((ct) - callout_table)
#define CALLOUT_TABLE_TYPE(ct) (CALLOUT_TABLE_NUM(ct) & CALLOUT_TYPE_MASK)
#define CALLOUT_TABLE_SEQID(ct) (CALLOUT_TABLE_NUM(ct) >> CALLOUT_TYPE_BITS)
/*
* We assume that during any period of CALLOUT_LONGTERM_TICKS ticks, at most
* (CALLOUT_COUNTER_HIGH / callout_counter_low) callouts will be generated.
*/
#define CALLOUT_LONGTERM_TICKS 0x4000UL
#define CALLOUT_BUCKET_SHIFT 9
#define CALLOUT_BUCKETS (1 << CALLOUT_BUCKET_SHIFT)
#define CALLOUT_BUCKET_MASK (CALLOUT_BUCKETS - 1)
#define CALLOUT_HASH(x) ((x) & CALLOUT_BUCKET_MASK)
#define CALLOUT_IDHASH(x) CALLOUT_HASH((x) >> CALLOUT_COUNTER_SHIFT)
/*
* The multiply by 0 and 1 below are cosmetic. Just to align things better
* and make it more readable. The multiplications will be done at compile
* time.
*/
#define CALLOUT_CLHASH(x) \
CALLOUT_HASH( \
((x)>>(CALLOUT_BUCKET_SHIFT*0)) ^ \
((x)>>(CALLOUT_BUCKET_SHIFT*1)) ^ \
((x)>>(CALLOUT_BUCKET_SHIFT*2)) ^ \
((x)>>(CALLOUT_BUCKET_SHIFT*3)))
#define CALLOUT_ID_TO_TABLE(id) ((id) & callout_table_mask)
#define CALLOUT_SHORT_ID(table) \
((callout_id_t)(table) | CALLOUT_COUNTER_HIGH)
#define CALLOUT_LONG_ID(table) \
(CALLOUT_SHORT_ID(table) | CALLOUT_LONGTERM)
#define CALLOUT_THREADS 2 /* keep it simple for now */
#define CALLOUT_REALTIME 0 /* realtime callout type */
#define CALLOUT_NORMAL 1 /* normal callout type */
/*
* callout_t's are cache-aligned structures allocated from kmem caches. One kmem
* cache is created per lgrp and is shared by all CPUs in that lgrp. Benefits:
* - cache pages are mapped only in the TLBs of the CPUs of the lgrp
* - data in cache pages is present only in those CPU caches
* - memory access performance improves with locality-awareness in kmem
*
* The following structure is used to manage per-lgroup kmem caches.
*
* NOTE: Free callout_t's go to a callout table's freelist. CPUs map to callout
* tables via their sequence IDs, not CPU IDs. DR operations can cause a
* free list to have callouts from multiple lgrp caches. This takes away some
* performance, but is no worse than if we did not use lgrp caches at all.
*/
typedef struct callout_cache {
struct callout_cache *cc_next; /* link in the global list */
lgrp_handle_t cc_hand; /* lgroup handle */
kmem_cache_t *cc_cache; /* kmem cache pointer */
kmem_cache_t *cc_lcache; /* kmem cache pointer */
} callout_cache_t;
/*
* The callout hash structure is used for queueing both callouts and
* callout lists. That is why the fields are declared as void *.
*/
typedef struct callout_hash {
void *ch_head;
void *ch_tail;
} callout_hash_t;
struct callout_list {
callout_list_t *cl_next; /* next in clhash */
callout_list_t *cl_prev; /* prev in clhash */
hrtime_t cl_expiration; /* expiration for callouts in list */
callout_hash_t cl_callouts; /* list of callouts */
int cl_flags; /* callout flags */
};
/*
* Per-callout table kstats.
*
* CALLOUT_TIMEOUTS
* Callouts created since boot.
* CALLOUT_TIMEOUTS_PENDING
* Number of outstanding callouts.
* CALLOUT_UNTIMEOUTS_UNEXPIRED
* Number of cancelled callouts that have not expired.
* CALLOUT_UNTIMEOUTS_EXECUTING
* Number of cancelled callouts that were executing at the time of
* cancellation.
* CALLOUT_UNTIMEOUTS_EXPIRED
* Number of cancelled callouts that had already expired at the time
* of cancellations.
* CALLOUT_EXPIRATIONS
* Number of callouts that expired.
* CALLOUT_ALLOCATIONS
* Number of callout structures allocated.
*/
typedef enum callout_stat_type {
CALLOUT_TIMEOUTS,
CALLOUT_TIMEOUTS_PENDING,
CALLOUT_UNTIMEOUTS_UNEXPIRED,
CALLOUT_UNTIMEOUTS_EXECUTING,
CALLOUT_UNTIMEOUTS_EXPIRED,
CALLOUT_EXPIRATIONS,
CALLOUT_ALLOCATIONS,
CALLOUT_NUM_STATS
} callout_stat_type_t;
/*
* Callout flags:
*
* CALLOUT_FLAG_ROUNDUP
* Roundup the expiration time to the next resolution boundary.
* If this flag is not specified, the expiration time is rounded down.
* CALLOUT_FLAG_ABSOLUTE
* Normally, the expiration passed to the timeout API functions is an
* expiration interval. If this flag is specified, then it is
* interpreted as the expiration time itself.
* CALLOUT_FLAG_HRESTIME
* Normally, callouts are not affected by changes to system time
* (hrestime). This flag is used to create a callout that is affected
* by system time. If system time changes, these timers must be
* handled in a special way (see callout.c). These are used by condition
* variables and LWP timers that need this behavior.
* CALLOUT_FLAG_32BIT
* Legacy interfaces timeout() and realtime_timeout() pass this flag
* to timeout_generic() to indicate that a 32-bit ID should be allocated.
*/
#define CALLOUT_FLAG_ROUNDUP 0x1
#define CALLOUT_FLAG_ABSOLUTE 0x2
#define CALLOUT_FLAG_HRESTIME 0x4
#define CALLOUT_FLAG_32BIT 0x8
#define CALLOUT_LIST_FLAGS (CALLOUT_FLAG_ABSOLUTE | CALLOUT_FLAG_HRESTIME)
/*
* On 32-bit systems, the legacy interfaces, timeout() and realtime_timeout(),
* must pass CALLOUT_FLAG_32BIT to timeout_generic() so that a 32-bit ID
* can be generated.
*/
#ifdef _LP64
#define CALLOUT_LEGACY 0
#else
#define CALLOUT_LEGACY CALLOUT_FLAG_32BIT
#endif
/*
* All of the state information associated with a callout table.
* The fields are ordered with cache performance in mind.
*/
typedef struct callout_table {
kmutex_t ct_mutex; /* protects all callout state */
callout_t *ct_free; /* free callout structures */
callout_list_t *ct_lfree; /* free callout list structures */
callout_id_t ct_short_id; /* most recently issued short-term ID */
callout_id_t ct_long_id; /* most recently issued long-term ID */
callout_hash_t *ct_idhash; /* ID hash chains */
callout_hash_t *ct_clhash; /* callout list hash */
kstat_named_t *ct_kstat_data; /* callout kstat data */
uint_t ct_type; /* callout table type */
uint_t ct_suspend; /* suspend count */
cyclic_id_t ct_cyclic; /* cyclic for this table */
hrtime_t *ct_heap; /* callout expiration heap */
ulong_t ct_heap_num; /* occupied slots in the heap */
ulong_t ct_heap_max; /* end of the heap */
kmem_cache_t *ct_cache; /* callout kmem cache */
kmem_cache_t *ct_lcache; /* callout list kmem cache */
callout_id_t ct_gen_id; /* generation based ID */
callout_hash_t ct_expired; /* list of expired callout lists */
taskq_t *ct_taskq; /* taskq to execute normal callouts */
kstat_t *ct_kstats; /* callout kstats */
#ifdef _LP64
ulong_t ct_pad[4]; /* cache alignment */
#else
ulong_t ct_pad[7]; /* cache alignment */
#endif
} callout_table_t;
/*
* Short hand definitions for the callout kstats.
*/
#define ct_timeouts \
ct_kstat_data[CALLOUT_TIMEOUTS].value.ui64
#define ct_timeouts_pending \
ct_kstat_data[CALLOUT_TIMEOUTS_PENDING].value.ui64
#define ct_untimeouts_unexpired \
ct_kstat_data[CALLOUT_UNTIMEOUTS_UNEXPIRED].value.ui64
#define ct_untimeouts_executing \
ct_kstat_data[CALLOUT_UNTIMEOUTS_EXECUTING].value.ui64
#define ct_untimeouts_expired \
ct_kstat_data[CALLOUT_UNTIMEOUTS_EXPIRED].value.ui64
#define ct_expirations \
ct_kstat_data[CALLOUT_EXPIRATIONS].value.ui64
#define ct_allocations \
ct_kstat_data[CALLOUT_ALLOCATIONS].value.ui64
#define CALLOUT_CHUNK 128
#define CALLOUT_HEAP_PARENT(index) (((index) - 1) >> 1)
#define CALLOUT_HEAP_RIGHT(index) (((index) + 1) << 1)
#define CALLOUT_HEAP_LEFT(index) ((((index) + 1) << 1) - 1)
#define CALLOUT_CYCLIC_HANDLER(t) \
((t == CALLOUT_REALTIME) ? callout_realtime : callout_normal)
/*
* We define a blanket minimum resolution for callouts of 1 millisecond.
* 1 millisecond is a safe value as it is already supported when the clock
* resolution is set to high.
*/
#define CALLOUT_MIN_RESOLUTION 1000000ULL
#define CALLOUT_TCP_RESOLUTION 10000000ULL
#define CALLOUT_ALIGN 64 /* cache line size */
#ifdef _LP64
#define CALLOUT_MAX_TICKS NSEC_TO_TICK(CY_INFINITY);
#else
#define CALLOUT_MAX_TICKS LONG_MAX
#endif
extern void callout_init(void);
extern void membar_sync(void);
extern void callout_cpu_online(cpu_t *);
extern void callout_cpu_offline(cpu_t *);
extern void callout_hrestime(void);
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_CALLO_H */