/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
/*
* Introduction
* This file implements a CPU event notification mechanism to signal clients
* that are interested in CPU-related events.
* Currently it only supports CPU idle state change events, which are
* triggered just before a CPU enters a hardware idle state and just after
* it wakes up from that state.
* Please refer to PSARC/2009/115 for detailed information.
*
* Lock Strategy
* 1) cpu_idle_prop_busy/free are protected by cpu_idle_prop_lock.
* 2) No protection for cpu_idle_cb_state because it's per-CPU data.
* 3) cpu_idle_cb_busy is protected by cpu_idle_cb_lock.
* 4) cpu_idle_cb_array is protected by pause_cpus/start_cpus logic.
* 5) cpu_idle_cb_max/curr are protected by both cpu_idle_cb_lock and
* pause_cpus/start_cpus logic.
* The algorithm is optimized for the hot path, which is the read side:
* read-side access is lock free.
* On the write side, pause_cpus() is used to keep all other CPUs in the
* pause thread, which guarantees that no other thread will access the
* cpu_idle_cb_max/curr/array data structures.
*/
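/*
 * Usage sketch (illustrative only, not compiled): a client interested in
 * idle transitions registers an enter/exit callback pair through the
 * interface declared in sys/cpu_event.h.  The parameter lists and constant
 * names below are assumptions based on that header and should be verified
 * against it.
 */
#if 0
static void
my_idle_enter(void *arg, cpu_idle_callback_context_t ctx,
    cpu_idle_check_wakeup_t check_func, void *check_arg)
{
	/* Runs just before the current CPU enters a hardware idle state. */
}

static void
my_idle_exit(void *arg, cpu_idle_callback_context_t ctx, int flag)
{
	/* Runs just after the current CPU wakes up from an idle state. */
}

static cpu_idle_callback_handle_t my_cb_hdl;

static cpu_idle_callback_t my_cb = {
	CPU_IDLE_CALLBACK_VERS,		/* interface version (assumed name) */
	my_idle_enter,
	my_idle_exit
};

static void
my_module_init(void)
{
	/* CPU_IDLE_CB_PRIO_DYNAMIC lets the framework choose the order. */
	if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_DYNAMIC, &my_cb,
	    NULL, &my_cb_hdl) != 0) {
		cmn_err(CE_WARN, "my_module: failed to register callback.");
	}
}

static void
my_module_fini(void)
{
	(void) cpu_idle_unregister_callback(my_cb_hdl);
}
#endif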
#include <sys/machcpuvar.h>
#include <sys/sysmacros.h>
#if defined(__sparc)
#include <sys/machsystm.h>
#include <sys/archsystm.h>
#endif
#include <sys/cpu_event.h>
/* Define normal state for CPU on different platforms. */
#if defined(__x86)
/*
* At the time of this implementation IDLE_STATE_NORMAL is defined
* in mach_startup.c and not in a header file, so if it is undefined
* we define it here with the same value used in mach_startup.c.
* Should it eventually move to a header, we will pick it up from there.
*/
#ifndef IDLE_STATE_NORMAL
#define IDLE_STATE_NORMAL 0
#endif
#endif
/*
* To improve cache efficiency and avoid cache false sharing, CPU idle
* properties are grouped into cache lines as below:
* | CPU0 | CPU1 |.........| CPUn |
* | cache line 0 | cache line 1 |.........| cache line n |
* | v0 | ... | vm | v0 | ... | vm |.........| v0 | ... | vm |
* To access the value of property m for CPU n, use the following index:
* index = seq_id_of_CPUn * CPU_IDLE_VALUE_GROUP_SIZE + m
*/
#define CPU_IDLE_VALUE_GROUP_SIZE \
(CPU_CACHE_COHERENCE_SIZE / sizeof (cpu_idle_prop_value_t))
/* Get callback context handle for current CPU. */
/* Get CPU sequential id from ctx. */
/* Compute index from callback context handle. */
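/*
 * Sketch (illustrative) of how the macros above could be shaped: the
 * context handle is the CPU's sequential id, and the index into the
 * property value array follows the formula in the layout comment.  The
 * macro bodies below are assumptions for illustration only.
 */
#if 0
#define	CPU_IDLE_GET_CTX(cp)	\
	((cpu_idle_callback_context_t)(intptr_t)((cp)->cpu_seqid))
#define	CPU_IDLE_CTX2CPUID(ctx)	((processorid_t)(intptr_t)(ctx))
#define	CPU_IDLE_CTX2IDX(ctx)	\
	(((int)(intptr_t)(ctx)) * CPU_IDLE_VALUE_GROUP_SIZE)
#endif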
/*
* When cpu_idle_cb_array is NULL or full, grow it by
* CPU_IDLE_ARRAY_CAPACITY_INC entries at a time. Linear growth is
* preferred here over exponential growth.
*/
typedef struct cpu_idle_prop_impl {
char *name;
void *private;
typedef struct cpu_idle_prop_item {
char *name;
void *arg;
/* Structure to maintain registered callbacks in a list. */
typedef struct cpu_idle_cb_impl {
void *argument;
int priority;
/*
* Structure to maintain registered callbacks in priority order, laid out
* for cache-efficient read access.
*/
typedef struct cpu_idle_cb_item {
void *arg;
/* Per-CPU state aligned to CPU_CACHE_COHERENCE_SIZE to avoid false sharing. */
typedef union cpu_idle_cb_state {
struct {
/* Index of already invoked callbacks. */
int index;
/* Invoke registered callbacks if true. */
/* Property values are valid if true. */
/* Pointers to per-CPU properties. */
} v;
#ifdef _LP64
#else
#endif
static int cpu_idle_cb_curr = 0;
static int cpu_idle_cb_max = 0;
#ifdef __x86
/*
* cpuset used to intercept CPUs before powering them off.
* The control CPU sets the bit corresponding to the target CPU and waits
* until the bit is cleared.
* The target CPU disables interrupts before clearing the corresponding bit
* and then loops forever.
*/
#endif
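/*
 * Handshake sketch (illustrative) for the cpuset described above.  The
 * cpuset name, the use of cpu_seqid, and the poke_cpu() call are
 * assumptions; the cpuset macros are from sys/cpuvar.h.
 */
#if 0
/* Control CPU: flag the target, wake it, and wait until it parks itself. */
static void
intercept_control_sketch(cpu_t *cp)
{
	CPUSET_ATOMIC_ADD(cpu_idle_intercept_set, cp->cpu_seqid);
	poke_cpu(cp->cpu_id);
	while (CPU_IN_SET(cpu_idle_intercept_set, cp->cpu_seqid))
		DELAY(1);
}

/* Target CPU: disable interrupts, acknowledge, then spin forever. */
static void
intercept_target_sketch(void)
{
	(void) intr_clear();
	CPUSET_ATOMIC_DEL(cpu_idle_intercept_set, CPU->cpu_seqid);
	/*CONSTCOND*/
	while (1) {
		SMT_PAUSE();
	}
}
#endif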
{
},
{
},
{
},
{
},
{
},
{
},
{
},
{
},
};
#define CPU_IDLE_PROP_IDX_IDLE_STATE 0
/*ARGSUSED*/
static void
{
int state;
}
/*ARGSUSED*/
static void
{
}
};
extern void tlb_going_idle(void);
extern void tlb_service(void);
};
#endif
void
cpu_event_init(void)
{
int i, idx;
/* Create internal properties. */
for (i = 0, ip = cpu_idle_prop_array;
i < sizeof (cpu_idle_prop_array) / sizeof (cpu_idle_prop_array[0]);
i++, ip++) {
}
/* Allocate buffer and align to CPU_CACHE_COHERENCE_SIZE. */
/* Cache frequently used property value pointers. */
idx = CPU_IDLE_CTX2IDX(i);
#define ___INIT_P(f, i) \
}
/* Register built-in callbacks. */
"cpu_idle: failed to register callback for dtrace.");
}
"cpu_idle: failed to register callback for tlb_flush.");
}
#endif
}
/*
* This function is called to initialize per-CPU state when starting CPUs.
*/
void
{
}
/*
* This function is called to clean up per-CPU state when stopping CPUs.
*/
void
{
}
static void
{
/*
* Expand the array if it is full.
* Memory must be allocated outside the pause_cpus()/start_cpus() scope
* because kmem_zalloc() cannot be called with the KM_SLEEP flag within
* that scope.
*/
if (cpu_idle_cb_curr == cpu_idle_cb_max) {
sizeof (cpu_idle_cb_item_t), KM_SLEEP);
}
/* Try to acquire cpu_lock if not held yet. */
if (!MUTEX_HELD(&cpu_lock)) {
unlock = 1;
}
/*
* Pause all other CPUs (and let them run the pause thread).
* This guarantees that no other thread will access cpu_idle_cb_array
* after pause_cpus() returns.
*/
if (!cpus_paused()) {
unpause = 1;
}
/* Copy content to new buffer if needed. */
buf_old = (char *)cpu_idle_cb_array;
sizeof (cpu_idle_cb_item_t) * cnt_old);
}
}
/* Insert into array according to priority. */
for (i = cpu_idle_cb_curr; i > 0; i--) {
break;
}
}
/* Resume other CPUs from paused state if needed. */
if (unpause) {
start_cpus();
}
if (unlock) {
}
/* Free old resource if needed. */
}
}
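/*
 * Write-side sketch (illustrative) of the update pattern used above:
 * allocate with KM_SLEEP first, pause all other CPUs so the callback array
 * can be reordered without read-side locking, then resume CPUs before
 * freeing any old buffer.  Local names and the single-argument
 * pause_cpus() form are assumptions.
 */
#if 0
static void
insert_callback_sketch(cpu_idle_cb_item_t item)
{
	int unlock = 0, unpause = 0;
	char *buf_new = NULL;

	/* Grow the array linearly, CPU_IDLE_ARRAY_CAPACITY_INC at a time. */
	if (cpu_idle_cb_curr == cpu_idle_cb_max) {
		buf_new = kmem_zalloc((cpu_idle_cb_max +
		    CPU_IDLE_ARRAY_CAPACITY_INC) *
		    sizeof (cpu_idle_cb_item_t), KM_SLEEP);
	}

	/* pause_cpus() requires cpu_lock; take it only if not already held. */
	if (!MUTEX_HELD(&cpu_lock)) {
		mutex_enter(&cpu_lock);
		unlock = 1;
	}

	/* Park all other CPUs so no reader can observe a partial update. */
	if (!cpus_paused()) {
		pause_cpus(NULL);
		unpause = 1;
	}

	/*
	 * Here the array may be swapped for buf_new (if allocated) and the
	 * new item slid into place by priority; cpu_idle_cb_curr/max are
	 * updated while everyone else is parked.
	 */

	if (unpause)
		start_cpus();
	if (unlock)
		mutex_exit(&cpu_lock);

	/* Any replaced buffer is freed only after CPUs have been resumed. */
}
#endif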
static void
{
int i, found = 0;
/* Try to acquire cpu_lock if not held yet. */
if (!MUTEX_HELD(&cpu_lock)) {
unlock = 1;
}
/*
* Pause all other CPUs.
* This guarantees that no other thread will access cpu_idle_cb_array
* after pause_cpus() returns.
*/
if (!cpus_paused()) {
unpause = 1;
}
/* Remove cip from array. */
for (i = 0; i < cpu_idle_cb_curr; i++) {
if (found == 0) {
found = 1;
}
} else {
}
}
/*
* Reset the property-ready flag for all CPUs if no registered callbacks
* remain, because cpu_idle_enter/exit stop updating properties when no
* callback is registered.
*/
if (cpu_idle_cb_curr == 0) {
}
}
/* Resume other CPUs from paused state if needed. */
if (unpause) {
start_cpus();
}
if (unlock) {
}
}
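/*
 * Removal sketch (illustrative): once the last callback is unregistered,
 * every CPU's "ready" flag is cleared so that cpu_idle_enter()/exit() stop
 * updating property values.  The v.ready field name is an assumption based
 * on the surrounding comments.
 */
#if 0
static void
reset_ready_flags_sketch(void)
{
	int i;

	for (i = 0; i < max_ncpus; i++)
		cpu_idle_cb_state[i].v.ready = B_FALSE;
}
#endif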
int
{
/* First validate parameters. */
"!cpu_event: register_callback called from callback.");
return (EBUSY);
"!cpu_event: NULL parameters in register_callback.");
return (EINVAL);
} else if (prio < CPU_IDLE_CB_PRIO_LOW_BASE ||
"!cpu_event: priority 0x%x out of range.", prio);
return (EINVAL);
"!cpu_event: callback version %d is not supported.",
return (EINVAL);
}
/* For non-dynamic priorities, check whether a callback with this priority exists. */
if (prio != CPU_IDLE_CB_PRIO_DYNAMIC) {
"priority 0x%x already exists.", prio);
return (EEXIST);
}
}
}
return (0);
}
int
{
"!cpu_event: unregister_callback called from callback.");
return (EBUSY);
"!cpu_event: hdl is NULL in unregister_callback.");
return (EINVAL);
}
rc = 0;
break;
}
}
if (rc == 0) {
} else {
"!cpu_event: callback handle %p not found.", (void *)hdl);
}
return (rc);
}
static int
{
return (0);
}
return (1);
}
static void
{
}
/*ARGSUSED*/
int
{
int i;
#if defined(__x86)
#endif
#if defined(__x86)
/* Intercept the CPU at a safe point before powering it off. */
iflags = intr_clear();
/*CONSTCOND*/
while (1) {
SMT_PAUSE();
}
}
#endif
return (0);
}
/*
* On x86, cpu_idle_enter can be called from the idle thread with
* interrupts either enabled or disabled, so we need to make sure
* interrupts are disabled here.
* On SPARC, cpu_idle_enter is called from the idle thread with
* interrupts disabled, so no special handling is necessary.
*/
#if defined(__x86)
iflags = intr_clear();
#endif
/* Skip calling callbacks if state is not ready for the current CPU. */
#if defined(__x86)
#endif
return (0);
}
/*
* Increase the index so that the corresponding idle_exit callback
* will be invoked should an interrupt happen during the idle_enter
* callback.
*/
/* Call idle_enter callback function if it's not NULL. */
/*
* cpu_idle_enter runs with interrupts
* disabled, so the idle_enter callbacks
* are also called with interrupts
* disabled. A callback may enable
* interrupts, provided it can also handle
* the consequences of an interrupt
* actually occurring.
*
* However, if an interrupt occurs and we
* return here without dealing with it, we
* return to the cpu_idle_enter() caller
* with EBUSY, and the caller will not
* enter the idle state.
*
* We detect the interrupt by checking the
* index value of the state pointer. If it
* is not the index we incremented above,
* then it was cleared while processing
* the interrupt.
*
* Also note that at this point of the code
* the normal index value is one greater
* than the loop variable 'i', as 'i' has
* not yet been incremented.
*/
#if defined(__x86)
#endif
return (EBUSY);
}
}
}
#if defined(__x86)
#endif
return (0);
}
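/*
 * Loop-body sketch (illustrative) of the interrupt-detection technique
 * described in cpu_idle_enter() above: the per-CPU index is advanced before
 * each idle_enter callback, and if an interrupt ran cpu_idle_exit() in
 * between, the saved index no longer matches and EBUSY is returned so the
 * caller skips the idle state.  Field names (v.index, enter, arg) are
 * assumptions.
 */
#if 0
	for (i = 0; i < cpu_idle_cb_curr; i++) {
		/* Advance the index so an interrupt can pair the idle_exit. */
		sp->v.index = i + 1;
		cip = &cpu_idle_cb_array[i];
		if (cip->enter != NULL) {
			cip->enter(cip->arg, ctx, check_func, check_arg);
			/* Rolled back by cpu_idle_exit() from an interrupt? */
			if (sp->v.index != i + 1) {
				intr_restore(iflags);
				return (EBUSY);
			}
		}
	}
#endif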
void
{
int i;
#if defined(__x86)
#endif
#if defined(__sparc)
/*
* On SPARC, cpu_idle_exit is only called from the idle thread with
* interrupts disabled.
*/
cip = &cpu_idle_cb_array[i];
}
}
}
/*
* On x86, cpu_idle_exit is called from either the idle thread or an
* interrupt handler. When called from an interrupt handler, interrupts
* are already disabled. When called from the idle thread, interrupts
* may be disabled or enabled.
*/
/* Called from an interrupt handler; interrupts are already disabled. */
if (flag & CPU_IDLE_CB_FLAG_INTR) {
/*
* Return if cpu_idle_exit has already been called or there is no
* registered callback.
*/
return;
}
cip = &cpu_idle_cb_array[i];
}
}
/* Called from the idle thread; interrupts need to be disabled. */
} else {
iflags = intr_clear();
cip = &cpu_idle_cb_array[i];
}
}
}
}
#endif
}
cpu_idle_get_context(void)
{
return (CPU_IDLE_GET_CTX(CPU));
}
/*
* Allocate property structures in groups of CPU_IDLE_VALUE_GROUP_SIZE to
* improve cache efficiency. To simplify the implementation, memory
* allocated for property structures is never freed.
*/
static void
{
int i;
KM_SLEEP);
}
}
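/*
 * Allocation sketch (illustrative) for the comment above: property
 * structures are carved out CPU_IDLE_VALUE_GROUP_SIZE at a time, each one
 * pointing into a shared per-CPU value area, and are threaded onto the
 * free list without ever being returned to kmem.  Field names (value,
 * next) are assumptions.
 */
#if 0
static void
prop_allocate_sketch(void)
{
	int i;
	cpu_idle_prop_impl_t *prop;
	cpu_idle_prop_value_t *valp;

	prop = kmem_zalloc(sizeof (*prop) * CPU_IDLE_VALUE_GROUP_SIZE,
	    KM_SLEEP);
	valp = kmem_zalloc(sizeof (*valp) * CPU_IDLE_VALUE_GROUP_SIZE *
	    max_ncpus, KM_SLEEP);
	for (i = 0; i < CPU_IDLE_VALUE_GROUP_SIZE; i++, prop++, valp++) {
		/* Slot for CPU n lives at valp[CPU_IDLE_CTX2IDX(n)]. */
		prop->value = valp;
		prop->next = cpu_idle_prop_free;
		cpu_idle_prop_free = prop;
	}
}
#endif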
int
{
"!cpu_event: NULL parameters in create_property.");
return (EINVAL);
}
"!cpu_event: property %s already exists.", name);
break;
}
}
if (cpu_idle_prop_free == NULL) {
}
rc = 0;
}
return (rc);
}
int
{
"!cpu_event: hdl is NULL in destroy_property.");
return (EINVAL);
}
rc = 0;
} else {
}
break;
}
}
return (rc);
}
int
{
"!cpu_event: NULL parameters in create_handle.");
return (EINVAL);
}
/* Hold one refcount on object. */
rc = 0;
break;
}
}
return (rc);
}
int
{
"!cpu_event: hdl is NULL in destroy_handle.");
return (EINVAL);
}
/* Release refcnt held in create_handle. */
rc = 0;
break;
}
}
return (rc);
}
{
}
const char *
{
}
int
{
return (EINVAL);
}
/* Use the CPU's idle-enter timestamp as the sequence number. */
}
if (rc == 0) {
}
return (rc);
}
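/*
 * Consumer sketch (illustrative): a client resolves a property by name into
 * a handle once, then reads per-CPU values from its callbacks.  The
 * property name string, the prototypes, and the flag test are assumptions
 * based on sys/cpu_event.h.
 */
#if 0
static cpu_idle_prop_handle_t idle_state_hdl;

static int
my_prop_init(void)
{
	/* The registered name of the idle-state property is assumed here. */
	return (cpu_idle_prop_create_handle("idle-state", &idle_state_hdl));
}

static void
my_idle_exit_cb(void *arg, cpu_idle_callback_context_t ctx, int flag)
{
	uint32_t state;

	/* Value for the CPU identified by ctx, updated by cpu_idle_enter(). */
	state = cpu_idle_prop_get_uint32(idle_state_hdl, ctx);
	if (flag & CPU_IDLE_CB_FLAG_INTR) {
		/* Woken by an interrupt while idling in 'state'. */
	}
}
#endif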
{
int idx;
}
{
int idx;
}
{
int idx;
}
{
int idx;
}
void
{
int idx;
}
void
{
int i, idx;
for (i = 0; i < max_ncpus; i++) {
idx = CPU_IDLE_CTX2IDX(i);
}
}
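/*
 * Store sketch (illustrative) matching the loop above: one value is written
 * into every CPU's slot for a property, using the index scheme from the
 * layout comment near the top of the file.  The value-array field name is
 * an assumption.
 */
#if 0
static void
prop_set_all_sketch(cpu_idle_prop_impl_t *prop, cpu_idle_prop_value_t val)
{
	int i, idx;

	for (i = 0; i < max_ncpus; i++) {
		idx = CPU_IDLE_CTX2IDX(i);
		prop->value[idx] = val;
	}
}
#endif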
/*ARGSUSED*/
{
int i;
}
return (0);
}
{
return ((uint_t)cpu_idle_prop_get_uint32(
CPU_IDLE_GET_CTX(cp)));
}
#if defined(__x86)
/*
* Intercept the CPU at a safe point in idle() before powering it off.
*/
void
{
/* Set flag to intercept CPU. */
/* Wake up CPU from possible sleep state. */
DELAY(1);
}
/*
* The target CPU is now spinning in a pause loop with interrupts disabled.
*/
}
#endif