cpu_idle.c revision 444f66e774d0e4f449866c7f5e64095f2fb0def0
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2009, Intel Corporation.
* All rights reserved.
*/
#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/cpu_acpi.h>
#include <sys/cpu_idle.h>
#include <sys/cpupm.h>
#include <sys/cpu_event.h>
#include <sys/hpet.h>
#include <sys/archsystm.h>
#include <sys/callb.h>
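
/*
 * Codes passed to cstate_use_timer() to select the hardware timer backing
 * the deep C-state idle path: the HPET proxy while the CPU is halted, or
 * the local APIC timer once the CPU is running again.
 */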
#define CSTATE_USING_HPET 1
#define CSTATE_USING_LAT 2
extern void cpu_idle_adaptive(void);
extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
    cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);
static int cpu_idle_init(cpu_t *);
static void cpu_idle_fini(cpu_t *);
static void cpu_idle_stop(cpu_t *);
static boolean_t cpu_deep_idle_callb(void *arg, int code);
static boolean_t cpu_idle_cpr_callb(void *arg, int code);
/*
 * cpu_cstate_arat is set when the local APIC timer keeps running even in
 * deep C-states (ARAT); cpu_cstate_hpet is set when the HPET is used as
 * the proxy timer during deep C-states.
 */
static boolean_t cpu_cstate_arat = B_FALSE;
static boolean_t cpu_cstate_hpet = B_FALSE;
/*
 * Interfaces for modules implementing Intel's deep c-state.
 */
cpupm_state_ops_t cpu_idle_ops = {
	"Generic ACPI C-state Support",
	cpu_idle_init,
	cpu_idle_fini,
	NULL,
	cpu_idle_stop
};
static kmutex_t cpu_idle_callb_mutex;
static callb_id_t cpu_deep_idle_callb_id;
static callb_id_t cpu_idle_cpr_callb_id;
static uint_t cpu_idle_cfg_state;
static kmutex_t cpu_idle_mutex;
{ "address_space_id", KSTAT_DATA_STRING },
{ "latency", KSTAT_DATA_UINT32 },
{ "power", KSTAT_DATA_UINT32 },
};
/*
* kstat update function of the c-state info
*/
static int
cpu_idle_kstat_update(kstat_t *ksp, int flag)
{
	cpu_acpi_cstate_t *cstate = ksp->ks_private;

	if (flag == KSTAT_WRITE) {
		return (EACCES);
	}

	if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "FFixedHW");
	} else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "SystemIO");
	} else {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "Unsupported");
	}

	cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency;
	cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power;
	return (0);
}
/*
* Used during configuration callbacks to manage implementation specific
* details of the hardware timer used during Deep C-state.
*/
boolean_t
cstate_timer_callback(int code)
{
	if (cpu_cstate_arat) {
		return (B_TRUE);
	} else if (cpu_cstate_hpet) {
		return (hpet.callback(code));
	}
	return (B_FALSE);
}
/*
* Some Local APIC Timers do not work during Deep C-states.
* The Deep C-state idle function uses this function to ensure it is using a
* hardware timer that works during Deep C-states. This function also
 * switches the timer back to the LAPIC Timer after Deep C-state.
*/
static boolean_t
cstate_use_timer(hrtime_t *lapic_expire, int timer)
{
	if (cpu_cstate_arat)
		return (B_TRUE);

	/*
	 * We have to return B_FALSE if no arat or hpet support
	 */
	if (!cpu_cstate_hpet)
		return (B_FALSE);

	switch (timer) {
	case CSTATE_USING_HPET:
		/* Have the HPET proxy for this CPU's LAPIC timer. */
		return (hpet.use_hpet_timer(lapic_expire));
	case CSTATE_USING_LAT:
		/* Resume use of the LAPIC timer after deep C-state. */
		hpet.use_lapic_timer(*lapic_expire);
		return (B_TRUE);
	default:
		return (B_FALSE);
	}
}
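
/*
 * Usage sketch: the deep C-state idle path below brackets the halt with
 * the two timer codes, so the HPET covers this CPU while its LAPIC timer
 * may be stalled:
 *
 *	cli();
 *	using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET);
 *	... MWAIT or I/O-port C-state entry ...
 *	(void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT);
 *	sti();
 */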
/*
* c-state wakeup function.
* Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals
* with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State.
*/
void
cstate_wakeup(cpu_t *cp, int bound)
{
	struct machcpu	*mcpu = &(cp->cpu_m);
	volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait;
	cpupart_t	*cpu_part;
	uint_t		cpu_found;

	cpu_part = cp->cpu_part;

	/*
	 * Clear the halted bit for that CPU since it will be woken up
	 * in a moment.
	 */
	if (CPU_IN_SET(cpu_part->cp_mach->mc_haltset, cp->cpu_id)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.
		 */
		CPUSET_ATOMIC_DEL(cpu_part->cp_mach->mc_haltset, cp->cpu_id);

		/*
		 * We may find the current CPU present in the halted cpuset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_idle().
		 * Waking ourself is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cp != CPU) {
			/*
			 * Use correct wakeup mechanism
			 */
			if ((mcpu_mwait != NULL) &&
			    (*mcpu_mwait == MWAIT_HALTED))
				MWAIT_WAKEUP(cp);
			else
				poke_cpu(cp->cpu_id);
		}
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		return;
	}

	/*
	 * No need to wake up other CPUs if the thread we just enqueued
	 * is bound.
	 */
	if (bound)
		return;

	/*
	 * See if there's any other halted CPUs. If there are, then
	 * select one, and awaken it.
	 * It's possible that after we find a CPU, somebody else
	 * will awaken it before we get the chance.
	 * In that case, look again.
	 */
	do {
		CPUSET_FIND(cpu_part->cp_mach->mc_haltset, cpu_found);
		if (cpu_found == CPUSET_NOTINSET)
			return;
	} while (cpuset_atomic_cpu_del(&cpu_part->cp_mach->mc_haltset,
	    cpu_found) < 0);

	/*
	 * Must use correct wakeup mechanism to avoid lost wakeup of
	 * alternate cpu.
	 */
	if (cpu_found != CPU->cpu_id) {
		mcpu_mwait = cpu[cpu_found]->cpu_m.mcpu_mwait;
		if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED))
			MWAIT_WAKEUP(cpu[cpu_found]);
		else
			poke_cpu(cpu_found);
	}
}
/*
* Function called by CPU idle notification framework to check whether CPU
 * has been awakened. It will be called with interrupts disabled.
* If CPU has been awakened, call cpu_idle_exit() to notify CPU idle
* notification framework.
*/
static void
acpi_cpu_mwait_check_wakeup(void *arg)
{
	volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;

	ASSERT(arg != NULL);
	if (*mcpu_mwait != MWAIT_HALTED) {
		/*
		 * CPU has been awakened, notify CPU idle notification system.
		 */
		cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
	} else {
/*
* Toggle interrupt flag to detect pending interrupts.
* If interrupt happened, do_interrupt() will notify CPU idle
* notification framework so no need to call cpu_idle_exit()
* here.
*/
sti();
SMT_PAUSE();
cli();
}
}
static void
acpi_cpu_mwait_ipi_check_wakeup(void *arg)
{
	volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;

	ASSERT(arg != NULL);
	if (*mcpu_mwait != MWAIT_WAKEUP_IPI) {
		/*
		 * CPU has been awakened, notify CPU idle notification system.
		 */
		cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
	} else {
/*
* Toggle interrupt flag to detect pending interrupts.
* If interrupt happened, do_interrupt() will notify CPU idle
* notification framework so no need to call cpu_idle_exit()
* here.
*/
sti();
SMT_PAUSE();
cli();
}
}
/*ARGSUSED*/
static void
acpi_cpu_check_wakeup(void *arg)
{
/*
* Toggle interrupt flag to detect pending interrupts.
* If interrupt happened, do_interrupt() will notify CPU idle
* notification framework so no need to call cpu_idle_exit() here.
*/
sti();
SMT_PAUSE();
cli();
}
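
/*
 * One of the three check routines above is handed to cpu_idle_enter() as
 * its wakeup-check callback; acpi_cpu_cstate() below picks the MWAIT or
 * IPI variant to match how the C-state is entered, and falls back to
 * acpi_cpu_check_wakeup() when mcpu_mwait is unavailable.
 */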
/*
* enter deep c-state handler
*/
static void
acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
{
	volatile uint32_t	*mcpu_mwait = CPU->cpu_m.mcpu_mwait;
	cpu_t			*cpup = CPU;
	processorid_t		cpun = cpup->cpu_id;
	cpupart_t		*cp = cpup->cpu_part;
	hrtime_t		lapic_expire;
	uint8_t			type = cstate->cs_addrspace_id;
	uint32_t		cs_type = cstate->cs_type;
	int			hset_update = 1;
	boolean_t		using_timer;
	cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup;

	/*
	 * Set our mcpu_mwait here, so we can tell if anyone tries to
	 * wake us between now and when we call mwait. No other cpu will
	 * attempt to set our mcpu_mwait until we add ourselves to the
	 * haltset.
	 */
	if (mcpu_mwait) {
		if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
			*mcpu_mwait = MWAIT_WAKEUP_IPI;
			check_func = &acpi_cpu_mwait_ipi_check_wakeup;
		} else {
			*mcpu_mwait = MWAIT_HALTED;
			check_func = &acpi_cpu_mwait_check_wakeup;
		}
	}
	/*
	 * If this CPU is online, and there are multiple CPUs
	 * in the system, then we should note our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitmap. This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
		hset_update = 0;
	/*
	 * Add ourselves to the partition's halted CPUs bitmask
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpuset is checked to determine who
	 * (if anyone) should be awakened. We therefore need to first
	 * add ourselves to the halted cpuset, and then check if there
	 * is any work available.
	 *
	 * Note that memory barriers after updating the HALTED flag
	 * are not necessary since an atomic operation (updating the bitmap)
	 * immediately follows. On x86 the atomic operation acts as a
	 * memory barrier for the update of cpu_disp_flags.
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		CPUSET_ATOMIC_ADD(cp->cp_mach->mc_haltset, cpun);
	}
/*
* Check to make sure there's really nothing to do.
* Work destined for this CPU may become available after
* this check. We'll be notified through the clearing of our
* bit in the halted CPU bitmask, and a write to our mcpu_mwait.
*
* disp_anywork() checks disp_nrunnable, so we do not have to later.
*/
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun);
		}
		return;
	}
/*
* We're on our way to being halted.
*
* The local APIC timer can stop in ACPI C2 and deeper c-states.
* Try to program the HPET hardware to substitute for this CPU's
* LAPIC timer.
* cstate_use_timer() could disable the LAPIC Timer. Make sure
* to start the LAPIC Timer again before leaving this function.
*
* Disable interrupts here so we will awaken immediately after halting
* if someone tries to poke us between now and the time we actually
* halt.
*/
	cli();

	using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET);
/*
* We check for the presence of our bit after disabling interrupts.
* If it's cleared, we'll return. If the bit is cleared after
* we check then the cstate_wakeup() will pop us out of the halted
* state.
*
	 * This means that the ordering of the wakeup and the clearing
	 * of the bit is important:
	 * cstate_wakeup() must clear our mc_haltset bit, and then issue
	 * the wakeup (MWAIT_WAKEUP() or poke_cpu()).
* acpi_cpu_cstate() must disable interrupts, then check for the bit.
*/
	if (hset_update &&
	    !CPU_IN_SET(cp->cp_mach->mc_haltset, cpun)) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		return;
	}
/*
* The check for anything locally runnable is here for performance
* and isn't needed for correctness. disp_nrunnable ought to be
* in our cache still, so it's inexpensive to check, and if there
* is anything runnable we won't have to wait for the poke.
*/
	if (cpup->cpu_disp->disp_nrunnable != 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun);
		}
		return;
	}
if (using_timer == B_FALSE) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
/*
* We are currently unable to program the HPET to act as this
* CPU's proxy LAPIC timer. This CPU cannot enter C2 or deeper
* because no timer is set to wake it up while its LAPIC timer
* stalls in deep C-States.
* Enter C1 instead.
*
* cstate_wake_cpu() will wake this CPU with an IPI which
* works with MWAIT.
*/
		i86_monitor(mcpu_mwait, 0, 0);
		if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) {
			if (cpu_idle_enter(IDLE_STATE_C1, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) ==
				    MWAIT_HALTED) {
					i86_mwait(0, 0);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
/*
* We're no longer halted
*/
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun);
		}
return;
}
if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
/*
* We're on our way to being halted.
* To avoid a lost wakeup, arm the monitor before checking
* if another cpu wrote to mcpu_mwait to wake us up.
*/
i86_monitor(mcpu_mwait, 0, 0);
		if (*mcpu_mwait == MWAIT_HALTED) {
			if (cpu_idle_enter((uint_t)cs_type, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if (*mcpu_mwait == MWAIT_HALTED) {
					i86_mwait(cstate->cs_address, 1);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
	} else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
		uint32_t value;
		ACPI_TABLE_FADT *gbl_FADT;

		if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
			if (cpu_idle_enter((uint_t)cs_type, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
					(void) cpu_acpi_read_port(
					    cstate->cs_address, &value, 8);
					acpica_get_global_FADT(&gbl_FADT);
					(void) cpu_acpi_read_port(
					    gbl_FADT->XPmTimerBlock.Address,
					    &value, 32);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
	}
/*
* The LAPIC timer may have stopped in deep c-state.
* Reprogram this CPU's LAPIC here before enabling interrupts.
*/
	(void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT);
	sti();
/*
* We're no longer halted
*/
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun);
	}
}
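
/*
 * Lost-wakeup protocol, in brief: cstate_wakeup() first clears the
 * sleeper's mc_haltset bit and then writes its mcpu_mwait word (or sends
 * an IPI), while acpi_cpu_cstate() arms MONITOR before re-checking
 * mcpu_mwait, so a wakeup write landing between the check and the MWAIT
 * instruction still causes MWAIT to return immediately.
 */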
/*
* Idle the present CPU, deep c-state is supported
*/
void
cpu_acpi_idle(void)
{
	cpu_t *cp = CPU;
	cpu_acpi_handle_t handle;
	cma_c_state_t *cs_data;
	cpu_acpi_cstate_t *cstates;
	hrtime_t start, end;
	int cpu_max_cstates;
	uint32_t cs_indx;
	uint16_t cs_type;
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;

	handle = mach_state->ms_acpi_handle;
	ASSERT(CPU_ACPI_CSTATES(handle) != NULL);

	cs_data = mach_state->ms_cstate.cma_state.cstate;
	cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
	if (cpu_max_cstates > CPU_MAX_CSTATES)
		cpu_max_cstates = CPU_MAX_CSTATES;
	if (cpu_max_cstates == 1) {	/* no ACPI c-state data */
		(*non_deep_idle_cpu)();
		return;
	}

	start = gethrtime_unscaled();

	/*
	 * Select the target c-state based on recent idle behavior.
	 */
	cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start);
	cs_type = cstates[cs_indx].cs_type;

	switch (cs_type) {
default:
/* FALLTHROUGH */
case CPU_ACPI_C1:
(*non_deep_idle_cpu)();
break;
	case CPU_ACPI_C2:
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;
case CPU_ACPI_C3:
/*
* All supported Intel processors maintain cache coherency
* during C3. Currently when entering C3 processors flush
* core caches to higher level shared cache. The shared cache
* maintains state and supports probes during C3.
* Consequently there is no need to handle cache coherency
* and Bus Master activity here with the cache flush, BM_RLD
* bit, BM_STS bit, nor PM2_CNT.ARB_DIS mechanisms described
* in section 8.1.4 of the ACPI Specification 4.0.
*/
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;
}
end = gethrtime_unscaled();
/*
* Update statistics
*/
	cpupm_wakeup_cstate_data(cs_data, end);
}
boolean_t
cpu_deep_cstates_supported(void)
{
extern int idle_cpu_no_deep_c;
if (idle_cpu_no_deep_c)
return (B_FALSE);
if (!cpuid_deep_cstates_supported())
return (B_FALSE);
	if (cpuid_arat_supported()) {
		cpu_cstate_arat = B_TRUE;
		return (B_TRUE);
	}

	if ((hpet.supported == HPET_FULL_SUPPORT) &&
	    hpet.install_proxy()) {
		cpu_cstate_hpet = B_TRUE;
		return (B_TRUE);
	}
return (B_FALSE);
}
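
/*
 * Note: ARAT ("always running APIC timer", reported by CPUID leaf 6)
 * means the LAPIC timer keeps counting in deep C-states, so no HPET
 * proxy timer is needed.
 */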
/*
* Validate that this processor supports deep cstate and if so,
* get the c-state data from ACPI and cache it.
*/
static int
cpu_idle_init(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
char name[KSTAT_STRLEN];
int cpu_max_cstates, i;
int ret;
/*
* Cache the C-state specific ACPI data.
*/
	if ((ret = cpu_acpi_cache_cstate_data(handle)) != 0) {
		if (ret < 0)
			cmn_err(CE_NOTE,
			    "!Support for CPU deep idle states is being "
			    "disabled due to errors parsing ACPI C-state "
			    "objects exported by BIOS.");
		cpu_idle_fini(cp);
		return (-1);
	}
	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

	for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
		(void) snprintf(name, KSTAT_STRLEN - 1, "c%d",
		    cstate->cs_type);
		/*
		 * Allocate, initialize and install cstate kstat
		 */
		cstate->cs_ksp = kstat_create("cstate", cp->cpu_id,
		    name, "misc", KSTAT_TYPE_NAMED,
		    sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL);

		if (cstate->cs_ksp == NULL) {
			cmn_err(CE_NOTE, "kstat_create(c_state) fail");
		} else {
			cstate->cs_ksp->ks_data = &cpu_idle_kstat;
			cstate->cs_ksp->ks_lock = &cpu_idle_mutex;
			cstate->cs_ksp->ks_update = cpu_idle_kstat_update;
			cstate->cs_ksp->ks_data_size += MAXNAMELEN;
			cstate->cs_ksp->ks_private = cstate;
			kstat_install(cstate->cs_ksp);
			cstate++;
		}
	}
	if (cpu_deep_cstates_supported()) {
		uint32_t value;

		mutex_enter(&cpu_idle_callb_mutex);
		if (cpu_deep_idle_callb_id == (callb_id_t)0)
			cpu_deep_idle_callb_id =
			    callb_add(&cpu_deep_idle_callb, (void *)NULL,
			    CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle");
		if (cpu_idle_cpr_callb_id == (callb_id_t)0)
			cpu_idle_cpr_callb_id =
			    callb_add(&cpu_idle_cpr_callb, (void *)NULL,
			    CB_CL_CPR_PM, "cpu_idle_cpr");
		mutex_exit(&cpu_idle_callb_mutex);

		/*
		 * All supported CPUs (Nehalem and later) will remain in C3
		 * during Bus Master activity.
		 * All CPUs set ACPI_BITREG_BUS_MASTER_RLD to 0 here if it
		 * is not already 0 before enabling Deeper C-states.
		 */
		cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_RLD, &value);
		if (value & 1)
			cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
	}
return (0);
}
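
/*
 * BM_RLD (ACPI_BITREG_BUS_MASTER_RLD) makes bus-master requests break the
 * CPU out of C3. Clearing it is safe for the processors supported here
 * because they keep their shared cache coherent during C3; see the C3
 * comment in cpu_acpi_idle() above.
 */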
/*
* Free resources allocated by cpu_idle_init().
*/
static void
cpu_idle_fini(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i;

	/*
	 * idle cpu points back to the generic one
	 */
	idle_cpu = cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
	disp_enq_thread = non_deep_idle_disp_enq_thread;

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}
	cpu_acpi_free_cstate_data(handle);

	mutex_enter(&cpu_idle_callb_mutex);
	if (cpu_deep_idle_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_deep_idle_callb_id);
		cpu_deep_idle_callb_id = (callb_id_t)0;
	}
	if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_idle_cpr_callb_id);
		cpu_idle_cpr_callb_id = (callb_id_t)0;
	}
	mutex_exit(&cpu_idle_callb_mutex);
}
/*
 * Stop c-state management for this CPU: delete its c-state kstats and
 * free its cached ACPI c-state data.
 */
static void
cpu_idle_stop(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i;

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}
	cpu_acpi_free_cstate_data(handle);
}
/*ARGSUSED*/
static boolean_t
cpu_deep_idle_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case PM_DEFAULT_CPU_DEEP_IDLE:
		/*
		 * Default policy is same as enable
		 */
		/*FALLTHROUGH*/
	case PM_ENABLE_CPU_DEEP_IDLE:
		if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
			break;

		if (cstate_timer_callback(PM_ENABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
			cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG;
		} else {
			rslt = B_FALSE;
		}
		break;

	case PM_DISABLE_CPU_DEEP_IDLE:
		if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) != 0)
			break;

		idle_cpu = non_deep_idle_cpu;
		if (cstate_timer_callback(PM_DISABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG;
		}
		break;

	default:
		cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d",
		    code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}
/*ARGSUSED*/
static boolean_t
cpu_idle_cpr_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case CB_CODE_CPR_RESUME:
		if (cstate_timer_callback(CB_CODE_CPR_RESUME)) {
			/*
			 * Do not enable dispatcher hooks if disabled by user.
			 */
			if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
				break;

			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
		} else {
			rslt = B_FALSE;
		}
		break;

	case CB_CODE_CPR_CHKPT:
		idle_cpu = non_deep_idle_cpu;
		disp_enq_thread = non_deep_idle_disp_enq_thread;
		(void) cstate_timer_callback(CB_CODE_CPR_CHKPT);
		break;

	default:
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}
/*
* handle _CST notification
*/
void
cpuidle_cstate_instance(cpu_t *cp)
{
#ifndef	__xpv
	cpupm_mach_state_t	*mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t	handle;
	struct machcpu		*mcpu;
	cpuset_t		dom_cpu_set;
	kmutex_t		*pm_lock;
	int			result = 0;
	processorid_t		cpu_id;

	if (mach_state == NULL) {
		return;
	}

	ASSERT(mach_state->ms_cstate.cma_domain != NULL);
	dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus;
	pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock;

	/*
	 * Do for all the CPU's in the domain
	 */
	mutex_enter(pm_lock);
	do {
		CPUSET_FIND(dom_cpu_set, cpu_id);
		if (cpu_id == CPUSET_NOTINSET)
			break;

		ASSERT(cpu_id >= 0 && cpu_id < NCPU);
		cp = cpu[cpu_id];
		mach_state = (cpupm_mach_state_t *)
		    cp->cpu_m.mcpu_pm_mach_state;
		if (!(mach_state->ms_caps & CPUPM_C_STATES)) {
			mutex_exit(pm_lock);
			return;
		}
		handle = mach_state->ms_acpi_handle;
		ASSERT(handle != NULL);

		/*
		 * re-evaluate cstate object
		 */
		if (cpu_acpi_cache_cstate_data(handle) != 0) {
			cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
			    " object Instance: %d", cpu_id);
		}

		mcpu = &(cp->cpu_m);
		mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
		if (mcpu->max_cstates > CPU_ACPI_C1) {
			(void) cstate_timer_callback(
			    CST_EVENT_MULTIPLE_CSTATES);
			disp_enq_thread = cstate_wakeup;
			cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
		} else if (mcpu->max_cstates == CPU_ACPI_C1) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
			(void) cstate_timer_callback(CST_EVENT_ONE_CSTATE);
		}

		CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
	} while (result < 0);
	mutex_exit(pm_lock);
#endif
}
/*
 * handle a change in the number or the type of available processor power
 * states
*/
void
cpuidle_manage_cstates(void *ctx)
{
	cpu_t			*cp = ctx;
	cpupm_mach_state_t	*mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	boolean_t		is_ready;

	if (mach_state == NULL) {
		return;
	}
/*
* We currently refuse to power manage if the CPU is not ready to
* take cross calls (cross calls fail silently if CPU is not ready
* for it).
*
	 * Additionally, for x86 platforms we cannot power manage an
	 * instance until it has been initialized.
*/
	is_ready = CPUPM_XCALL_IS_READY(cp->cpu_id) && cpupm_cstate_ready();
	if (!is_ready)
		return;

	cpuidle_cstate_instance(cp);
}