/* cpu_idle.c revision f34a71784df3fbc5d1227a7b6201fd318ad1667e */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2009, Intel Corporation.
* All rights reserved.
*/
#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/cpu_acpi.h>
#include <sys/cpu_idle.h>
#include <sys/archsystm.h>
#define CSTATE_USING_HPET 1
#define CSTATE_USING_LAT 2
extern void cpu_idle_adaptive(void);
static int cpu_idle_init(cpu_t *);
static void cpu_idle_fini(cpu_t *);
/*
 * Hardware-timer capability flags used by the deep C-state code below:
 * whether the local APIC timer is always running (ARAT) across deep
 * C-states, and whether the HPET may be used as a proxy timer while a
 * CPU is in a deep C-state.
 */
/*
* Interfaces for modules implementing Intel's deep c-state.
*/
"Generic ACPI C-state Support",
};
static kmutex_t cpu_idle_callb_mutex;
static callb_id_t cpu_deep_idle_callb_id;
static callb_id_t cpu_idle_cpr_callb_id;
static uint_t cpu_idle_cfg_state;
static kmutex_t cpu_idle_mutex;
{ "address_space_id", KSTAT_DATA_STRING },
{ "latency", KSTAT_DATA_UINT32 },
{ "power", KSTAT_DATA_UINT32 },
};
/*
* kstat update function of the c-state info
*/
/*
 * NOTE(review): the function name, parameter list, and several statements
 * are missing from this copy of the file.  Per the comment above, this is
 * the kstat update handler for the per-C-state kstat (upstream name is
 * presumably cpu_cstate_kstat_update) -- confirm against upstream before
 * editing.  The orphaned string literals below look like arguments to
 * kstat_named_setstr() calls that label the C-state's ACPI address space.
 */
static int
{
/* These kstats are read-only; reject any write attempt. */
if (flag == KSTAT_WRITE) {
return (EACCES);
}
/* Label the C-state by its ACPI register address-space type. */
"FFixedHW");
"SystemIO");
} else {
"Unsupported");
}
return (0);
}
/*
* Used during configuration callbacks to manage implementation specific
* details of the hardware timer used during Deep C-state.
*/
{
/*
 * NOTE(review): the function signature is missing from this copy of the
 * file; per the comment above this is the configuration-callback helper
 * for the deep C-state hardware timer (upstream: cstate_timer_callback).
 */
/*
 * An always-running APIC timer (ARAT) keeps ticking in deep C-states,
 * so no proxy-timer work is needed.
 */
if (cpu_cstate_arat) {
return (B_TRUE);
} else if (cpu_cstate_hpet) {
/*
 * NOTE(review): this branch is empty here -- the statement that
 * forwards the callback to the HPET code appears to be missing
 * from this copy of the file.
 */
}
return (B_FALSE);
}
/*
 * Some Local APIC Timers do not work during Deep C-states.
 * The Deep C-state idle function uses this function to ensure it is using a
 * hardware timer that works during Deep C-states. This function also
 * switches the timer back to the LAPIC Timer after Deep C-state.
 */
static boolean_t
{
/*
 * NOTE(review): the function name and parameter list are missing from
 * this copy of the file; the `timer` switch below suggests this is the
 * deep C-state timer selection routine (upstream: cstate_use_timer()).
 */
/* An always-running APIC timer needs no substitute; nothing to do. */
if (cpu_cstate_arat)
return (B_TRUE);
/*
 * We have to return B_FALSE if no arat or hpet support
 */
if (!cpu_cstate_hpet)
return (B_FALSE);
switch (timer) {
case CSTATE_USING_HPET:
/*
 * NOTE(review): this case is empty and falls through to
 * CSTATE_USING_LAT -- the statements that install/uninstall
 * the HPET proxy appear to be missing from this copy.
 */
case CSTATE_USING_LAT:
return (B_TRUE);
default:
return (B_FALSE);
}
}
/*
* c-state wakeup function.
* Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals
* with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State.
*/
void
{
/*
 * NOTE(review): the function name/parameters and many statements are
 * missing from this copy of the file.  Per the comment above, this is
 * the C-state wakeup routine (upstream: cstate_wakeup(cpu_t *, int)),
 * which wakes a CPU halted in MWAIT, HLT, or an ACPI deep C-state.
 * The dangling if/else pairs below have lost their statement bodies;
 * compare against upstream before editing.
 */
/*
 * Clear the halted bit for that CPU since it will be woken up
 * in a moment.
 */
/*
 * Clear the halted bit for that CPU since it will be
 * poked in a moment.
 */
/*
 * We may find the current CPU present in the halted cpuset
 * if we're in the context of an interrupt that occurred
 * before we had a chance to clear our bit in cpu_idle().
 * Waking ourself is obviously unnecessary, since if
 * we're here, we're not halted.
 */
/*
 * Use correct wakeup mechanism
 */
if ((mcpu_mwait != NULL) &&
(*mcpu_mwait == MWAIT_HALTED))
/* NOTE(review): the MWAIT-write and poke statements are missing. */
else
}
return;
} else {
/*
 * This cpu isn't halted, but it's idle or undergoing a
 * context switch. No need to awaken anyone else.
 */
return;
}
/*
 * No need to wake up other CPUs if the thread we just enqueued
 * is bound.
 */
if (bound)
return;
/*
 * See if there's any other halted CPUs. If there are, then
 * select one, and awaken it.
 * It's possible that after we find a CPU, somebody else
 * will awaken it before we get the chance.
 * In that case, look again.
 */
do {
return;
cpu_found) < 0);
/*
 * Must use correct wakeup mechanism to avoid lost wakeup of
 * alternate cpu.
 */
else
}
}
/*
* enter deep c-state handler
*/
static void
{
/*
 * NOTE(review): the function name/parameters and many statements are
 * missing from this copy of the file.  Per the comment above, this is
 * the deep C-state entry handler (upstream: acpi_cpu_cstate()), which
 * halts the current CPU via MWAIT or an ACPI SystemIO read after
 * arranging for a working wakeup timer.  Several if-conditions below
 * have lost their condition or body; compare against upstream before
 * editing.
 */
int hset_update = 1;
/*
 * Set our mcpu_mwait here, so we can tell if anyone tries to
 * wake us between now and when we call mwait. No other cpu will
 * attempt to set our mcpu_mwait until we add ourself to the haltset.
 */
if (mcpu_mwait) {
if (type == ACPI_ADR_SPACE_SYSTEM_IO)
/* NOTE(review): the mcpu_mwait store statements are missing. */
else
}
/*
 * If this CPU is online, and there are multiple CPUs
 * in the system, then we should note our halting
 * by adding ourselves to the partition's halted CPU
 * bitmask so we can be found and woken when
 * work becomes available.
 */
hset_update = 0;
/*
 * Add ourselves to the partition's halted CPUs bitmask
 * and set our HALTED flag, if necessary.
 *
 * When a thread becomes runnable, it is placed on the queue
 * and then the halted cpuset is checked to determine who
 * (if anyone) should be awakened. We therefore need to first
 * add ourselves to the halted cpuset, and then check if there
 * is any work available.
 *
 * Note that memory barriers after updating the HALTED flag
 * are not necessary since an atomic operation (updating the bitmap)
 * immediately follows. On x86 the atomic operation acts as a
 * memory barrier for the update of cpu_disp_flags.
 */
if (hset_update) {
}
/*
 * Check to make sure there's really nothing to do.
 * Work destined for this CPU may become available after
 * this check. We'll be notified through the clearing of our
 * bit in the halted CPU bitmask, and a write to our mcpu_mwait.
 *
 * disp_anywork() checks disp_nrunnable, so we do not have to later.
 */
if (disp_anywork()) {
if (hset_update) {
}
return;
}
/*
 * We're on our way to being halted.
 *
 * The local APIC timer can stop in ACPI C2 and deeper c-states.
 * Try to program the HPET hardware to substitute for this CPU's
 * LAPIC timer.
 * cstate_use_timer() could disable the LAPIC Timer. Make sure
 * to start the LAPIC Timer again before leaving this function.
 *
 * Disable interrupts here so we will awaken immediately after halting
 * if someone tries to poke us between now and the time we actually
 * halt.
 */
cli();
/*
 * We check for the presence of our bit after disabling interrupts.
 * If it's cleared, we'll return. If the bit is cleared after
 * we check then the cstate_wakeup() will pop us out of the halted
 * state.
 *
 * This means that the ordering of the cstate_wakeup() and the clearing
 * of the bit by cpu_wakeup is important.
 * cpu_wakeup() must clear our mc_haltset bit, and then call
 * cstate_wakeup().
 * acpi_cpu_cstate() must disable interrupts, then check for the bit.
 */
(void) cstate_use_timer(&lapic_expire,
sti();
return;
}
/*
 * The check for anything locally runnable is here for performance
 * and isn't needed for correctness. disp_nrunnable ought to be
 * in our cache still, so it's inexpensive to check, and if there
 * is anything runnable we won't have to wait for the poke.
 */
(void) cstate_use_timer(&lapic_expire,
sti();
if (hset_update) {
}
return;
}
if (using_timer == B_FALSE) {
(void) cstate_use_timer(&lapic_expire,
sti();
/*
 * We are currently unable to program the HPET to act as this
 * CPU's proxy LAPIC timer. This CPU cannot enter C2 or deeper
 * because no timer is set to wake it up while its LAPIC timer
 * stalls in deep C-States.
 * Enter C1 instead.
 *
 * cstate_wake_cpu() will wake this CPU with an IPI which
 * works with MWAIT.
 */
i86_monitor(mcpu_mwait, 0, 0);
i86_mwait(0, 0);
tlb_service();
}
/*
 * We're no longer halted
 */
if (hset_update) {
}
return;
}
if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
/*
 * We're on our way to being halted.
 * To avoid a lost wakeup, arm the monitor before checking
 * if another cpu wrote to mcpu_mwait to wake us up.
 */
i86_monitor(mcpu_mwait, 0, 0);
if (*mcpu_mwait == MWAIT_HALTED) {
tlb_service();
}
} else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
&value, 8);
(void) cpu_acpi_read_port(
tlb_service();
}
}
/*
 * The LAPIC timer may have stopped in deep c-state.
 * Reprogram this CPU's LAPIC here before enabling interrupts.
 */
sti();
/*
 * We're no longer halted
 */
if (hset_update) {
}
}
/*
* indicate when bus masters are active
*/
static uint32_t
cpu_acpi_bm_sts(void)
{
/*
 * Returns a non-zero value when bus-master activity (ACPI BM_STS)
 * has been detected.
 * NOTE(review): the declaration/read of `bm_sts` and the final
 * return for the !bm_sts path are missing from this copy of the
 * file.  As written, falling off the end of a non-void function
 * and using the result is undefined behavior -- restore the
 * missing lines from upstream.
 */
if (bm_sts)
return (bm_sts);
}
/*
* Idle the present CPU, deep c-state is supported
*/
void
cpu_acpi_idle(void)
{
/*
 * Idle the current CPU using ACPI C-states (deep C-state supported).
 * NOTE(review): many statements are missing from this copy of the
 * file -- the C-state table lookup, the BM_STS demotion logic, the
 * BM_RLD manipulation, and the dispatch into acpi_cpu_cstate() have
 * all lost lines.  Compare against upstream before editing.
 */
int cpu_max_cstates;
/* Clamp to the implementation limit; fall back to the generic idler. */
if (cpu_max_cstates > CPU_MAX_CSTATES)
(*non_deep_idle_cpu)();
return;
}
/* Timestamp for the idle-statistics update at the bottom. */
start = gethrtime_unscaled();
/*
 * OSPM uses the BM_STS bit to determine the power state to enter
 * if C3 is determined, bus master activity demotes the power state
 * to C2.
 */
--cs_indx;
/*
 * BM_RLD determines if the Cx power state was exited as a result of
 * bus master requests. Set this bit when using a C3 power state, and
 * clear it when using a C1 or C2 power state.
 */
}
(cs_type >= CPU_ACPI_C3)) {
}
switch (cs_type) {
default:
/* FALLTHROUGH */
case CPU_ACPI_C1:
(*non_deep_idle_cpu)();
break;
case CPU_ACPI_C2:
break;
case CPU_ACPI_C3:
/*
 * recommended in ACPI spec, providing hardware mechanisms
 * to prevent master from writing to memory (UP-only)
 */
if ((ncpus_online == 1) &&
/*
 * Today, all Intel processors that support C3 share the
 * cache, so no extra flushing is required there.
 */
} else if (x86_vendor != X86_VENDOR_Intel) {
}
}
break;
}
end = gethrtime_unscaled();
/*
 * Update statistics
 */
}
{
/*
 * NOTE(review): the signature line is missing from this copy; based on
 * the checks below this is the boolean deep C-state capability probe
 * (upstream: cpu_deep_cstates_supported()).  Deep C-states require
 * either an always-running APIC timer (ARAT) or a usable HPET proxy,
 * and can be vetoed by the idle_cpu_no_deep_c tunable.
 */
extern int idle_cpu_no_deep_c;
/* Administrator override: never enter deep C-states. */
if (idle_cpu_no_deep_c)
return (B_FALSE);
/* CPU must advertise deep C-state support. */
if (!cpuid_deep_cstates_supported())
return (B_FALSE);
/* ARAT means the LAPIC timer survives deep C-states; no proxy needed. */
if (cpuid_arat_supported()) {
return (B_TRUE);
}
/* NOTE(review): the HPET-availability condition line is missing here. */
hpet.install_proxy()) {
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Validate that this processor supports deep cstate and if so,
* get the c-state data from ACPI and cache it.
*/
static int
{
/*
 * NOTE(review): the parameter list (declared above as taking a cpu_t *)
 * and several statements are missing from this copy of the file: the
 * cstate-table lookup, the kstat_create() call that the orphaned
 * argument lines below belong to, and the callb_add() registrations
 * inside the cpu_deep_cstates_supported() branch.  Compare against
 * upstream before editing.  Returns 0 on success, -1 on failure.
 */
char name[KSTAT_STRLEN];
int cpu_max_cstates, i;
/*
 * Cache the C-state specific ACPI data.
 */
if (cpu_acpi_cache_cstate_data(handle) != 0) {
"!cpu_idle_init: Failed to cache ACPI C-state data\n");
return (-1);
}
/*
 * Check the bus master arbitration control ability.
 */
for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
/*
 * Allocate, initialize and install cstate kstat
 */
name, "misc",
sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
} else {
cstate++;
}
}
if (cpu_deep_cstates_supported()) {
/* Register the deep-idle and CPR callbacks exactly once. */
if (cpu_deep_idle_callb_id == (callb_id_t)0)
if (cpu_idle_cpr_callb_id == (callb_id_t)0)
}
return (0);
}
/*
* Free resources allocated by cpu_idle_init().
*/
static void
{
/*
 * NOTE(review): the parameter list (declared above as taking a cpu_t *)
 * and the statements that reset the idle-cpu pointer and delete the
 * per-C-state kstats are missing from this copy of the file.
 */
/*
 * idle cpu points back to the generic one
 */
if (cstate) {
/* Tear down the kstat installed for each cached C-state. */
for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
cstate++;
}
}
/* Unregister the callbacks added by cpu_idle_init(), if present. */
if (cpu_deep_idle_callb_id != (callb_id_t)0) {
(void) callb_delete(cpu_deep_idle_callb_id);
}
if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
(void) callb_delete(cpu_idle_cpr_callb_id);
}
}
/*ARGSUSED*/
static boolean_t
{
/*
 * NOTE(review): the function name/parameters and the statement bodies of
 * each case are missing from this copy of the file.  Based on the
 * callb_id registered above, this is the power-management callback that
 * enables/disables deep C-state idling (upstream: cpu_deep_idle_callb),
 * switching the idle routine and updating cpu_idle_cfg_state under the
 * appropriate lock.  Returns rslt to the callb framework.
 */
switch (code) {
case PM_DEFAULT_CPU_DEEP_IDLE:
/*
 * Default policy is same as enable
 */
/*FALLTHROUGH*/
case PM_ENABLE_CPU_DEEP_IDLE:
/* Nothing to do if deep idle is already enabled. */
if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
break;
} else {
}
break;
case PM_DISABLE_CPU_DEEP_IDLE:
break;
}
break;
default:
/* NOTE(review): this was presumably a cmn_err() on an unknown code. */
code);
break;
}
return (rslt);
}
/*ARGSUSED*/
static boolean_t
{
/*
 * NOTE(review): the function name/parameters and several statement
 * bodies are missing from this copy of the file.  Based on the callb_id
 * registered above, this is the CPR (suspend/resume) callback
 * (upstream: cpu_idle_cpr_callb): on resume it restores the deep-idle
 * dispatcher hooks unless the user disabled them; at checkpoint it
 * quiesces the C-state timer.  Returns rslt to the callb framework.
 */
switch (code) {
case CB_CODE_CPR_RESUME:
/*
 * Do not enable dispatcher hooks if disabled by user.
 */
break;
} else {
}
break;
case CB_CODE_CPR_CHKPT:
(void) cstate_timer_callback(CB_CODE_CPR_CHKPT);
break;
default:
break;
}
return (rslt);
}
/*
* handle _CST notification
*/
void
{
/*
 * NOTE(review): the function name/parameters and several statements are
 * missing from this copy of the file.  Per the comment above, this
 * handles an ACPI _CST change notification (upstream, presumably
 * cpuidle_cstate_instance()): it walks every CPU in the power-management
 * domain and re-caches that CPU's C-state data from ACPI.  Not built
 * for the Xen paravirtualized kernel (__xpv).
 */
#ifndef __xpv
int result = 0;
if (mach_state == NULL) {
return;
}
/*
 * Do for all the CPU's in the domain
 */
do {
/* NOTE(review): the cpuset iteration producing cpu_id is missing. */
if (cpu_id == CPUSET_NOTINSET)
break;
mach_state = (cpupm_mach_state_t *)
return;
}
/*
 * re-evaluate cstate object
 */
if (cpu_acpi_cache_cstate_data(handle) != 0) {
/* NOTE(review): cmn_err() call truncated; only the tail remains. */
" object Instance: %d", cpu_id);
}
(void) cstate_timer_callback(
}
} while (result < 0);
#endif
}
/*
* handle the number or the type of available processor power states change
*/
void
cpuidle_manage_cstates(void *ctx)
{
/*
 * React to a change in the number or type of available processor power
 * states.  NOTE(review): the statements deriving mach_state and
 * is_ready from ctx, and the re-evaluation work after the readiness
 * check, are missing from this copy of the file.
 */
if (mach_state == NULL) {
return;
}
/*
 * We currently refuse to power manage if the CPU is not ready to
 * take cross calls (cross calls fail silently if CPU is not ready
 * for it).
 *
 * Additionally, for x86 platforms we cannot power manage
 * any one instance, until all instances have been initialized.
 * That's because we don't know what the CPU domains look like
 * until all instances have been initialized.
 */
if (!is_ready)
return;
}