cpu.c revision 004231970c4b01e49120935d0c0158cfb2ebb647
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Architecture-independent CPU control functions.
*/
#include <sys/processor.h>
#include <sys/pool_pset.h>
#include <sys/archsystm.h>
#if defined(__x86)
#include <sys/x86_archext.h>
#endif
extern int mp_cpu_start(cpu_t *);
extern int mp_cpu_stop(cpu_t *);
extern int mp_cpu_poweron(cpu_t *);
extern int mp_cpu_poweroff(cpu_t *);
extern int mp_cpu_configure(int);
extern int mp_cpu_unconfigure(int);
extern void mp_cpu_faulted_enter(cpu_t *);
extern void mp_cpu_faulted_exit(cpu_t *);
#ifdef __sparcv9
#endif
/*
* cpu_lock protects ncpus, ncpus_online, cpu_flag, cpu_list, cpu_active,
* and dispatch queue reallocations. The lock ordering with respect to
* related locks is:
*
* cpu_lock --> thread_free_lock ---> p_lock ---> thread_lock()
*
* Warning: Certain sections of code do not use the cpu_lock when
* traversing the cpu_list (e.g. mutex_vector_enter(), clock()). Since
* all cpus are paused during modifications to this list, a solution
 * to protect the list is to either disable kernel preemption while
* walking the list, *or* recheck the cpu_next pointer at each
* iteration in the loop. Note that in no cases can any cached
* copies of the cpu pointers be kept as they may become invalid.
*/
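/*
 * Illustrative sketch (not part of the original file): one safe way to
 * walk cpu_list without holding cpu_lock, per the warning above, is to
 * disable kernel preemption for the duration of the walk; with preemption
 * disabled this cpu cannot be paused, so pause_cpus() cannot complete and
 * the list cannot change underneath us.  The function name is hypothetical.
 */
static void
example_walk_cpu_list(void (*func)(cpu_t *))
{
	cpu_t	*cp;

	kpreempt_disable();		/* blocks pause_cpus() completion */
	cp = cpu_list;
	do {
		func(cp);		/* must not block or cache cp */
	} while ((cp = cp->cpu_next) != cpu_list);
	kpreempt_enable();
}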
/*
 * max_ncpus keeps the maximum number of cpus the system can have.  Initially
* it's NCPU, but since most archs scan the devtree for cpus
* fairly early on during boot, the real max can be known before
* ncpus is set (useful for early NCPU based allocations).
*/
/*
 * Platforms that set max_ncpus to the maximum number of cpus that can be
* dynamically added will set boot_max_ncpus to the number of cpus found
* at device tree scan time during boot.
*/
int boot_max_ncpus = -1;
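/*
 * Illustrative sketch (not part of the original file): a consumer sizing
 * per-cpu storage by max_ncpus rather than NCPU, as the comments above
 * suggest.  The array and function names are hypothetical.
 */
static uint64_t *example_percpu_counts;

static void
example_alloc_percpu_counts(void)
{
	/* max_ncpus is settled once the devtree has been scanned */
	example_percpu_counts = kmem_zalloc(max_ncpus * sizeof (uint64_t),
	    KM_SLEEP);
}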
/*
* Maximum possible CPU id. This can never be >= NCPU since NCPU is
* used to size arrays that are indexed by CPU id.
*/
int ncpus = 1;
int ncpus_online = 1;
/*
* CPU that we're trying to offline. Protected by cpu_lock.
 */
cpu_t *cpu_inmotion;
/*
 * Can be raised to suppress further weakbinding, which are instead
 * satisfied by disabling preemption.  Must be raised/lowered under cpu_lock,
 * while individual thread weakbinding synchronisation is done under thread
* lock.
*/
int weakbindingbarrier;
/*
* values for safe_list. Pause state that CPUs are in.
*/
#define	PAUSE_IDLE	0	/* normal state */
#define	PAUSE_READY	1	/* paused thread ready to spl */
#define	PAUSE_WAIT	2	/* paused thread is spl'ed */
#define	PAUSE_DIE	3	/* tell pause thread to leave */
#define	PAUSE_DEAD	4	/* pause thread has left */
/*
* Variables used in pause_cpus().
*/
static struct _cpu_pause_info {
int cp_spl; /* spl saved in pause_cpus() */
volatile int cp_go; /* Go signal sent after all ready */
	int cp_count; /* # of CPUs to pause */
} cpu_pause_info;
static kmutex_t pause_free_mutex;
static kcondvar_t pause_free_cv;
static struct cpu_sys_stats_ks_data {
{ "cpu_ticks_idle", KSTAT_DATA_UINT64 },
{ "cpu_ticks_user", KSTAT_DATA_UINT64 },
{ "cpu_ticks_kernel", KSTAT_DATA_UINT64 },
{ "cpu_ticks_wait", KSTAT_DATA_UINT64 },
{ "cpu_nsec_idle", KSTAT_DATA_UINT64 },
{ "cpu_nsec_user", KSTAT_DATA_UINT64 },
{ "cpu_nsec_kernel", KSTAT_DATA_UINT64 },
{ "wait_ticks_io", KSTAT_DATA_UINT64 },
{ "bread", KSTAT_DATA_UINT64 },
{ "bwrite", KSTAT_DATA_UINT64 },
{ "lread", KSTAT_DATA_UINT64 },
{ "lwrite", KSTAT_DATA_UINT64 },
{ "phread", KSTAT_DATA_UINT64 },
{ "phwrite", KSTAT_DATA_UINT64 },
{ "pswitch", KSTAT_DATA_UINT64 },
{ "trap", KSTAT_DATA_UINT64 },
{ "intr", KSTAT_DATA_UINT64 },
{ "syscall", KSTAT_DATA_UINT64 },
{ "sysread", KSTAT_DATA_UINT64 },
{ "syswrite", KSTAT_DATA_UINT64 },
{ "sysfork", KSTAT_DATA_UINT64 },
{ "sysvfork", KSTAT_DATA_UINT64 },
{ "sysexec", KSTAT_DATA_UINT64 },
{ "readch", KSTAT_DATA_UINT64 },
{ "writech", KSTAT_DATA_UINT64 },
{ "rcvint", KSTAT_DATA_UINT64 },
{ "xmtint", KSTAT_DATA_UINT64 },
{ "mdmint", KSTAT_DATA_UINT64 },
{ "rawch", KSTAT_DATA_UINT64 },
{ "canch", KSTAT_DATA_UINT64 },
{ "outch", KSTAT_DATA_UINT64 },
{ "msg", KSTAT_DATA_UINT64 },
{ "sema", KSTAT_DATA_UINT64 },
{ "namei", KSTAT_DATA_UINT64 },
{ "ufsiget", KSTAT_DATA_UINT64 },
{ "ufsdirblk", KSTAT_DATA_UINT64 },
{ "ufsipage", KSTAT_DATA_UINT64 },
{ "ufsinopage", KSTAT_DATA_UINT64 },
{ "procovf", KSTAT_DATA_UINT64 },
{ "intrthread", KSTAT_DATA_UINT64 },
{ "intrblk", KSTAT_DATA_UINT64 },
{ "intrunpin", KSTAT_DATA_UINT64 },
{ "idlethread", KSTAT_DATA_UINT64 },
{ "inv_swtch", KSTAT_DATA_UINT64 },
{ "nthreads", KSTAT_DATA_UINT64 },
{ "cpumigrate", KSTAT_DATA_UINT64 },
{ "xcalls", KSTAT_DATA_UINT64 },
{ "mutex_adenters", KSTAT_DATA_UINT64 },
{ "rw_rdfails", KSTAT_DATA_UINT64 },
{ "rw_wrfails", KSTAT_DATA_UINT64 },
{ "modload", KSTAT_DATA_UINT64 },
{ "modunload", KSTAT_DATA_UINT64 },
{ "bawrite", KSTAT_DATA_UINT64 },
{ "iowait", KSTAT_DATA_UINT64 },
};
static struct cpu_vm_stats_ks_data {
{ "pgrec", KSTAT_DATA_UINT64 },
{ "pgfrec", KSTAT_DATA_UINT64 },
{ "pgin", KSTAT_DATA_UINT64 },
{ "pgpgin", KSTAT_DATA_UINT64 },
{ "pgout", KSTAT_DATA_UINT64 },
{ "pgpgout", KSTAT_DATA_UINT64 },
{ "swapin", KSTAT_DATA_UINT64 },
{ "pgswapin", KSTAT_DATA_UINT64 },
{ "swapout", KSTAT_DATA_UINT64 },
{ "pgswapout", KSTAT_DATA_UINT64 },
{ "zfod", KSTAT_DATA_UINT64 },
{ "dfree", KSTAT_DATA_UINT64 },
{ "scan", KSTAT_DATA_UINT64 },
{ "rev", KSTAT_DATA_UINT64 },
{ "hat_fault", KSTAT_DATA_UINT64 },
{ "as_fault", KSTAT_DATA_UINT64 },
{ "maj_fault", KSTAT_DATA_UINT64 },
{ "cow_fault", KSTAT_DATA_UINT64 },
{ "prot_fault", KSTAT_DATA_UINT64 },
{ "softlock", KSTAT_DATA_UINT64 },
{ "kernel_asflt", KSTAT_DATA_UINT64 },
{ "pgrrun", KSTAT_DATA_UINT64 },
{ "execpgin", KSTAT_DATA_UINT64 },
{ "execpgout", KSTAT_DATA_UINT64 },
{ "execfree", KSTAT_DATA_UINT64 },
{ "anonpgin", KSTAT_DATA_UINT64 },
{ "anonpgout", KSTAT_DATA_UINT64 },
{ "anonfree", KSTAT_DATA_UINT64 },
{ "fspgin", KSTAT_DATA_UINT64 },
{ "fspgout", KSTAT_DATA_UINT64 },
{ "fsfree", KSTAT_DATA_UINT64 },
};
/*
* Force the specified thread to migrate to the appropriate processor.
* Called with thread lock held, returns with it dropped.
*/
static void
{
swtch();
} else {
}
}
}
/*
* Set affinity for a specified CPU.
* A reference count is incremented and the affinity is held until the
* reference count is decremented to zero by thread_affinity_clear().
* This is so regions of code requiring affinity can be nested.
* Caller needs to ensure that cpu_id remains valid, which can be
* done by holding cpu_lock across this call, unless the caller
* specifies CPU_CURRENT in which case the cpu_lock will be acquired
* by thread_affinity_set and CPU->cpu_id will be the target CPU.
*/
void
{
int c;
if ((c = cpu_id) == CPU_CURRENT) {
}
/*
* We should be asserting that cpu_lock is held here, but
* the NCA code doesn't acquire it. The following assert
* should be uncommented when the NCA code is fixed.
*
* ASSERT(MUTEX_HELD(&cpu_lock));
*/
/*
* If there is already a hard affinity requested, and this affinity
* conflicts with that, panic.
*/
thread_lock(t);
panic("affinity_set: setting %p but already bound to %p",
(void *)cp, (void *)t->t_bound_cpu);
}
t->t_affinitycnt++;
t->t_bound_cpu = cp;
/*
* Make sure we're running on the right CPU.
*/
force_thread_migrate(t); /* drops thread lock */
} else {
thread_unlock(t);
}
if (c == CPU_CURRENT)
}
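/*
 * Illustrative sketch (not part of the original file): typical nested use
 * of the affinity interface above.  The function name is hypothetical.
 */
static void
example_affinity_section(void)
{
	/*
	 * CPU_CURRENT pins curthread to the cpu it is running on and
	 * makes thread_affinity_set() acquire cpu_lock on our behalf.
	 */
	thread_affinity_set(curthread, CPU_CURRENT);

	/* ... operate on state tied to this cpu; nesting is allowed ... */

	thread_affinity_clear(curthread);	/* drops one reference */
}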
/*
* Wrapper for backward compatibility.
*/
void
affinity_set(int cpu_id)
{
}
/*
* Decrement the affinity reservation count and if it becomes zero,
* clear the CPU affinity for the current thread, or set it to the user's
* software binding request.
*/
void
{
register processorid_t binding;
thread_lock(t);
if (--t->t_affinitycnt == 0) {
/*
* Adjust disp_max_unbound_pri if necessary.
*/
t->t_bound_cpu = NULL;
return;
}
} else {
/*
* Make sure the thread is running on the bound CPU.
*/
if (t->t_cpu != t->t_bound_cpu) {
return; /* already dropped lock */
}
}
}
thread_unlock(t);
}
/*
* Wrapper for backward compatibility.
*/
void
affinity_clear(void)
{
}
/*
* Weak cpu affinity. Bind to the "current" cpu for short periods
* of time during which the thread must not block (but may be preempted).
* Use this instead of kpreempt_disable() when it is only "no migration"
* rather than "no preemption" semantics that are required - disabling
* preemption holds higher priority threads off of cpu and if the
* operation that is protected is more than momentary this is not good
* for realtime etc.
*
* Weakly bound threads will not prevent a cpu from being offlined -
* we'll only run them on the cpu to which they are weakly bound but
* (because they do not block) we'll always be able to move them on to
* another cpu at offline time if we give them just a short moment to
* run during which they will unbind. To give a cpu a chance of offlining,
 * however, we require a barrier to further weak bindings for that cpu
 * (so that existing weak bindings can drop); the cpu_inmotion pointer is
 * that barrier.
*
* There are few restrictions on the calling context of thread_nomigrate.
* The caller must not hold the thread lock. Calls may be nested.
*
* After weakbinding a thread must not perform actions that may block.
* In particular it must not call thread_affinity_set; calling that when
* already weakbound is nonsensical anyway.
*
* If curthread is prevented from migrating for other reasons
* (kernel preemption disabled; high pil; strongly bound; interrupt thread)
 * then the weak binding will succeed even if this cpu is the target of an
 * offline/move request.
*/
void
thread_nomigrate(void)
{
kthread_id_t t = curthread;
/*
* A highlevel interrupt must not modify t_nomigrate or
* t_weakbound_cpu of the thread it has interrupted. A lowlevel
* interrupt thread cannot migrate and we can avoid the
* thread_lock call below by short-circuiting here. In either
* case we can just return since no migration is possible and
* the condition will persist (ie, when we test for these again
* in thread_allowmigrate they can't have changed). Migration
* is also impossible if we're at or above DISP_LEVEL pil.
*/
getpil() >= DISP_LEVEL) {
return;
}
/*
* We must be consistent with existing weak bindings. Since we
* may be interrupted between the increment of t_nomigrate and
* the store to t_weakbound_cpu below we cannot assume that
* t_weakbound_cpu will be set if t_nomigrate is. Note that we
* cannot assert t_weakbound_cpu == t_bind_cpu since that is not
* always the case.
*/
if (!panicstr)
panic("thread_nomigrate: binding to %p but already "
"bound to %p", (void *)cp,
(void *)t->t_weakbound_cpu);
}
/*
* At this point we have preemption disabled and we don't yet hold
* the thread lock. So it's possible that somebody else could
* set t_bind_cpu here and not be able to force us across to the
* new cpu (since we have preemption disabled).
*/
/*
* If further weak bindings are being (temporarily) suppressed then
* we'll settle for disabling kernel preemption (which assures
* no migration provided the thread does not block which it is
* not allowed to if using thread_nomigrate). We must remember
* this disposition so we can take appropriate action in
* thread_allowmigrate. If this is a nested call and the
* thread is already weakbound then fall through as normal.
* We remember the decision to settle for kpreempt_disable through
* negative nesting counting in t_nomigrate. Once a thread has had one
* weakbinding request satisfied in this way any further (nested)
* requests will continue to be satisfied in the same way,
* even if weak bindings have recommenced.
*/
--t->t_nomigrate;
return; /* with kpreempt_disable still active */
}
/*
* We hold thread_lock so t_bind_cpu cannot change. We could,
 * however, be running on a cpu other than the one we are t_bound_cpu
* to (as explained above). If we grant the weak binding request
* in that case then the dispatcher must favour our weak binding
* over our strong (in which case, just as when preemption is
* disabled, we can continue to run on a cpu other than the one to
* which we are strongbound; the difference in this case is that
* this thread can be preempted and so can appear on the dispatch
* queues of a cpu other than the one it is strongbound to).
*
* If the cpu we are running on does not appear to be a current
* offline target (we check cpu_inmotion to determine this - since
* we don't hold cpu_lock we may not see a recent store to that,
* so it's possible that we at times can grant a weak binding to a
* cpu that is an offline target, but that one request will not
* prevent the offline from succeeding) then we will always grant
* the weak binding request. This includes the case above where
* we grant a weakbinding not commensurate with our strong binding.
*
* If our cpu does appear to be an offline target then we're inclined
* not to grant the weakbinding request just yet - we'd prefer to
* migrate to another cpu and grant the request there. The
* exceptions are those cases where going through preemption code
* will not result in us changing cpu:
*
* . interrupts have already bypassed this case (see above)
* . we are already weakbound to this cpu (dispatcher code will
* always return us to the weakbound cpu)
* . preemption was disabled even before we disabled it above
* . we are strongbound to this cpu (if we're strongbound to
* another and not yet running there the trip through the
* dispatcher will move us to the strongbound cpu and we
* will grant the weak binding there)
*/
t->t_bound_cpu == cp) {
/*
* Don't be tempted to store to t_weakbound_cpu only on
* the first nested bind request - if we're interrupted
* after the increment of t_nomigrate and before the
* store to t_weakbound_cpu and the interrupt calls
* thread_nomigrate then the assertion in thread_allowmigrate
* would fail.
*/
t->t_nomigrate++;
t->t_weakbound_cpu = cp;
/*
* Now that we have dropped the thread_lock another thread
* can set our t_weakbound_cpu, and will try to migrate us
* to the strongbound cpu (which will not be prevented by
* preemption being disabled since we're about to enable
* preemption). We have granted the weakbinding to the current
 * cpu, so again we are in the position that it is possible
* that our weak and strong bindings differ. Again this
* is catered for by dispatcher code which will favour our
* weak binding.
*/
} else {
/*
* Move to another cpu before granting the request by
* forcing this thread through preemption code. When we
* get to set{front,back}dq called from CL_PREEMPT()
* cpu_choose() will be used to select a cpu to queue
* us on - that will see cpu_inmotion and take
* steps to avoid returning us to this cpu.
*/
kpreempt_enable(); /* will call preempt() */
goto again;
}
}
void
thread_allowmigrate(void)
{
kthread_id_t t = curthread;
(t->t_nomigrate < 0 && t->t_preempt > 0) ||
getpil() >= DISP_LEVEL);
getpil() >= DISP_LEVEL)
return;
if (t->t_nomigrate < 0) {
/*
* This thread was granted "weak binding" in the
* stronger form of kernel preemption disabling.
* Undo a level of nesting for both t_nomigrate
* and t_preempt.
*/
++t->t_nomigrate;
} else if (--t->t_nomigrate == 0) {
/*
* Time to drop the weak binding. We need to cater
* for the case where we're weakbound to a different
* cpu than that to which we're strongbound (a very
* temporary arrangement that must only persist until
* weak binding drops). We don't acquire thread_lock
* here so even as this code executes t_bound_cpu
* may be changing. So we disable preemption and
* a) in the case that t_bound_cpu changes while we
* have preemption disabled kprunrun will be set
* asynchronously, and b) if before disabling
* preemption we were already on a different cpu to
* our t_bound_cpu then we set kprunrun ourselves
* to force a trip through the dispatcher when
* preemption is enabled.
*/
if (t->t_bound_cpu &&
t->t_weakbound_cpu != t->t_bound_cpu)
t->t_weakbound_cpu = NULL;
}
}
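/*
 * Illustrative sketch (not part of the original file): the intended
 * pairing of the weak binding interfaces above.  The function name is
 * hypothetical.
 */
static void
example_weakbound_section(void)
{
	thread_nomigrate();	/* weakly bind to the current cpu */

	/*
	 * Code here may be preempted but will not migrate, so CPU
	 * refers to the same cpu throughout; it must not block.
	 */

	thread_allowmigrate();	/* drop one level of weak binding */
}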
/*
* weakbinding_stop can be used to temporarily cause weakbindings made
* with thread_nomigrate to be satisfied through the stronger action of
* kpreempt_disable. weakbinding_start recommences normal weakbinding.
*/
void
weakbinding_stop(void)
{
weakbindingbarrier = 1;
membar_producer(); /* make visible before subsequent thread_lock */
}
void
weakbinding_start(void)
{
weakbindingbarrier = 0;
}
/*
* This routine is called to place the CPUs in a safe place so that
* one of them can be taken off line or placed on line. What we are
* trying to do here is prevent a thread from traversing the list
* of active CPUs while we are changing it or from getting placed on
* the run queue of a CPU that has just gone off line. We do this by
* creating a thread with the highest possible prio for each CPU and
* having it call this routine. The advantage of this method is that
* we can eliminate all checks for CPU_ACTIVE in the disp routines.
* This makes disp faster at the expense of making p_online() slower
* which is a good trade off.
*/
static void
{
int s;
*safe = PAUSE_READY;
membar_enter(); /* make sure stores are flushed */
/*
* Wait here until all pause threads are running. That
* indicates that it's safe to do the spl. Until
* cpu_pause_info.cp_go is set, we don't want to spl
* because that might block clock interrupts needed
* to preempt threads on other CPUs.
*/
;
/*
* Even though we are at the highest disp prio, we need
* to block out all interrupts below LOCK_LEVEL so that
 * an intr doesn't come in, wake up a thread, and call
 * setbackdq/setfrontdq.
*/
s = splhigh();
/*
* This cpu is now safe.
*/
*safe = PAUSE_WAIT;
membar_enter(); /* make sure stores are flushed */
/*
* Now we wait. When we are allowed to continue, safe will
* be set to PAUSE_IDLE.
*/
while (*safe != PAUSE_IDLE)
;
splx(s);
/*
* Waiting is at an end. Switch out of cpu_pause
* loop and resume useful work.
*/
swtch();
}
*safe = PAUSE_DEAD;
}
/*
* Allow the cpus to start running again.
*/
void
{
int i;
for (i = 0; i < NCPU; i++)
safe_list[i] = PAUSE_IDLE;
membar_enter(); /* make sure stores are flushed */
}
/*
* Allocate a pause thread for a CPU.
*/
static void
{
kthread_id_t t;
/*
* Note, v.v_nglobpris will not change value as long as I hold
* cpu_lock.
*/
thread_lock(t);
t->t_bound_cpu = cp;
t->t_affinitycnt = 1;
t->t_preempt = 1;
thread_unlock(t);
cp->cpu_pause_thread = t;
/*
* Registering a thread in the callback table is usually done
* in the initialization code of the thread. In this
* case, we do it right after thread creation because the
* thread itself may never run, and we need to register the
* fact that it is safe for cpr suspend.
*/
CALLB_CPR_INIT_SAFE(t, "cpu_pause");
}
/*
* Free a pause thread for a CPU.
*/
static void
{
kthread_id_t t;
/*
* We have to get the thread and tell him to die.
*/
return;
}
thread_lock(t);
setbackdq(t);
/*
* If we don't wait for the thread to actually die, it may try to
* run on the wrong cpu as part of an actual call to pause_cpus().
*/
}
}
/*
* Initialize basic structures for pausing CPUs.
*/
void
{
/*
* Create initial CPU pause thread.
*/
}
/*
* Start the threads used to pause another CPU.
*/
static int
{
int i;
int cpu_count = 0;
for (i = 0; i < NCPU; i++) {
kthread_id_t t;
safe_list[i] = PAUSE_WAIT;
continue;
}
/*
* Skip CPU if it is quiesced or not yet started.
*/
safe_list[i] = PAUSE_WAIT;
continue;
}
/*
* Start this CPU's pause thread.
*/
t = cp->cpu_pause_thread;
thread_lock(t);
/*
* Reset the priority, since nglobpris may have
* changed since the thread was created, if someone
* has loaded the RT (or some other) scheduling
* class.
*/
setbackdq(t);
++cpu_count;
}
return (cpu_count);
}
/*
* Pause all of the CPUs except the one we are on by creating a high
* priority thread bound to those CPUs.
*
* Note that one must be extremely careful regarding code
* executed while CPUs are paused. Since a CPU may be paused
* while a thread scheduling on that CPU is holding an adaptive
* lock, code executed with CPUs paused must not acquire adaptive
* (or low-level spin) locks. Also, such code must not block,
* since the thread that is supposed to initiate the wakeup may
* never run.
*
* With a few exceptions, the restrictions on code executed with CPUs
* paused match those for code executed at high-level interrupt
* context.
*/
void
{
int i;
for (i = 0; i < NCPU; i++)
safe_list[i] = PAUSE_IDLE;
/*
* If running on the cpu that is going offline, get off it.
* This is so that it won't be necessary to rechoose a CPU
* when done.
*/
else
/*
* Start the pause threads and record how many were started
*/
/*
* Now wait for all CPUs to be running the pause thread.
*/
/*
* Spin reading the count without grabbing the disp
* lock to make sure we don't prevent the pause
* threads from getting the lock.
*/
;
}
/*
* Now wait for all CPUs to spl. (Transition from PAUSE_READY
* to PAUSE_WAIT.)
*/
for (i = 0; i < NCPU; i++) {
while (safe_list[i] != PAUSE_WAIT)
;
}
}
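/*
 * Illustrative sketch (not part of the original file): the usual
 * pause/modify/resume pattern followed by callers of pause_cpus() and
 * start_cpus() in this file.  The function name is hypothetical.
 */
static void
example_modify_cpu_list(void)
{
	mutex_enter(&cpu_lock);
	(void) pause_cpus(NULL);	/* park all other cpus */

	/*
	 * Modify cpu_list here.  No blocking and no adaptive (or
	 * low-level spin) locks while cpus are paused; see the
	 * restrictions described above pause_cpus().
	 */

	start_cpus();			/* let the other cpus resume */
	mutex_exit(&cpu_lock);
}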
/*
* Check whether the current thread has CPUs paused
*/
int
cpus_paused(void)
{
return (1);
}
return (0);
}
static cpu_t *
{
return (NULL);
}
/*
* Check whether cpun is a valid processor id and whether it should be
* visible from the current zone. If it is, return a pointer to the
* associated CPU structure.
*/
cpu_t *
{
cpu_t *c;
c = cpu_get_all(cpun);
return (NULL);
return (c);
}
/*
* The following functions should be used to check CPU states in the kernel.
* They should be invoked with cpu_lock held. Kernel subsystems interested
 * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc
 * states.  Those are for user-land (and system call) use only.
*/
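/*
 * Illustrative sketch (not part of the original file): checking a cpu's
 * state from kernel code with cpu_lock held, as required by the
 * predicates below.  The function name is hypothetical.
 */
static int
example_cpu_usable(processorid_t id)
{
	cpu_t	*cp;
	int	usable = 0;

	mutex_enter(&cpu_lock);
	if ((cp = cpu_get(id)) != NULL && cpu_is_online(cp))
		usable = 1;
	mutex_exit(&cpu_lock);

	return (usable);
}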
/*
* Determine whether the CPU is online and handling interrupts.
*/
int
{
}
/*
* Determine whether the CPU is offline (this includes spare and faulted).
*/
int
{
}
/*
* Determine whether the CPU is powered off.
*/
int
{
}
/*
* Determine whether the CPU is handling interrupts.
*/
int
{
}
/*
* Determine whether the CPU is active (scheduling threads).
*/
int
{
}
/*
* Same as above, but these require cpu_flags instead of cpu_t pointers.
*/
int
{
return (cpu_flagged_active(cpu_flags) &&
(cpu_flags & CPU_ENABLE));
}
int
{
return (((cpu_flags & CPU_POWEROFF) == 0) &&
}
int
{
}
int
{
return (cpu_flagged_active(cpu_flags) &&
(cpu_flags & CPU_ENABLE) == 0);
}
int
{
}
/*
* Bring the indicated CPU online.
*/
int
{
int error = 0;
/*
* Handle on-line request.
* This code must put the new CPU on the active list before
* starting it because it will not be paused, and will start
* using the active list immediately. The real start occurs
* when the CPU_QUIESCED flag is turned off.
*/
/*
* Put all the cpus into a known safe place.
* No mutexes can be entered while CPUs are paused.
*/
if (error == 0) {
}
start_cpus();
}
return (error);
}
/*
* Take the indicated CPU offline.
*/
int
{
int error = 0;
int intr_enable;
int cyclic_off = 0;
int loop_count;
int no_quiesce = 0;
int (*bound_func)(struct cpu *, int);
kthread_t *t;
proc_t *p;
int lgrp_diff_lpl;
/*
* If we're going from faulted or spare to offline, just
* clear these flags and update CPU state.
*/
}
return (0);
}
/*
* Handle off-line request.
*/
/*
* Don't offline last online CPU in partition
*/
return (EBUSY);
/*
 * Unbind all threads bound to our CPU if we were asked to.
*/
return (error);
/*
* We shouldn't be bound to this CPU ourselves.
*/
return (EBUSY);
/*
* Tell interested parties that this CPU is going offline.
*/
/*
* Take the CPU out of interrupt participation so we won't find
* bound kernel threads. If the architecture cannot completely
* shut off interrupts on the CPU, don't quiesce it, but don't
* run anything but interrupt thread... this is indicated by
 * the CPU_OFFLINE flag being on but the CPU_QUIESCED flag being
* off.
*/
if (intr_enable)
/*
* Record that we are aiming to offline this cpu. This acts as
* a barrier to further weak binding requests in thread_nomigrate
* and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to
* lean away from this cpu. Further strong bindings are already
* avoided since we hold cpu_lock. Since threads that are set
* runnable around now and others coming off the target cpu are
* directed away from the target, existing strong and weak bindings
* (especially the latter) to the target cpu stand maximum chance of
* being able to unbind during the short delay loop below (if other
 * unbound threads compete they may not see the cpu in time to unbind
 * even if they would do so immediately).
*/
cpu_inmotion = cp;
membar_enter();
/*
* Check for kernel threads (strong or weak) bound to that CPU.
* Strongly bound threads may not unbind, and we'll have to return
* EBUSY. Weakly bound threads should always disappear - we've
* stopped more weak binding with cpu_inmotion and existing
* bindings will drain imminently (they may not block). Nonetheless
* we will wait for a fixed period for all bound threads to disappear.
* Inactive interrupt threads are OK (they'll be in TS_FREE
 * state).  If the test finds some bound threads, wait a few ticks
 * to give short-lived threads (such as interrupts) a chance to
* complete. Note that if no_quiesce is set, i.e. this cpu
* is required to service interrupts, then we take the route
* that permits interrupt threads to be active (or bypassed).
*/
if (loop_count >= 5) {
break;
}
/*
* If some threads were assigned, give them
* a chance to complete or move.
*
* This assumes that the clock_thread is not bound
* to any CPU, because the clock_thread is needed to
* do the delay(hz/100).
*
* Note: we still hold the cpu_lock while waiting for
* the next clock tick. This is OK since it isn't
* needed for anything else except processor_bind(2),
* and system initialization. If we drop the lock,
* we would risk another p_online disabling the last
* processor.
*/
}
if (error == 0 && cyclic_off == 0) {
if (!cyclic_offline(cp)) {
/*
* We must have bound cyclics...
*/
goto out;
}
cyclic_off = 1;
}
/*
* Call mp_cpu_stop() to perform any special operations
* needed for this machine architecture to offline a CPU.
*/
if (error == 0)
/*
* If that all worked, take the CPU offline and decrement
* ncpus_online.
*/
if (error == 0) {
/*
* Put all the cpus into a known safe place.
* No mutexes can be entered while CPUs are paused.
*/
pause_cpus(cp);
/*
* Repeat the operation, if necessary, to make sure that
* all outstanding low-level interrupts run to completion
* before we set the CPU_QUIESCED flag. It's also possible
 * that a thread has weakly bound itself to the cpu despite our raising
* cpu_inmotion above since it may have loaded that
* value before the barrier became visible (this would have
* to be the thread that was on the target cpu at the time
* we raised the barrier).
*/
start_cpus();
(void) mp_cpu_start(cp);
goto again;
}
/*
* Remove the CPU from the list of active CPUs.
*/
/*
* Walk the active process list and look for threads
* whose home lgroup needs to be updated, or
* the last CPU they run on is the one being offlined now.
*/
t = p->p_tlist;
if (t == NULL)
continue;
lgrp_diff_lpl = 0;
do {
/*
* Taking last CPU in lpl offline
* Rehome thread if it is in this lpl
* Otherwise, update the count of how many
* threads are in this CPU's lgroup but have
* a different lpl.
*/
lgrp_choose(t,
t->t_cpupart), 0);
else if (t->t_lpl->lpl_lgrpid ==
}
/*
* Update CPU last ran on if it was this CPU
*/
t->t_weakbound_cpu == cp);
t = t->t_forw;
} while (t != p->p_tlist);
/*
* Didn't find any threads in the same lgroup as this
* CPU with a different lpl, so remove the lgroup from
* the process lgroup bitmask.
*/
if (lgrp_diff_lpl == 0)
}
/*
* Walk thread list looking for threads that need to be
* rehomed, since there are some threads that are not in
* their process's p_tlist.
*/
t = curthread;
do {
/*
* Rehome threads with same lpl as this CPU when this
* is the last CPU in the lpl.
*/
/*
* Update CPU last ran on if it was this CPU
*/
}
t->t_weakbound_cpu == cp);
t = t->t_next;
} while (t != curthread);
if (!no_quiesce)
ncpus_online--;
cpu_inmotion = NULL;
start_cpus();
}
out:
cpu_inmotion = NULL;
/*
* If we failed, re-enable interrupts.
* Do this even if cpu_intr_disable returned an error, because
* it may have partially disabled interrupts.
*/
if (error && intr_enable)
/*
* If we failed, but managed to offline the cyclic subsystem on this
* CPU, bring it back online.
*/
if (error && cyclic_off)
/*
* If we failed, we need to notify everyone that this CPU is back on.
*/
if (error != 0)
return (error);
}
/*
* Mark the indicated CPU as faulted, taking it offline.
*/
int
{
int error = 0;
if (cpu_is_offline(cp)) {
return (0);
}
}
return (error);
}
/*
* Mark the indicated CPU as a spare, taking it offline.
*/
int
{
int error = 0;
if (cpu_is_offline(cp)) {
}
return (0);
}
}
return (error);
}
/*
* Take the indicated CPU from poweroff to offline.
*/
int
{
if (error == 0)
return (error);
}
/*
* Take the indicated CPU from any inactive state to powered off.
*/
int
{
return (EBUSY); /* not completely idle */
if (error == 0)
return (error);
}
/*
* Initialize the CPU lists for the first CPU.
*/
void
{
cpu_active = cp;
}
/*
* Insert a CPU into the list of available CPUs.
*/
void
{
int seqid;
/*
* Note: most users of the cpu_list will grab the
* cpu_lock to insure that it isn't modified. However,
* certain users can't or won't do that. To allow this
* we pause the other cpus. Users who walk the list
* without cpu_lock, must disable kernel preemption
* to insure that the list isn't modified underneath
* them. Also, any cached pointers to cpu structures
* must be revalidated by checking to see if the
* cpu_next pointer points to itself. This check must
* be done with the cpu_lock held or kernel preemption
* disabled. This check relies upon the fact that
* old cpu structures are not free'ed or cleared after
 * they are removed from the cpu_list.
*
* Note that the clock code walks the cpu list dereferencing
* the cpu_part pointer, so we need to initialize it before
* adding the cpu to the list.
*/
(void) pause_cpus(NULL);
start_cpus();
continue;
ncpus++;
/*
* allocate a pause thread for this CPU.
*/
/*
* So that new CPUs won't have NULL prev_onln and next_onln pointers,
* link them into a list of just that CPU.
* This is so that disp_lowpri_cpu will work for thread_create in
* pause_cpus() when called from the startup thread in a new CPU.
*/
pool_pset_mod = gethrtime();
}
/*
* Do the opposite of cpu_add_unit().
*/
void
cpu_del_unit(int cpuid)
{
/*
* Destroy kstat stuff.
*/
/*
* Free up pause thread.
*/
/*
* The clock thread and mutex_vector_enter cannot hold the
* cpu_lock while traversing the cpu list, therefore we pause
* all other threads by pausing the other cpus. These, and any
* other routines holding cpu pointers while possibly sleeping
* must be sure to call kpreempt_disable before processing the
* list and be sure to check that the cpu has not been deleted
* after any sleeps (check cp->cpu_next != NULL). We guarantee
* to keep the deleted cpu structure around.
*
* Note that this MUST be done AFTER cpu_available
* has been updated so that we don't waste time
* trying to pause the cpu we're trying to delete.
*/
(void) pause_cpus(NULL);
/*
* Signals that the cpu has been deleted (see above).
*/
start_cpus();
ncpus--;
pool_pset_mod = gethrtime();
}
/*
* Add a CPU to the list of active CPUs.
* This routine must not get any locks, because other CPUs are paused.
*/
static void
{
ncpus_online++;
if (pp->cp_cpulist) {
} else {
}
ASSERT(cp_numparts_nonempty != 0);
}
}
/*
* Add a CPU to the list of active CPUs.
* This is called from machine-dependent layers when a new CPU is started.
*/
void
{
start_cpus();
}
/*
* Remove a CPU from the list of active CPUs.
* This routine must not get any locks, because other CPUs are paused.
*/
/* ARGSUSED */
static void
{
if (cpu_active == cp) {
}
}
ASSERT(cp_numparts_nonempty != 0);
}
}
/*
* Routine used to setup a newly inserted CPU in preparation for starting
* it running code.
*/
int
cpu_configure(int cpuid)
{
int retval = 0;
/*
* Some structures are statically allocated based upon
* the maximum number of cpus the system supports. Do not
* try to add anything beyond this limit.
*/
return (EINVAL);
}
return (EALREADY);
}
return (retval);
}
if (retval != 0)
(void) mp_cpu_unconfigure(cpuid);
return (retval);
}
/*
* Routine used to cleanup a CPU that has been powered off. This will
* destroy all per-cpu information related to this cpu.
*/
int
cpu_unconfigure(int cpuid)
{
int error;
return (ENODEV);
}
return (EALREADY);
}
return (EBUSY);
}
}
if (error != 0)
return (error);
return (mp_cpu_unconfigure(cpuid));
}
/*
* Routines for registering and de-registering cpu_setup callback functions.
*
* Caller's context
* These routines must not be called from a driver's attach(9E) or
* detach(9E) entry point.
*
* NOTE: CPU callbacks should not block. They are called with cpu_lock held.
*/
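/*
 * Illustrative sketch (not part of the original file): a subsystem
 * registering a cpu_setup callback.  The callback and its behaviour are
 * hypothetical; the cpu_setup_func_t signature comes from <sys/cpuvar.h>.
 * Registration would typically be done once from the subsystem's init
 * path:  register_cpu_setup_func(example_cpu_setup, NULL);
 */
static int
example_cpu_setup(cpu_setup_t what, int id, void *arg)
{
	ASSERT(MUTEX_HELD(&cpu_lock));	/* callbacks run under cpu_lock */

	switch (what) {
	case CPU_CONFIG:
		/* set up per-cpu state for cpu "id"; must not block */
		break;
	case CPU_UNCONFIG:
		/* tear down per-cpu state for cpu "id" */
		break;
	default:
		break;
	}
	return (0);
}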
/*
* Ideally, these would be dynamically allocated and put into a linked
* list; however that is not feasible because the registration routine
* has to be available before the kmem allocator is working (in fact,
* it is called by the kmem allocator init code). In any case, there
* are quite a few extra entries for future users.
*/
#define NCPU_SETUPS 20
struct cpu_setup {
void *arg;
void
{
int i;
for (i = 0; i < NCPU_SETUPS; i++)
break;
if (i >= NCPU_SETUPS)
}
void
{
int i;
for (i = 0; i < NCPU_SETUPS; i++)
break;
if (i >= NCPU_SETUPS)
"deregister");
cpu_setups[i].arg = 0;
}
/*
* Call any state change hooks for this CPU, ignore any errors.
*/
void
{
int i;
for (i = 0; i < NCPU_SETUPS; i++) {
}
}
}
/*
* Call any state change hooks for this CPU, undo it if error found.
*/
static int
{
int i;
int retval = 0;
for (i = 0; i < NCPU_SETUPS; i++) {
cpu_setups[i].arg);
if (retval) {
for (i--; i >= 0; i--) {
}
break;
}
}
}
return (retval);
}
/*
* Export information about this CPU via the kstat mechanism.
*/
static struct {
#if defined(__sparcv9)
#endif
#endif
} cpu_info_template = {
{ "state", KSTAT_DATA_CHAR },
{ "state_begin", KSTAT_DATA_LONG },
{ "cpu_type", KSTAT_DATA_CHAR },
{ "fpu_type", KSTAT_DATA_CHAR },
{ "clock_MHz", KSTAT_DATA_LONG },
{ "chip_id", KSTAT_DATA_LONG },
{ "implementation", KSTAT_DATA_STRING },
{ "brand", KSTAT_DATA_STRING },
{ "core_id", KSTAT_DATA_LONG },
#if defined(__sparcv9)
{ "device_ID", KSTAT_DATA_UINT64 },
{ "cpu_fru", KSTAT_DATA_STRING },
#endif
{ "vendor_id", KSTAT_DATA_STRING },
{ "family", KSTAT_DATA_INT32 },
{ "model", KSTAT_DATA_INT32 },
{ "stepping", KSTAT_DATA_INT32 },
{ "clog_id", KSTAT_DATA_INT32 },
#endif
};
static kmutex_t cpu_info_template_lock;
static int
{
const char *pi_state;
if (rw == KSTAT_WRITE)
return (EACCES);
case P_ONLINE:
break;
case P_POWEROFF:
break;
case P_NOINTR:
break;
case P_FAULTED:
break;
case P_SPARE:
break;
case P_OFFLINE:
break;
default:
pi_state = "unknown";
}
#if defined(__sparcv9)
#endif
#endif
return (0);
}
static void
{
if (pool_pset_enabled())
else
sizeof (cpu_info_template) / sizeof (kstat_named_t),
#if defined(__sparcv9)
#endif
#endif
}
}
static void
{
}
/*
* Create and install kstats for the boot CPU.
*/
void
{
}
/*
* Make visible to the zone that subset of the cpu information that would be
* initialized when a cpu is configured (but still offline).
*/
void
{
zone->zone_ncpus++;
}
}
/*
* Make visible to the zone that subset of the cpu information that would be
* initialized when a previously configured cpu is onlined.
*/
void
{
}
!= NULL) {
}
}
}
NULL) {
}
}
/*
* Update relevant kstats such that cpu is now visible to processes
* executing in specified zone.
*/
void
{
if (cpu_is_active(cp))
}
/*
* Make invisible to the zone that subset of the cpu information that would be
* torn down when a previously offlined cpu is unconfigured.
*/
void
{
zone->zone_ncpus--;
}
if (cp->cpu_info_kstat)
}
/*
* Make invisible to the zone that subset of the cpu information that would be
* torn down when a cpu is offlined (but still configured).
*/
void
{
}
NULL) {
}
}
}
!= NULL) {
}
}
/*
* Update relevant kstats such that cpu is no longer visible to processes
* executing in specified zone.
*/
void
{
if (cpu_is_active(cp))
}
/*
* Bind a thread to a CPU as requested.
*/
int
int *error)
{
/*
* Record old binding, but change the obind, which was initialized
* to PBIND_NONE, only if this thread has a binding. This avoids
* reporting PBIND_NONE for a process when some LWPs are bound.
*/
if (binding != PBIND_NONE)
if (bind == PBIND_QUERY) {
return (0);
}
/*
 * If this thread/LWP cannot be bound because of permission
 * problems, just note that and return success so that the
 * other threads/LWPs will be bound.  This is the way
 * processor_bind() is defined to work.
*
* Binding will get EPERM if the thread is of system class
* or hasprocperm() fails.
*/
return (0);
}
if (binding != PBIND_NONE) {
/*
* Make sure binding is in right partition.
*/
return (0);
}
}
/*
* If there is no system-set reason for affinity, set
* the t_bound_cpu field to reflect the binding.
*/
if (tp->t_affinitycnt == 0) {
if (binding == PBIND_NONE) {
/*
* We may need to adjust disp_max_unbound_pri
* since we're becoming unbound.
*/
/*
* Move thread to lgroup with strongest affinity
* after unbinding
*/
if (tp->t_lgrp_affinity)
} else {
/*
* Set home to lgroup with most affinity containing CPU
* that thread is being bound or minimum bounding
* lgroup if no affinities set
*/
if (tp->t_lgrp_affinity)
else
/* can't grab cpu_lock */
}
/*
* Make the thread switch to the bound CPU.
* If the thread is runnable, we need to
* requeue it even if t_cpu is already set
* to the right CPU, since it may be on a
* kpreempt queue and need to move to a local
* queue. We could check t_disp_queue to
* avoid unnecessary overhead if it's already
* on the right queue, but since this isn't
* a performance-critical operation it doesn't
* seem worth the extra code and complexity.
*
* If the thread is weakbound to the cpu then it will
* resist the new binding request until the weak
* binding drops. The cpu_surrender or requeueing
* below could be skipped in such cases (since it
* will have no effect), but that would require
* thread_allowmigrate to acquire thread_lock so
* we'll take the very occasional hit here instead.
*/
/*
* Either on the bound CPU's disp queue now,
* or swapped out or on the swap queue.
*/
!= TS_LOAD);
}
}
}
/*
* Our binding has changed; set TP_CHANGEBIND.
*/
return (0);
}
#if CPUSET_WORDS > 1
/*
* Functions for implementing cpuset operations when a cpuset is more
* than one word. On platforms where a cpuset is a single word these
* are implemented as macros in cpuvar.h.
*/
void
cpuset_all(cpuset_t *s)
{
int i;
for (i = 0; i < CPUSET_WORDS; i++)
}
void
{
cpuset_all(s);
CPUSET_DEL(*s, cpu);
}
void
{
CPUSET_ZERO(*s);
CPUSET_ADD(*s, cpu);
}
int
{
int i;
for (i = 0; i < CPUSET_WORDS; i++)
if (s->cpub[i] != 0)
return (0);
return (1);
}
int
{
int i;
for (i = 0; i < CPUSET_WORDS; i++)
return (0);
return (1);
}
cpuset_find(cpuset_t *s)
{
uint_t i;
/*
* Find a cpu in the cpuset
*/
for (i = 0; i < CPUSET_WORDS; i++) {
break;
}
}
return (cpu);
}
void
{
int i, j;
/*
* First, find the smallest cpu id in the set.
*/
for (i = 0; i < CPUSET_WORDS; i++) {
if (s->cpub[i] != 0) {
/*
* Now find the largest cpu id in
* the set and return immediately.
* Done in an inner loop to avoid
* having to break out of the first
* loop.
*/
for (j = CPUSET_WORDS - 1; j >= i; j--) {
if (s->cpub[j] != 0) {
return;
}
}
/*
* If this code is reached, a
* smallestid was found, but not a
* largestid. The cpuset must have
* been changed during the course
* of this function call.
*/
ASSERT(0);
}
}
}
#endif /* CPUSET_WORDS */
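/*
 * Illustrative sketch (not part of the original file): typical use of the
 * cpuset operations, which expand to the macros in cpuvar.h on
 * single-word platforms and to the functions above otherwise.  The
 * function name is hypothetical.
 */
static void
example_cpuset_use(processorid_t id)
{
	cpuset_t set;

	CPUSET_ZERO(set);
	CPUSET_ADD(set, id);

	if (CPU_IN_SET(set, id)) {
		/* cpu "id" is a member, e.g. a target for a cross call */
	}

	CPUSET_DEL(set, id);
	ASSERT(CPUSET_ISNULL(set));
}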
/*
* Unbind all user threads bound to a given CPU.
*/
int
{
int ret = 0;
/*
* Skip zombies, kernel processes, and processes in
* other zones, if called from a non-global zone.
*/
continue;
}
do {
continue;
if (ret == 0)
}
if (ret == 0)
return (ret);
}
/*
* Destroy all remaining bound threads on a cpu.
*/
void
{
/*
* Destroy all remaining bound threads on the cpu. This
* should include both the interrupt threads and the idle thread.
* This requires some care, since we need to traverse the
* thread list with the pidlock mutex locked, but thread_free
* also locks the pidlock mutex. So, we collect the threads
* we're going to reap in a list headed by "tlist", then we
* unlock the pidlock mutex and traverse the tlist list,
 * doing thread_free()s on the threads.  Simple, n'est-ce pas?
* Also, this depends on thread_free not mucking with the
* t_next and t_prev links of the thread.
*/
do {
if (t->t_bound_cpu == cp) {
/*
* We've found a bound thread, carefully unlink
* it out of the thread list, and add it to
* our "tlist". We "know" we don't have to
* worry about unlinking curthread (the thread
* that is executing this code).
*/
tlist = t;
/* wake up anyone blocked in thread_join */
cv_broadcast(&t->t_joincv);
/*
* t_lwp set by interrupt threads and not
* cleared.
*/
/*
* Pause and idle threads always have
* t_state set to TS_ONPROC.
*/
}
thread_free(t);
}
}
}
/*
* processor_info(2) and p_online(2) status support functions
* The constants returned by the cpu_get_state() and cpu_get_state_str() are
* for use in communicating processor state information to userland. Kernel
* subsystems should only be using the cpu_flags value directly. Subsystems
* modifying cpu_flags should record the state change via a call to the
* cpu_set_state().
*/
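/*
 * Illustrative sketch (not part of the original file): reporting a cpu's
 * processor_info(2)-style state from kernel code.  The function name is
 * hypothetical.
 */
static void
example_report_cpu_state(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	cmn_err(CE_NOTE, "cpu %d is %s", cp->cpu_id, cpu_get_state_str(cp));
}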
/*
* Update the pi_state of this CPU. This function provides the CPU status for
* the information returned by processor_info(2).
*/
void
{
pool_cpu_mod = gethrtime();
}
/*
 * Return offline/online/other status for the indicated CPU.  Use only for
 * communication with user applications; cpu_flags provides the in-kernel
* interface.
*/
int
{
return (P_POWEROFF);
return (P_FAULTED);
return (P_SPARE);
return (P_OFFLINE);
return (P_ONLINE);
else
return (P_NOINTR);
}
/*
* Return processor_info(2) state as a string.
*/
const char *
{
const char *string;
switch (cpu_get_state(cpu)) {
case P_ONLINE:
break;
case P_POWEROFF:
break;
case P_NOINTR:
break;
case P_SPARE:
break;
case P_FAULTED:
string = PS_FAULTED;
break;
case P_OFFLINE:
string = PS_OFFLINE;
break;
default:
string = "unknown";
break;
}
return (string);
}
/*
* Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named
* kstats, respectively. This is done when a CPU is initialized or placed
* online via p_online(2).
*/
static void
{
char *module = "cpu";
char *class = "misc";
if (pool_pset_enabled())
else
/*
* Create named kstats
*/
zoneid); \
kstat_install(ksp); \
} else \
/*
* Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat.
*/
}
}
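/*
 * Illustrative sketch (not part of the original file): the general shape
 * of the named-kstat export performed by the (largely elided) creation
 * code above.  The helper name and its arguments are hypothetical.
 */
static kstat_t *
example_cpu_named_kstat(cpu_t *cp, char *name, void *data, uint_t ndata,
    int (*update)(kstat_t *, int))
{
	kstat_t *ksp;

	ksp = kstat_create("cpu", cp->cpu_id, name, "misc",
	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_data = data;		/* caller-provided template */
		ksp->ks_update = update;	/* refreshes ks_data on read */
		ksp->ks_private = cp;
		kstat_install(ksp);
	}
	return (ksp);
}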
static void
{
char ks_name[KSTAT_STRLEN];
}
static int
{
struct cpu_sys_stats_ks_data *csskd;
int i;
if (rw == KSTAT_WRITE)
return (EACCES);
/*
* Read CPU mstate, but compare with the last values we
* received to make sure that the returned kstats never
* decrease.
*/
sizeof (cpu_sys_stats_ks_data_template));
for (i = 0; i < PIL_MAX; i++)
for (i = 0; i < LOCK_LEVEL; i++)
return (0);
}
static int
{
struct cpu_vm_stats_ks_data *cvskd;
if (rw == KSTAT_WRITE)
return (EACCES);
sizeof (cpu_vm_stats_ks_data_template));
return (0);
}
static int
{
int i;
if (rw == KSTAT_WRITE)
return (EACCES);
/*
* Read CPU mstate, but compare with the last values we
* received to make sure that the returned kstats never
* decrease.
*/
for (i = 0; i < PIL_MAX; i++)
for (i = 0; i < LOCK_LEVEL; i++)
return (0);
}