/* cmt.c revision 3e81cacf8672b40d79c410d4b7858729d77912ff */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/processor.h>
/*
* CMT scheduler / dispatcher support
*
* This file implements CMT scheduler support using Processor Groups.
* The CMT processor group class creates and maintains the CMT class
* specific processor group pg_cmt_t.
*
* ---------------------------- <-- pg_cmt_t *
* | pghw_t |
* ----------------------------
* | CMT class specific data |
* | - hierarchy linkage |
* | - CMT load balancing data|
* ----------------------------
*
* The scheduler/dispatcher leverages knowledge of the performance
* relevant CMT sharing relationships existing between cpus to implement
* optimized affinity and load balancing policies.
*
* Load balancing policy seeks to improve performance by minimizing
* contention over shared processor resources / facilities, while the
* affinity policies seek to improve cache and TLB utilization.
*
* The CMT PGs created by this class are already arranged into a
* hierarchy (which is done in the pghw layer). To implement the top-down
* CMT load balancing algorithm, the CMT PGs additionally maintain
* parent, child and sibling hierarchy relationships.
* Parent PGs always contain a superset of their children(s) resources,
* each PG can have at most one parent, and siblings are the group of PGs
* sharing the same parent.
*
* On NUMA systems, the CMT load balancing algorithm balances across the
* CMT PGs within their respective lgroups. On UMA based system, there
* exists a top level group of PGs to balance across. On NUMA systems multiple
* top level groups are instantiated, where the top level balancing begins by
* balancing across the CMT PGs within their respective (per lgroup) top level
* groups.
*/
/*
 * Per-lgroup state for the CMT PG hierarchy.
 * NOTE(review): this revision shows only the top-level PG count; the
 * hierarchy linkage fields appear to have been elided -- confirm against
 * upstream before relying on this definition.
 */
typedef struct cmt_lgrp {
int cl_npgs; /* # of top level PGs in the lgroup */
} cmt_lgrp_t;
/* used for null_proc_lpa */
/* NOTE(review): the variable this comment annotated is missing here */
/*
 * Set this to non-zero to disable CMT scheduling
 */
static int cmt_sched_disabled = 0;
/* Forward declarations for the CMT PG class callbacks defined below */
static pg_t *pg_cmt_alloc();
static void pg_cmt_free(pg_t *);
static void pg_cmt_cpu_init(cpu_t *);
static void pg_cmt_cpu_fini(cpu_t *);
static void pg_cmt_cpu_active(cpu_t *);
static void pg_cmt_cpu_inactive(cpu_t *);
static void pg_cmt_hier_pack(void **, int);
static int pg_cmt_hw(pghw_type_t);
/*
 * Macro to test if PG is managed by the CMT PG class
 */
/* NOTE(review): the macro definition itself is absent from this revision */
/*
 * CMT PG ops
 */
/*
 * NOTE(review): the ops table should name one callback per class operation
 * (alloc, free, cpu_init, ...); only the cpupart_out slot survives here --
 * restore the full initializer from upstream.
 */
struct pg_ops pg_ops_cmt = {
NULL, /* cpupart_out */
};
/*
 * Initialize the CMT PG class.
 * Becomes a no-op when CMT scheduling has been administratively disabled
 * via the cmt_sched_disabled tunable.
 */
void
pg_cmt_class_init(void)
{
	/* Nothing to initialize when CMT scheduling is turned off */
	if (cmt_sched_disabled != 0)
		return;
}
/*
 * Called to indicate a new CPU has started up so
 * that either t0 or the slave startup thread can
 * be accounted for.
 */
/*
 * NOTE(review): the function name, parameter list and body are missing from
 * this revision -- only the return type and empty braces remain.
 */
void
{
}
/*
 * Adjust the CMT load in the CMT PGs in which the CPU belongs
 * Note that "n" can be positive in the case of increasing
 * load, or negative in the case of decreasing load.
 */
/*
 * NOTE(review): function name, parameters and body are missing; the trailing
 * extra brace below is an artifact of the elided loop body.
 */
void
{
}
}
/*
 * Return non-zero if thread can migrate between "from" and "to"
 * without a performance penalty
 */
/*
 * NOTE(review): the name, parameters and the conditions guarding the two
 * return statements are missing -- as written the second return is
 * unreachable. Restore from upstream.
 */
int
{
return (1);
return (0);
}
/*
 * CMT class specific PG allocation
 */
static pg_t *
pg_cmt_alloc(void)
{
/*
 * NOTE(review): the allocation body is missing; as written this non-void
 * function falls off its end (undefined behavior if the result is used).
 */
}
/*
 * Class specific PG de-allocation
 */
/* NOTE(review): name, parameter and body elided -- see pg_cmt_free prototype above */
static void
{
}
/*
 * Return 1 if CMT scheduling policies should be implemented
 * for the specified hardware sharing relationship.
 */
/*
 * NOTE(review): the name/parameter and the right-hand side of the || are
 * missing -- presumably the affinity-policy check; confirm upstream.
 */
static int
{
return (pg_plat_cmt_load_bal_hw(hw) ||
}
/*
 * CMT class callback for a new CPU entering the system
 */
/*
 * NOTE(review): most executable statements of this function are missing from
 * this revision; only the commentary and control-flow fragments remain.
 * The surviving comments below describe the intended structure.
 */
static void
{
/*
 * A new CPU is coming into the system.
 * Interrogate the platform to see if the CPU
 * has any performance relevant CMT sharing
 * relationships
 */
/*
 * We're only interested in CMT hw sharing relationships
 */
continue;
/*
 * Find (or create) the PG associated with
 * the hw sharing relationship in which cp
 * belongs.
 *
 * Determine if a suitable PG already
 * exists, or if one needs to be created.
 */
/*
 * Create a new one.
 * Initialize the common...
 */
/* ... physical ... */
/*
 * ... and CMT specific portions of the
 * structure.
 */
} else {
}
/* Add the CPU to the PG */
/*
 */
}
/*
 * Build a lineage of CMT PGs for load balancing
 */
if (pg_plat_cmt_load_bal_hw(hw)) {
nlevels++;
}
/* Cache this for later */
if (hw == PGHW_CACHE)
}
/*
 * Pack out any gaps in the constructed lineage,
 * then size it out.
 *
 * Gaps may exist where the architecture knows
 * about a hardware sharing relationship, but such a
 * relationship either isn't relevant for load
 * balancing or doesn't exist between CPUs on the system.
 */
/*
 * Find the lgrp that encapsulates this CPU's CMT hierarchy.
 */
/*
 * For each of the PGs in the CPU's lineage:
 * - Add an entry in the CPU's CMT PG group
 * which is used by the dispatcher to implement load balancing
 * policy.
 * - Tie the PG into the CMT hierarchy by connecting
 * it to its parent and siblings.
 */
int err;
if (level == 0)
/* Already initialized */
continue;
}
} else {
/*
 * A good parent keeps track of their children.
 * The parent's children group is also the PG's
 * siblings.
 */
}
}
}
/*
 * Cache the chip and core IDs in the cpu_t->cpu_physid structure
 * for fast lookups later.
 */
if (cp->cpu_physid) {
/*
 * If this cpu has a PG representing shared cache, then set
 * cpu_cacheid to that PG's logical id
 */
if (pg_cache)
}
/* CPU0 only initialization */
if (is_cpu0) {
is_cpu0 = 0;
}
}
/*
 * Class callback when a CPU is leaving the system (deletion)
 */
/*
 * NOTE(review): most executable statements are missing from this revision;
 * only commentary and fragments of the iteration/cleanup logic survive.
 */
static void
{
group_iter_t i;
/*
 * Find the lgroup that encapsulates this CPU's CMT hierarchy
 */
/*
 * One might wonder how we could be deconfiguring the
 * only CPU in the system.
 *
 * On Starcat systems when null_proc_lpa is detected,
 * the boot CPU (which is already configured into a leaf
 * lgroup), is moved into the root lgroup. This is done by
 * deconfiguring it from both lgroups and processor
 * groups), and then later reconfiguring it back in. This
 * call to pg_cmt_cpu_fini() is part of that deconfiguration.
 *
 * This special case is detected by noting that the platform
 * has changed the CPU's lgrp affiliation (since it now
 * belongs in the root). In this case, use the cmt_lgrp_t
 * cached for the boot CPU, since this is what needs to be
 * torn down.
 */
}
/*
 * First, clean up anything load balancing specific for each of
 * the CPU's PGs that participated in CMT load balancing
 */
/*
 * Remove the PG from the CPU's load balancing lineage
 */
/*
 * If it's about to become empty, destroy its children
 * group, and remove its reference from its siblings.
 * This is done here (rather than below) to avoid removing
 * our reference from a PG that we just eliminated.
 */
else
}
}
}
/*
 * Now that the load balancing lineage updates have happened,
 * remove the CPU from all its PGs (destroying any that become
 * empty).
 */
group_iter_init(&i);
continue;
/*
 * Deleting the CPU from the PG changes the CPU's
 * PG group over which we are actively iterating
 * Re-initialize the iteration
 */
group_iter_init(&i);
/*
 * The PG has become zero sized, so destroy it.
 */
}
}
}
/*
 * Class callback when a CPU is entering a cpu partition
 */
/* NOTE(review): name, parameters and loop body are missing from this revision */
static void
{
group_iter_t i;
/*
 * Ensure that the new partition's PG bitset
 * is large enough for all CMT PGs to which cp
 * belongs
 */
group_iter_init(&i);
continue;
}
}
/*
 * Class callback when a CPU is actually moving partitions
 */
/* NOTE(review): name, parameters and most statements are missing here */
static void
{
/*
 * Iterate over the CPU's CMT PGs
 */
continue;
/*
 * Add the PG to the bitset in the new partition.
 */
/*
 * Remove the PG from the bitset in the old partition
 * if the last of the PG's CPUs have left.
 */
continue;
if (CPU_ACTIVE(cpp) &&
break;
}
}
if (!found)
}
}
/*
 * Class callback when a CPU becomes active (online)
 *
 * This is called in a context where CPUs are paused
 */
/* NOTE(review): name, parameters and most statements are missing here */
static void
{
int err;
group_iter_t i;
group_iter_init(&i);
/*
 * Iterate over the CPU's PGs
 */
continue;
/*
 * If this is the first active CPU in the PG, and it
 * represents a hardware sharing relationship over which
 * CMT load balancing is performed, add it as a candidate
 * for balancing with its siblings.
 */
/*
 * If this is a top level PG, add it as a balancing
 * candidate when balancing within the root lgroup
 */
}
}
/*
 * Notate the CPU in the PG's active CPU bitset.
 * Also notate the PG as being active in its associated
 * partition
 */
}
}
/*
 * Class callback when a CPU goes inactive (offline)
 *
 * This is called in a context where CPUs are paused
 */
/* NOTE(review): name, parameters and most statements are missing here */
static void
{
int err;
group_iter_t i;
group_iter_init(&i);
continue;
/*
 * Remove the CPU from the CMT PG's active CPU group
 * bitmap
 */
/*
 * If there are no more active CPUs in this PG over which
 * load was balanced, remove it as a balancing candidate.
 */
}
}
/*
 * Assert the number of active CPUs does not exceed
 * the total number of CPUs in the PG
 */
/*
 * Update the PG bitset in the CPU's old partition
 */
continue;
if (CPU_ACTIVE(cpp) &&
break;
}
}
if (!found) {
}
}
}
/*
 * Return non-zero if the CPU belongs in the given PG
 */
/*
 * NOTE(review): name/parameters and the membership test guarding the two
 * return statements are missing; as written return (0) is unreachable.
 */
static int
{
/*
 * The CPU belongs if, given the nature of the hardware sharing
 * relationship represented by the PG, the CPU has that
 * relationship with some other CPU already in the PG
 */
return (1);
return (0);
}
/*
 * Hierarchy packing utility routine. The hierarchy order is preserved.
 */
/*
 * NOTE(review): the name/parameters and the statements that actually move
 * entries are missing -- only the scan/compact loop skeleton remains.
 */
static void
{
int i, j;
for (i = 0; i < sz; i++) {
continue;
for (j = i; j < sz; j++) {
break;
}
}
/* No non-empty slot found past i: the remainder is already packed */
if (j == sz)
break;
}
}
/*
 * Return a cmt_lgrp_t * given an lgroup handle.
 */
/*
 * NOTE(review): the name/parameter and the list-walk that locates the
 * matching lgrp are missing; only the loop exit and return survive.
 */
static cmt_lgrp_t *
{
break;
}
return (lgrp);
}
/*
 * Create a cmt_lgrp_t with the specified handle.
 */
/*
 * NOTE(review): the allocation and initialization of the new cmt_lgrp_t are
 * missing; only the return statement remains.
 */
static cmt_lgrp_t *
{
return (lgrp);
}
/*
 * Perform multi-level CMT load balancing of running threads.
 *
 * tp is the thread being enqueued.
 * cp is a hint CPU, against which CMT load balancing will be performed.
 *
 * Returns cp, or a CPU better than cp with respect to balancing
 * running thread load.
 */
/*
 * NOTE(review): the function name/parameters and most statements of the
 * balancing walk are missing from this revision; commentary and loop
 * skeletons below describe the intended top-down traversal.
 */
cpu_t *
{
int self = 0;
int level = 0;
if (GROUP_SIZE(cmt_pgs) == 0)
return (cp); /* nothing to do */
self = 1;
/*
 * Balance across siblings in the CPU's CMT lineage
 * If the thread is homed to the root lgroup, perform
 * top level balancing against other top level PGs
 * in the system. Otherwise, start with the default
 * top level siblings group, which is within the leaf lgroup
 */
else
/*
 * Traverse down the lineage until we find a level that needs
 * balancing, or we get to the end.
 */
for (;;) {
if (nsiblings == 1)
goto next_level;
if (self &&
pg_nrun--; /* Ignore curthread's effect */
/*
 * Find a balancing candidate from among our siblings
 * "hint" is a hint for where to start looking
 */
i = hint;
do {
/*
 * The candidate must not be us, and must
 * have some CPU resources in the thread's
 * partition
 */
break;
}
if (++i >= nsiblings)
i = 0;
} while (i != hint);
if (!tpg)
goto next_level; /* no candidates at this level */
/*
 * Check if the balancing target is underloaded
 * Decide to balance if the target is running fewer
 * threads, or if it's running the same number of threads
 * with more online CPUs
 */
break;
}
break;
}
if (tpg) {
/*
 * Select an idle CPU from the target
 */
do {
break;
}
cpu = 0;
}
return (cp);
}