intr.c revision a288e5a9793fdffe5e842d7e61ab45263e75eaca
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#include <sys/sysmacros.h>
#include <sys/privregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kdi_impl.h>
#include <sys/ddi_periodic.h>
#include <sys/cpu_sgnblk_defs.h>
/* Global locks which protect the interrupt distribution lists */
static kmutex_t intr_dist_lock;
static kmutex_t intr_dist_cpu_lock;
/* Head of the interrupt distribution lists */
static int siron_cpu_setup(cpu_setup_t, int, void *);
extern uint_t softlevel1();
/*
* is received.
*/
/*
* Note:-
* siron_pending was originally created to prevent a resource over-consumption
* bug in setsoftint() (exhaustion of the interrupt pool free list).
* Its original intention is obsolete with the use of iv_pending in
* setsoftint. However, siron_pending stayed around, acting as a second
* gatekeeper preventing soft interrupts from being queued. In this capacity,
* it can lead to hangs on MP systems, where due to global visibility issues
* it can end up set while iv_pending is reset, preventing soft interrupts from
* ever being processed. In addition to its gatekeeper role, intr_init() also
* uses it to flag the situation where siron() was called before siron_inum has
* been defined.
*
* siron() does not need an extra gatekeeper; any cpu that wishes should be
* allowed to queue a soft interrupt. It is softint()'s job to ensure
* correct handling of the queues. Therefore, siron_pending has been
* stripped of its gatekeeper task, retaining only its intr_init job, where
* it indicates that there is a pending need to call siron().
*/
static int siron1_pending; /* backward compatibility */
int intr_dist_debug = 0;
int intr_dist_weight_maxfactor = 2;
/*
* intr_init() - Interrupt initialization
* Initialize the system's interrupt vector table.
*/
void
/*
* NOTE(review): the declarator line (function name and parameter list) is
* not present between the return type and the opening brace; the header
* comment identifies this routine as intr_init(). Confirm against the
* full source.
*/
{
int i;
/*
* Block-scope re-declaration; softlevel1 is also declared extern at file
* scope and is not referenced by any statement visible in this body.
*/
extern uint_t softlevel1();
/* Defined elsewhere; presumably sets up the interrupt vector table. */
init_ivintr();
/*
* Register these software interrupts for ddi timer.
* Software interrupts up to the level 10 are supported.
*
* NOTE(review): the loop body is empty in this copy, so no registration
* is actually performed by the visible code.
*/
for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
}
/*
* A soft interrupt may have been requested prior to the initialization
* of soft interrupts. Soft interrupts can't be dispatched until after
* intr_init(), so we have to wait until now before we can dispatch the
* pending soft interrupt (if any).
*
* siron_pending[] is indexed by level - 1: slot i-1 holds the deferred
* request for level i. Each flag is cleared before sir_on(i) is called.
*/
for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
if (siron_pending[i-1]) {
siron_pending[i-1] = 0;
sir_on(i);
}
}
/*
* siron1_pending is set by siron() and kdi_siron() when they are called
* while siron1_inum is still 0; replay that deferred request now.
*/
if (siron1_pending) {
siron1_pending = 0;
siron();
}
}
/*
* poke_cpu_intr - fall through when poke_cpu calls
*/
/* ARGSUSED */
/*
* NOTE(review): the return type and declarator for this handler are not
* present in this copy; the preceding comment names it poke_cpu_intr.
*/
{
/*
* No work is done here; the handler only returns 1. Presumably 1 means
* "interrupt claimed" to the soft interrupt dispatcher -- confirm.
*/
return (1);
}
/*
* Trigger software interrupts dedicated to ddi timer.
*/
void
{
else
}
/*
* kmdb uses siron (and thus setsoftint) while the world is stopped in order to
* inform its driver component that there's work to be done. We need to keep
* DTrace from instrumenting kmdb's siron and setsoftint. We duplicate siron,
* giving kmdb's version a kdi_ prefix to keep DTrace at bay. The
* implementation of setsoftint is complicated enough that we don't want to
* duplicate it, but at the same time we don't want to preclude tracing either.
* The meat of setsoftint() therefore goes into kdi_setsoftint, with
* setsoftint() implemented as a wrapper. This allows tracing, while still
* providing a way for kmdb to sneak in unmolested.
*/
void
kdi_siron(void)
{
/*
* kmdb's copy of siron(): the same siron1_inum test and the same
* siron1_pending fallback appear in siron() in this file.
*/
if (siron1_inum != 0)
/*
* NOTE(review): the statement for the "siron1_inum is set" case is not
* present in this copy. Presumably it raises the soft interrupt through
* kdi_setsoftint(), as the block comment preceding this function
* describes -- confirm against the full source.
*/
else
/*
* siron1_inum is still 0: record the request so that the
* siron1_pending check in intr_init() can issue it later.
*/
siron1_pending = 1;
}
void
{
}
/*
* Generates softlevel1 interrupt on current CPU if it
* is not pending already.
*/
void
siron(void)
{
/*
* siron1_inum == 0 means the level-1 soft interrupt number has not been
* assigned yet; in that case the request is only recorded (see the
* outer else below).
*/
if (siron1_inum != 0) {
/*
* Once siron_cpu_inum has been allocated, we can
* use per-CPU siron inum.
*
* NOTE(review): the declaration of inum, the condition that pairs
* with the else below, and whatever consumes inum afterwards are not
* present in this copy. Only the fallback to the global siron1_inum
* is visible.
*/
else
inum = siron1_inum;
} else
/* Deferred: intr_init() replays this once it sees the flag set. */
siron1_pending = 1;
}
static void
siron_init(void)
{
/*
* We just allocate memory for per-cpu siron right now. Rest of
* the work is done when CPU is configured.
*
* NOTE(review): no allocation statement is present in this copy, so
* the body as shown does nothing. The only visible caller is the boot
* time per-CPU setup routine that follows this function.
*/
}
/*
* This routine creates per-CPU siron inum for CPUs which are
* configured during boot.
*/
void
/*
* NOTE(review): the declarator line (function name and parameter list) is
* not present in this copy.
*/
{
/* Cursor for the walk over cpu_list. */
cpu_t *c;
/*
* Get the memory for per-CPU siron inums
*/
siron_init();
/* Begin the walk at the head of cpu_list. */
c = cpu_list;
/*
* NOTE(review): the loop body, the terminating while (...) clause and
* the function's closing brace are not present in this copy. Per the
* header comment the loop presumably sets up a siron inum for each
* configured CPU -- confirm against the full source.
*/
do {
}
/*
* siron_poke_cpu_intr - cross-call handler.
*/
/* ARGSUSED */
/*
* NOTE(review): the return type and declarator for this handler are not
* present in this copy; the preceding comment names it siron_poke_cpu_intr.
*/
{
/* generate level1 softint */
/* Delegates to siron() to raise the interrupt. */
siron();
/* Always returns 1; presumably "interrupt claimed" -- confirm. */
return (1);
}
/*
* This routine generates a cross-call on target CPU(s).
*/
void
/*
* NOTE(review): the declarator line (function name and parameter list) is
* not present in this copy; "poke" used below is presumably the cpuset
* parameter naming the target CPU(s).
*/
{
/*
* Raise the level-1 soft interrupt through siron().
*
* NOTE(review): two closing braces follow the return below, so an
* enclosing conditional around this call appears to have been lost.
* Whether siron() is really unconditional cannot be determined here.
*/
siron();
/* Nothing left to poke: skip whatever follows. */
if (CPUSET_ISNULL(poke))
return;
}
/*
* NOTE(review): the code that actually issues the cross-call to the CPUs
* remaining in poke is not present in this copy.
*/
}
/*
* This callback function allows us to create per-CPU siron inum.
*/
/* ARGSUSED */
static int
/*
* NOTE(review): the declarator line is not present in this copy. The
* forward declaration near the top of the file gives the prototype
* siron_cpu_setup(cpu_setup_t, int, void *); "what" below is presumably
* the cpu_setup_t argument.
*/
{
/*
* Dispatch on the CPU setup event. In this copy every case only
* breaks, so no per-CPU siron inum is created or released here.
*/
switch (what) {
case CPU_CONFIG:
break;
case CPU_UNCONFIG:
break;
default:
break;
}
/* Returns 0 for every event, including ones it does not handle. */
return (0);
}
/*
* no_ivintr()
* called by setvecint_tl1() through sys_trap()
* vector interrupt received but not valid or not
* registered in intr_vec_table
* considered as a spurious mondo interrupt
*/
/* ARGSUSED */
void
{
if (!ignore_invalid_vecintr)
#ifdef DEBUG_VEC_INTR
#endif /* DEBUG_VEC_INTR */
}
void
{
/* Find a matching entry in the list */
break;
}
/* Remove entry from list */
else
}
/* Clear pending interrupts at this level if the list is empty */
}
}
/*
* Send a directed interrupt of specified interrupt number id to a cpu.
*/
void
int cpuix, /* cpu to be interrupted */
int intr_id) /* interrupt number id */
{
}
/*
* Take the specified CPU out of participation in interrupts.
* Called by p_online(2) when a processor is being taken off-line.
* This allows interrupt threads being handled on the processor to
* complete before the processor is idled.
*/
int
{
/*
* Turn off the CPU_ENABLE flag before calling the redistribution
* function, since it checks for this in the cpu flags.
*/
return (0);
}
/*
* Allow the specified CPU to participate in interrupts.
* Called by p_online(2) if a processor could not be taken off-line
* because of bound threads, in order to resume processing interrupts.
* Also called after starting a processor.
*/
void
{
}
/*
* Add function to callback list for intr_redist_all_cpus. We keep two lists,
* one for weighted callbacks and one for normal callbacks. Weighted callbacks
* are issued to redirect interrupts of a specified weight, from heavy to
* light. This allows all the interrupts of a given weight to be redistributed
* for all weighted nexus drivers prior to those of less weight.
*/
static void
{
/* Add to tail so that redistribution occurs in original order. */
/* check for problems as we locate the tail */
/*NOTREACHED*/
}
}
}
void
{
}
void
{
}
/*
* Search for the interrupt distribution structure with the specified
* mondo vec reg in the interrupt distribution list. If a match is found,
* then delete the entry from the list. The caller is responsible for
* modifying the mondo vector registers.
*/
static void
{
return;
}
}
if (!panicstr)
}
void
{
}
void
{
}
/*
* Initiate interrupt redistribution. Redistribution improves the isolation
* associated with interrupt weights by ordering operations from heavy weight
* to light weight. When a CPU's orientation changes relative to interrupts,
* there is *always* a redistribution to accommodate this change (call to
* intr_redist_all_cpus()). As devices attach and detach, it is also possible
* that a redistribution could improve the quality of an initialization. For
* example, if you are not using a NIC it may not be attached with s10 (devfs).
* If you then configure the NIC (ifconfig), this may cause the NIC to attach
* and plumb interrupts. The CPU assignment for the NIC's interrupts is
* occurring late, so optimal "isolation" relative to weight is not occurring.
* The same applies to detach, although in this case doing the redistribution
* might improve "spread" for medium weight devices since the "isolation" of
* a higher weight device may no longer be present.
*
* NB: We should provide a utility to trigger redistribution (ala "intradm -r").
*
* NB: There is risk associated with automatically triggering execution of the
* redistribution code at arbitrary times. The risk comes from the fact that
* there is a lot of low-level hardware interaction associated with a
* redistribution. At some point we may want this code to perform automatic
* redistribution (a redistribution thread that checks whether the
* weight delta is large enough, and call cv_signal from timeout - causing
* thread to call i_ddi_intr_redist_all_cpus()) but this is considered too
* risky at this time.
*/
void
{
}
/*
* Redistribute all interrupts
*
* This function redistributes all interrupting devices, running the
* parent callback functions for each node.
*/
void
intr_redist_all_cpus(void)
{
/*
* zero cpu_intr_weight on all cpus - it is safe to traverse
* cpu_list since we hold cpu_lock.
*
* NOTE(review): the declaration and initialization of cp, the step that
* advances it, and the closing "} while (...)" of this loop are not
* present in this copy.
*/
do {
cp->cpu_intr_weight = 0;
/*
* Assume that this redistribution may encounter a device weight
* via driver.conf tuning of "ddi-intr-weight" that is at most
* intr_dist_weight_maxfactor times larger.
*
* NOTE(review): the computation of max_weight from
* intr_dist_weight_maxfactor is not present in this copy. Only the
* reset of intr_dist_weight_max to 1 and the tail of a debug print
* remain.
*/
intr_dist_weight_max = 1;
"intr_redist_all_cpus: %d-0\n", max_weight));
/*
* Redistribute weighted, from heavy to light. The callback that
* specifies a weight equal to weight_max should redirect all
* interrupts of weight weight_max or greater [weight_max, inf.).
* Interrupts of lesser weight should be processed on the call with
* the matching weight. This allows all the heavier weight interrupts
* on all weighted busses (multiple pci busses) to be redirected prior
* to any lesser weight interrupts.
*
* NOTE(review): the loop that walks the weighted callback list is not
* present in this copy.
*/
/* redistribute normal (non-weighted) interrupts */
/*
* NOTE(review): the walk of the non-weighted callback list is likewise
* not present in this copy.
*/
}
void
{
}
/*
* Determine what CPU to target, based on interrupt policy.
*
* INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
* advance through interrupt enabled cpus (round-robin).
*
* INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
* cpu_intr_weight, round robin when all equal.
*
* Weighted interrupt distribution provides two things: "spread" of weight
* (associated with algorithm itself) and "isolation" (associated with a
* particular device weight). A redistribution is what provides optimal
* "isolation" of heavy weight interrupts, optimal "spread" of weight
* (relative to what came before) is always occurring.
*
* An interrupt weight is a subjective number that represents the
* percentage of a CPU required to service a device's interrupts: the
* default weight is 0% (however the algorithm still maintains
* round-robin), a network interface controller (NIC) may have a large
* weight (35%). Interrupt weight only has meaning relative to the
* interrupt weight of other devices: a CPU can be weighted more than
* 100%, and a single device might consume more than 100% of a CPU.
*
* A coarse interrupt weight can be defined by the parent nexus driver
* based on bus specific information, like pci class codes. A nexus
* driver that supports device interrupt weighting for its children
* should call intr_dist_cpuid_add/rem_device_weight(), which adds
* and removes the weight of a device from the CPU that an interrupt
* is directed at. The quality of initialization improves when the
* device interrupt weights more accurately reflect actual run-time weights,
* and as the assignments are ordered from heavy to light.
*
* The implementation also supports interrupt weight being specified in
* driver.conf files via the property "ddi-intr-weight", which takes
* precedence over the nexus supplied weight. This support is added to
* permit possible tweaking in the product in response to customer
* problems. This is not a formal or committed interface.
*
* While a weighted approach chooses the CPU providing the best spread
* given past weights, less than optimal isolation can result in cases
* where heavy weight devices show up last. The nexus driver's interrupt
* redistribution logic should use intr_dist_add/rem_weighted so that
* interrupts can be redistributed heavy first for optimal isolation.
*/
/*
* NOTE(review): the return-type line for this definition is not present in
* this copy; the function returns the local int cpuid.
*/
intr_dist_cpuid(void)
{
/*
* Value handed back to the caller. It starts at -1, and no statement
* visible in this copy assigns it afterwards; the selection statements
* have been lost (see the notes below).
*/
int cpuid = -1;
/* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
switch (intr_policy) {
case INTR_CURRENT_CPU:
/*
* NOTE(review): the assignment for this policy is not present here;
* presumably it selects the CPU the caller is running on -- confirm.
*/
break;
case INTR_BOOT_CPU:
/* Retired policy: selecting it is treated as fatal. */
panic("INTR_BOOT_CPU no longer supported.");
/*NOTREACHED*/
case INTR_FLAT_DIST:
case INTR_WEIGHTED_DIST:
default:
/* Unknown policy values share the flat/weighted path. */
/*
* Ensure that curr_cpu is valid - cpu_next will be NULL if
* the cpu has been deleted (cpu structs are never freed).
*/
/*
* Advance to online CPU after curr_cpu (round-robin). For
* INTR_WEIGHTED_DIST we choose the cpu with the lightest
* weight. For a nexus that does not support weight the
* default weight of zero is used. We degrade to round-robin
* behavior among equal weights. The default weight is zero
* and round-robin behavior continues.
*
* Disable preemption while traversing cpu_next_onln to
* ensure the list does not change. This works because
* modifiers of this list and other lists in a struct cpu
* call pause_cpus() before making changes.
*/
/*
* NOTE(review): in this copy the loop below has lost its conditions,
* the statements that record the chosen CPU, and its terminating
* "} while (...)". Only the skeleton of the control flow remains.
*/
do {
/* Skip CPUs with interrupts disabled */
continue;
if (intr_policy == INTR_FLAT_DIST) {
/* select CPU */
/* Flat policy stops at the first eligible CPU. */
break;
/* Choose if lighter weight */
}
/* update static pointer for next round-robin */
break;
}
return (cpuid);
}
/*
* Add or remove the weight of a device from a CPU's interrupt weight.
*
* We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
* their children to improve the overall quality of interrupt initialization.
*
* If a nexus shares the CPU returned by a single intr_dist_cpuid() call
* among multiple devices (sharing ino) then the nexus should call
* intr_dist_cpuid_add/rem_device_weight for each device separately. Devices
* that share must specify the same cpuid.
*
* If a nexus driver is unable to determine the cpu at remove_intr time
* for some of its interrupts, then it should not call add_device_weight -
* intr_dist_cpuid will still provide round-robin.
*
* An established device weight (from dev_info node) takes precedence over
* the weight passed in. If a device weight is not already established
* then the passed in nexus weight is established.
*/
void
/*
* NOTE(review): the declarator line (function name and parameter list) and
* the local declarations are not present in this copy. The preceding
* comment refers to this pair of routines as
* intr_dist_cpuid_add/rem_device_weight; nweight is presumably the
* nexus-supplied weight and eweight the weight already established on the
* device node -- confirm.
*/
{
/*
* For non-weighted policy everything has weight of zero (and we get
* round-robin distribution from intr_dist_cpuid).
* NB: intr_policy is limited to this file. A weighted nexus driver
* calls this routine even if intr_policy has been patched to
* INTR_FLAT_DIST.
*/
if (intr_policy != INTR_WEIGHTED_DIST)
return;
/* if no established weight, establish nexus weight */
/* eweight < 0 is the "no weight established yet" case. */
if (eweight < 0) {
/*
* NOTE(review): the statement for nweight > 0 is not present in this
* copy; presumably it records nweight on the device -- confirm. A
* non-positive nexus weight is normalized to 0.
*/
if (nweight > 0)
else
nweight = 0;
/*
* NOTE(review): the statement for the outer else is not present in
* this copy; per the preceding block comment an established weight
* takes precedence over the one passed in.
*/
} else
/* Establish exclusion for cpu_intr_weight manipulation */
/* update intr_dist_weight_max */
/*
* NOTE(review): the statement guarded below is not present in this copy;
* presumably it raises intr_dist_weight_max to nweight -- confirm.
*/
if (nweight > intr_dist_weight_max)
}
void
/*
* NOTE(review): the declarator line (function name and parameter list) and
* the local declarations are not present in this copy; this appears to be
* the "rem" counterpart of the add-device-weight routine above it.
*/
{
/* Weights are only tracked under the weighted policy. */
if (intr_policy != INTR_WEIGHTED_DIST)
return;
/* remove weight of device from cpu */
/* A negative weight is treated as zero. */
if (weight < 0)
weight = 0;
/* Establish exclusion for cpu_intr_weight manipulation */
/*
* NOTE(review): the lookup of cp, the subtraction of weight from
* cp->cpu_intr_weight and the statement guarded below are not present
* in this copy; the guard presumably clamps a negative total back to
* zero -- confirm.
*/
if (cp->cpu_intr_weight < 0)
}
{
}
void
{
else
}
void
{
(void) rem_softintr(inum);
}
void
{
}