/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* PX Interrupt Block implementation
*/
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/async.h>
#include <sys/systm.h> /* panicstr */
#include <sys/spl.h>
#include <sys/sunddi.h>
#include <sys/machsystm.h> /* intr_dist_add */
#include <sys/ddi_impldefs.h>
#include <sys/cpuvar.h>
#include <sys/time.h>
#include "px_obj.h"
/*LINTLIBRARY*/
static void px_ib_intr_redist(void *arg, int32_t weight_max, int32_t weight);
static void px_ib_cpu_ticks_to_ih_nsec(px_ib_t *ib_p, px_ih_t *ih_p,
uint32_t cpu_id);
static uint_t px_ib_intr_reset(void *arg);
static void px_fill_in_intr_devs(pcitool_intr_dev_t *dev, char *driver_name,
char *path_name, int instance);
extern uint64_t xc_tick_jump_limit;
int
px_ib_attach(px_t *px_p)
{
dev_info_t *dip = px_p->px_dip;
px_ib_t *ib_p;
sysino_t sysino;
px_fault_t *fault_p = &px_p->px_fault;
DBG(DBG_IB, dip, "px_ib_attach\n");
if (px_lib_intr_devino_to_sysino(px_p->px_dip,
px_p->px_inos[PX_INTR_PEC], &sysino) != DDI_SUCCESS)
return (DDI_FAILURE);
/*
* Allocate interrupt block state structure and link it to
* the px state structure.
*/
ib_p = kmem_zalloc(sizeof (px_ib_t), KM_SLEEP);
px_p->px_ib_p = ib_p;
ib_p->ib_px_p = px_p;
ib_p->ib_ino_lst = (px_ino_t *)NULL;
mutex_init(&ib_p->ib_intr_lock, NULL, MUTEX_DRIVER, NULL);
mutex_init(&ib_p->ib_ino_lst_mutex, NULL, MUTEX_DRIVER, NULL);
bus_func_register(BF_TYPE_RESINTR, px_ib_intr_reset, ib_p);
intr_dist_add_weighted(px_ib_intr_redist, ib_p);
/*
* Initialize PEC fault data structure
*/
fault_p->px_fh_dip = dip;
fault_p->px_fh_sysino = sysino;
fault_p->px_err_func = px_err_dmc_pec_intr;
fault_p->px_intr_ino = px_p->px_inos[PX_INTR_PEC];
return (DDI_SUCCESS);
}
void
px_ib_detach(px_t *px_p)
{
px_ib_t *ib_p = px_p->px_ib_p;
dev_info_t *dip = px_p->px_dip;
DBG(DBG_IB, dip, "px_ib_detach\n");
bus_func_unregister(BF_TYPE_RESINTR, px_ib_intr_reset, ib_p);
intr_dist_rem_weighted(px_ib_intr_redist, ib_p);
mutex_destroy(&ib_p->ib_ino_lst_mutex);
mutex_destroy(&ib_p->ib_intr_lock);
px_ib_free_ino_all(ib_p);
px_p->px_ib_p = NULL;
kmem_free(ib_p, sizeof (px_ib_t));
}
void
px_ib_intr_enable(px_t *px_p, cpuid_t cpu_id, devino_t ino)
{
px_ib_t *ib_p = px_p->px_ib_p;
sysino_t sysino;
/*
* Map the devino to a sysino and enable the interrupt on the given cpu.
*/
mutex_enter(&ib_p->ib_intr_lock);
DBG(DBG_IB, px_p->px_dip,
"px_ib_intr_enable: ino=%x cpu_id=%x\n", ino, cpu_id);
if (px_lib_intr_devino_to_sysino(px_p->px_dip, ino,
&sysino) != DDI_SUCCESS) {
DBG(DBG_IB, px_p->px_dip,
"px_ib_intr_enable: px_intr_devino_to_sysino() failed\n");
mutex_exit(&ib_p->ib_intr_lock);
return;
}
PX_INTR_ENABLE(px_p->px_dip, sysino, cpu_id);
px_lib_intr_setstate(px_p->px_dip, sysino, INTR_IDLE_STATE);
mutex_exit(&ib_p->ib_intr_lock);
}
/*ARGSUSED*/
void
px_ib_intr_disable(px_ib_t *ib_p, devino_t ino, int wait)
{
sysino_t sysino;
mutex_enter(&ib_p->ib_intr_lock);
DBG(DBG_IB, ib_p->ib_px_p->px_dip, "px_ib_intr_disable: ino=%x\n", ino);
/* Disable the interrupt */
if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip, ino,
&sysino) != DDI_SUCCESS) {
DBG(DBG_IB, ib_p->ib_px_p->px_dip,
"px_ib_intr_disable: px_intr_devino_to_sysino() failed\n");
mutex_exit(&ib_p->ib_intr_lock);
return;
}
PX_INTR_DISABLE(ib_p->ib_px_p->px_dip, sysino);
mutex_exit(&ib_p->ib_intr_lock);
}
void
px_ib_intr_dist_en(dev_info_t *dip, cpuid_t cpu_id, devino_t ino,
boolean_t wait_flag)
{
uint32_t old_cpu_id;
sysino_t sysino;
intr_valid_state_t enabled = 0;
hrtime_t start_time, prev, curr, interval, jump;
hrtime_t intr_timeout;
intr_state_t intr_state;
int e = DDI_SUCCESS;
DBG(DBG_IB, dip, "px_ib_intr_dist_en: ino=0x%x\n", ino);
if (px_lib_intr_devino_to_sysino(dip, ino, &sysino) != DDI_SUCCESS) {
DBG(DBG_IB, dip, "px_ib_intr_dist_en: "
"px_intr_devino_to_sysino() failed, ino 0x%x\n", ino);
return;
}
/* Skip enabling disabled interrupts */
if (px_lib_intr_getvalid(dip, sysino, &enabled) != DDI_SUCCESS) {
DBG(DBG_IB, dip, "px_ib_intr_dist_en: px_intr_getvalid() "
"failed, sysino 0x%x\n", sysino);
return;
}
if (!enabled)
return;
/* Done if redistributed onto the same cpuid */
if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) != DDI_SUCCESS) {
DBG(DBG_IB, dip, "px_ib_intr_dist_en: "
"px_intr_gettarget() failed\n");
return;
}
if (cpu_id == old_cpu_id)
return;
if (!wait_flag)
goto done;
/* Busy wait on pending interrupts */
PX_INTR_DISABLE(dip, sysino);
intr_timeout = px_intrpend_timeout;
jump = TICK_TO_NSEC(xc_tick_jump_limit);
for (curr = start_time = gethrtime(); !panicstr &&
((e = px_lib_intr_getstate(dip, sysino, &intr_state)) ==
DDI_SUCCESS) &&
(intr_state == INTR_DELIVERED_STATE); /* */) {
/*
* If we have a really large jump in hrtime, it is most
* probably because we entered the debugger (or OBP,
* in general). So, we adjust the timeout accordingly
* to prevent declaring an interrupt timeout. The
* master-interrupt mechanism in OBP should deliver
* the interrupts properly.
*/
prev = curr;
curr = gethrtime();
interval = curr - prev;
if (interval > jump)
intr_timeout += interval;
if (curr - start_time > intr_timeout) {
cmn_err(CE_WARN,
"%s%d: px_ib_intr_dist_en: sysino 0x%lx(ino 0x%x) "
"from cpu id 0x%x to 0x%x timeout",
ddi_driver_name(dip), ddi_get_instance(dip),
sysino, ino, old_cpu_id, cpu_id);
e = DDI_FAILURE;
break;
}
}
if (e != DDI_SUCCESS)
DBG(DBG_IB, dip, "px_ib_intr_dist_en: failed, "
"ino 0x%x sysino 0x%x\n", ino, sysino);
done:
PX_INTR_ENABLE(dip, sysino, cpu_id);
}
static void
px_ib_cpu_ticks_to_ih_nsec(px_ib_t *ib_p, px_ih_t *ih_p, uint32_t cpu_id)
{
extern kmutex_t pxintr_ks_template_lock;
hrtime_t ticks;
/*
* Because we are updating two fields in ih_t we must lock
* pxintr_ks_template_lock to prevent someone from reading the
* kstats after we set ih_ticks to 0 and before we increment
* ih_nsec to compensate.
*
* We must also protect against the interrupt arriving and incrementing
* ih_ticks between the time we read it and when we reset it to 0.
* To do this we use atomic_swap.
*/
ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
mutex_enter(&pxintr_ks_template_lock);
ticks = atomic_swap_64(&ih_p->ih_ticks, 0);
ih_p->ih_nsec += (uint64_t)tick2ns(ticks, cpu_id);
mutex_exit(&pxintr_ks_template_lock);
}
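/*
* Illustrative sketch only (the reader side is not part of this file):
* a kstat update routine reading these fields would take the same
* template lock so that ih_nsec and ih_ticks are seen consistently:
*
*	mutex_enter(&pxintr_ks_template_lock);
*	nsec = ih_p->ih_nsec + tick2ns(ih_p->ih_ticks, CPU->cpu_id);
*	mutex_exit(&pxintr_ks_template_lock);
*/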
/*
* Redistribute interrupts of the specified weight. The first call has a weight
* of weight_max, which can be used to trigger initialization for
* redistribution. The inos with weight [weight_max, inf.) should be processed
* on the "weight == weight_max" call. This first call is followed by calls
* of decreasing weights; inos of that weight should be processed on each
* such call. The final call specifies a weight of zero, which can be used
* to trigger processing of stragglers.
*/
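/*
* For illustration only: with a weight_max of 4 and device weights of
* {4, 2} registered, the intr_dist framework would invoke this
* callback roughly as follows:
*
*	px_ib_intr_redist(ib_p, 4, 4);	initialization; inos of weight >= 4
*	px_ib_intr_redist(ib_p, 4, 2);	inos of weight 2
*	px_ib_intr_redist(ib_p, 4, 0);	final call; stragglers
*/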
static void
px_ib_intr_redist(void *arg, int32_t weight_max, int32_t weight)
{
px_ib_t *ib_p = (px_ib_t *)arg;
px_t *px_p = ib_p->ib_px_p;
dev_info_t *dip = px_p->px_dip;
px_ino_t *ino_p;
px_ino_pil_t *ipil_p;
px_ih_t *ih_lst;
int32_t dweight = 0;
int i;
/* Redistribute internal interrupts */
if (weight == 0) {
mutex_enter(&ib_p->ib_intr_lock);
px_ib_intr_dist_en(dip, intr_dist_cpuid(),
px_p->px_inos[PX_INTR_PEC], B_FALSE);
mutex_exit(&ib_p->ib_intr_lock);
px_hp_intr_redist(px_p);
}
/* Redistribute device interrupts */
mutex_enter(&ib_p->ib_ino_lst_mutex);
px_msiq_redist(px_p);
for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next_p) {
/*
* Recompute the sum of interrupt weights of devices that
* share the same ino upon the first call, marked by
* (weight == weight_max).
*/
if (weight == weight_max) {
ino_p->ino_intr_weight = 0;
for (ipil_p = ino_p->ino_ipil_p; ipil_p;
ipil_p = ipil_p->ipil_next_p) {
for (i = 0, ih_lst = ipil_p->ipil_ih_head;
i < ipil_p->ipil_ih_size; i++,
ih_lst = ih_lst->ih_next) {
dweight = i_ddi_get_intr_weight(
ih_lst->ih_dip);
if (dweight > 0)
ino_p->ino_intr_weight +=
dweight;
}
}
}
/*
* As part of redistributing weighted interrupts over cpus,
* the nexus redistributes device interrupts and updates each
* cpu's weight. The intent is for the most lightly weighted
* cpu to take the next interrupt and gain weight; an
* attention-demanding device thereby gains more cpu attention
* by making itself heavy.
*/
if ((weight == ino_p->ino_intr_weight) ||
((weight >= weight_max) &&
(ino_p->ino_intr_weight >= weight_max))) {
uint32_t orig_cpuid = ino_p->ino_cpuid;
if (cpu[orig_cpuid] == NULL)
orig_cpuid = CPU->cpu_id;
DBG(DBG_IB, dip, "px_ib_intr_redist: sysino 0x%llx "
"current cpuid 0x%x current default cpuid 0x%x\n",
ino_p->ino_sysino, ino_p->ino_cpuid,
ino_p->ino_default_cpuid);
/* select target cpuid and mark ino established */
if (ino_p->ino_default_cpuid == -1)
ino_p->ino_cpuid = ino_p->ino_default_cpuid =
intr_dist_cpuid();
else if ((ino_p->ino_cpuid !=
ino_p->ino_default_cpuid) &&
cpu[ino_p->ino_default_cpuid] &&
cpu_intr_on(cpu[ino_p->ino_default_cpuid]))
ino_p->ino_cpuid = ino_p->ino_default_cpuid;
else if (!cpu_intr_on(cpu[ino_p->ino_cpuid]))
ino_p->ino_cpuid = intr_dist_cpuid();
DBG(DBG_IB, dip, "px_ib_intr_redist: sysino 0x%llx "
"new cpuid 0x%x new default cpuid 0x%x\n",
ino_p->ino_sysino, ino_p->ino_cpuid,
ino_p->ino_default_cpuid);
/* Add device weight to targeted cpu. */
for (ipil_p = ino_p->ino_ipil_p; ipil_p;
ipil_p = ipil_p->ipil_next_p) {
for (i = 0, ih_lst = ipil_p->ipil_ih_head;
i < ipil_p->ipil_ih_size; i++,
ih_lst = ih_lst->ih_next) {
dweight = i_ddi_get_intr_weight(
ih_lst->ih_dip);
intr_dist_cpuid_add_device_weight(
ino_p->ino_cpuid, ih_lst->ih_dip,
dweight);
/*
* Different cpus may have different
* clock speeds. To account for this,
* whenever an interrupt is moved to a
* new CPU, we convert the accumulated
* ticks into nsec, based upon the clock
* rate of the prior CPU.
*
* It is possible that the prior CPU no
* longer exists. In this case, fall
* back to using this CPU's clock rate.
*
* Note that the value in ih_ticks has
* already been corrected for any power
* savings mode which might have been
* in effect.
*/
px_ib_cpu_ticks_to_ih_nsec(ib_p, ih_lst,
orig_cpuid);
}
}
/* enable interrupt on new targeted cpu */
px_ib_intr_dist_en(dip, ino_p->ino_cpuid,
ino_p->ino_ino, B_TRUE);
}
}
mutex_exit(&ib_p->ib_ino_lst_mutex);
}
/*
* Reset interrupts to IDLE. This function is called during
* panic handling after redistributing interrupts; it's needed to
* support dumping to network devices after 'sync' from OBP.
*
* N.B. This routine runs in a context where all other threads
* are permanently suspended.
*/
static uint_t
px_ib_intr_reset(void *arg)
{
px_ib_t *ib_p = (px_ib_t *)arg;
DBG(DBG_IB, ib_p->ib_px_p->px_dip, "px_ib_intr_reset\n");
if (px_lib_intr_reset(ib_p->ib_px_p->px_dip) != DDI_SUCCESS)
return (BF_FATAL);
return (BF_NONE);
}
/*
* Locate the px_ino_t structure on ib_p->ib_ino_lst according to ino#;
* returns NULL if not found.
*/
px_ino_t *
px_ib_locate_ino(px_ib_t *ib_p, devino_t ino_num)
{
px_ino_t *ino_p = ib_p->ib_ino_lst;
ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
for (; ino_p && ino_p->ino_ino != ino_num; ino_p = ino_p->ino_next_p)
;
return (ino_p);
}
px_ino_t *
px_ib_alloc_ino(px_ib_t *ib_p, devino_t ino_num)
{
sysino_t sysino;
px_ino_t *ino_p;
if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip,
ino_num, &sysino) != DDI_SUCCESS)
return (NULL);
ino_p = kmem_zalloc(sizeof (px_ino_t), KM_SLEEP);
ino_p->ino_next_p = ib_p->ib_ino_lst;
ib_p->ib_ino_lst = ino_p;
ino_p->ino_ino = ino_num;
ino_p->ino_sysino = sysino;
ino_p->ino_ib_p = ib_p;
ino_p->ino_unclaimed_intrs = 0;
ino_p->ino_lopil = 0;
ino_p->ino_cpuid = ino_p->ino_default_cpuid = (cpuid_t)-1;
return (ino_p);
}
px_ino_pil_t *
px_ib_new_ino_pil(px_ib_t *ib_p, devino_t ino_num, uint_t pil, px_ih_t *ih_p)
{
px_ino_pil_t *ipil_p = kmem_zalloc(sizeof (px_ino_pil_t), KM_SLEEP);
px_ino_t *ino_p;
if ((ino_p = px_ib_locate_ino(ib_p, ino_num)) == NULL)
ino_p = px_ib_alloc_ino(ib_p, ino_num);
ASSERT(ino_p != NULL);
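/* Singleton circular list: the sole handler points back to itself */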
ih_p->ih_next = ih_p;
ipil_p->ipil_pil = pil;
ipil_p->ipil_ih_head = ih_p;
ipil_p->ipil_ih_tail = ih_p;
ipil_p->ipil_ih_start = ih_p;
ipil_p->ipil_ih_size = 1;
ipil_p->ipil_ino_p = ino_p;
ipil_p->ipil_next_p = ino_p->ino_ipil_p;
ino_p->ino_ipil_p = ipil_p;
ino_p->ino_ipil_size++;
if ((ino_p->ino_lopil == 0) || (ino_p->ino_lopil > pil))
ino_p->ino_lopil = pil;
return (ipil_p);
}
void
px_ib_delete_ino_pil(px_ib_t *ib_p, px_ino_pil_t *ipil_p)
{
px_ino_t *ino_p = ipil_p->ipil_ino_p;
ushort_t pil = ipil_p->ipil_pil;
px_ino_pil_t *prev, *next;
ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
if (ino_p->ino_ipil_p == ipil_p)
ino_p->ino_ipil_p = ipil_p->ipil_next_p;
else {
for (prev = next = ino_p->ino_ipil_p; next != ipil_p;
prev = next, next = next->ipil_next_p)
;
if (prev)
prev->ipil_next_p = ipil_p->ipil_next_p;
}
kmem_free(ipil_p, sizeof (px_ino_pil_t));
if ((--ino_p->ino_ipil_size) && (ino_p->ino_lopil == pil)) {
for (next = ino_p->ino_ipil_p, pil = next->ipil_pil;
next; next = next->ipil_next_p) {
if (pil > next->ipil_pil)
pil = next->ipil_pil;
}
/*
* Value stored in pil should be the lowest pil.
*/
ino_p->ino_lopil = pil;
}
if (ino_p->ino_ipil_size)
return;
ino_p->ino_lopil = 0;
if (ino_p->ino_msiq_p)
return;
if (ib_p->ib_ino_lst == ino_p)
ib_p->ib_ino_lst = ino_p->ino_next_p;
else {
px_ino_t *list = ib_p->ib_ino_lst;
for (; list->ino_next_p != ino_p; list = list->ino_next_p)
;
list->ino_next_p = ino_p->ino_next_p;
}
}
/*
* Free all inos when we are detaching.
*/
void
px_ib_free_ino_all(px_ib_t *ib_p)
{
px_ino_t *ino_p = ib_p->ib_ino_lst;
px_ino_t *next = NULL;
while (ino_p) {
next = ino_p->ino_next_p;
kmem_free(ino_p, sizeof (px_ino_t));
ino_p = next;
}
}
/*
* Locate the px_ino_pil_t structure on ino_p->ino_ipil_p according to pil;
* returns NULL if not found.
*/
px_ino_pil_t *
px_ib_ino_locate_ipil(px_ino_t *ino_p, uint_t pil)
{
px_ino_pil_t *ipil_p = ino_p->ino_ipil_p;
for (; ipil_p && ipil_p->ipil_pil != pil; ipil_p = ipil_p->ipil_next_p)
;
return (ipil_p);
}
int
px_ib_ino_add_intr(px_t *px_p, px_ino_pil_t *ipil_p, px_ih_t *ih_p)
{
px_ino_t *ino_p = ipil_p->ipil_ino_p;
px_ib_t *ib_p = ino_p->ino_ib_p;
devino_t ino = ino_p->ino_ino;
sysino_t sysino = ino_p->ino_sysino;
dev_info_t *dip = px_p->px_dip;
cpuid_t curr_cpu;
hrtime_t start_time;
intr_state_t intr_state;
int ret = DDI_SUCCESS;
ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
ASSERT(ib_p == px_p->px_ib_p);
DBG(DBG_IB, dip, "px_ib_ino_add_intr ino=%x\n", ino_p->ino_ino);
/* Disable the interrupt */
if ((ret = px_lib_intr_gettarget(dip, sysino,
&curr_cpu)) != DDI_SUCCESS) {
DBG(DBG_IB, dip,
"px_ib_ino_add_intr px_intr_gettarget() failed\n");
return (ret);
}
PX_INTR_DISABLE(dip, sysino);
/* Busy wait on pending interrupt */
for (start_time = gethrtime(); !panicstr &&
((ret = px_lib_intr_getstate(dip, sysino, &intr_state))
== DDI_SUCCESS) && (intr_state == INTR_DELIVERED_STATE); /* */) {
if (gethrtime() - start_time > px_intrpend_timeout) {
cmn_err(CE_WARN, "%s%d: px_ib_ino_add_intr: pending "
"sysino 0x%lx(ino 0x%x) timeout",
ddi_driver_name(dip), ddi_get_instance(dip),
sysino, ino);
ret = DDI_FAILURE;
break;
}
}
/*
* If the interrupt was previously blocked (left in pending state)
* because of jabber we need to clear the pending state in case the
* jabber has gone away.
*/
if (ino_p->ino_unclaimed_intrs > px_unclaimed_intr_max) {
cmn_err(CE_WARN,
"%s%d: px_ib_ino_add_intr: ino 0x%x has been unblocked",
ddi_driver_name(dip), ddi_get_instance(dip), ino);
ino_p->ino_unclaimed_intrs = 0;
ret = px_lib_intr_setstate(dip, sysino, INTR_IDLE_STATE);
}
if (ret != DDI_SUCCESS) {
DBG(DBG_IB, dip, "px_ib_ino_add_intr: failed, "
"ino 0x%x sysino 0x%x\n", ino, sysino);
return (ret);
}
/* Link up px_ih_t */
ih_p->ih_next = ipil_p->ipil_ih_head;
ipil_p->ipil_ih_tail->ih_next = ih_p;
ipil_p->ipil_ih_tail = ih_p;
ipil_p->ipil_ih_start = ipil_p->ipil_ih_head;
ipil_p->ipil_ih_size++;
/* Re-enable interrupt */
PX_INTR_ENABLE(dip, sysino, curr_cpu);
return (ret);
}
/*
* Removes px_ih_t from the ino's link list.
* Uses the hardware mutex to lock out interrupt threads.
* Side effect: the interrupt belonging to that ino is turned off on return.
* If we are sharing the PX slot with other inos, the caller needs
* to turn it back on.
*/
int
px_ib_ino_rem_intr(px_t *px_p, px_ino_pil_t *ipil_p, px_ih_t *ih_p)
{
px_ino_t *ino_p = ipil_p->ipil_ino_p;
devino_t ino = ino_p->ino_ino;
sysino_t sysino = ino_p->ino_sysino;
dev_info_t *dip = px_p->px_dip;
px_ih_t *ih_lst = ipil_p->ipil_ih_head;
hrtime_t start_time;
intr_state_t intr_state;
int i, ret = DDI_SUCCESS;
ASSERT(MUTEX_HELD(&ino_p->ino_ib_p->ib_ino_lst_mutex));
DBG(DBG_IB, px_p->px_dip, "px_ib_ino_rem_intr ino=%x\n",
ino_p->ino_ino);
/* Disable the interrupt */
PX_INTR_DISABLE(px_p->px_dip, sysino);
if (ipil_p->ipil_ih_size == 1) {
if (ih_lst != ih_p)
goto not_found;
/* No need to set head/tail as ipil_p will be freed */
goto reset;
}
/* Busy wait on pending interrupt */
for (start_time = gethrtime(); !panicstr &&
((ret = px_lib_intr_getstate(dip, sysino, &intr_state))
== DDI_SUCCESS) && (intr_state == INTR_DELIVERED_STATE); /* */) {
if (gethrtime() - start_time > px_intrpend_timeout) {
cmn_err(CE_WARN, "%s%d: px_ib_ino_rem_intr: pending "
"sysino 0x%lx(ino 0x%x) timeout",
ddi_driver_name(dip), ddi_get_instance(dip),
sysino, ino);
ret = DDI_FAILURE;
break;
}
}
/*
* If the interrupt was previously blocked (left in pending state)
* because of jabber we need to clear the pending state in case the
* jabber has gone away.
*/
if (ino_p->ino_unclaimed_intrs > px_unclaimed_intr_max) {
cmn_err(CE_WARN, "%s%d: px_ib_ino_rem_intr: "
"ino 0x%x has been unblocked",
ddi_driver_name(dip), ddi_get_instance(dip), ino);
ino_p->ino_unclaimed_intrs = 0;
ret = px_lib_intr_setstate(dip, sysino, INTR_IDLE_STATE);
}
if (ret != DDI_SUCCESS) {
DBG(DBG_IB, dip, "px_ib_ino_rem_intr: failed, "
"ino 0x%x sysino 0x%x\n", ino, sysino);
return (ret);
}
/* Search the link list for ih_p */
for (i = 0; (i < ipil_p->ipil_ih_size) &&
(ih_lst->ih_next != ih_p); i++, ih_lst = ih_lst->ih_next)
;
if (ih_lst->ih_next != ih_p)
goto not_found;
/* Remove ih_p from the link list and maintain the head/tail */
ih_lst->ih_next = ih_p->ih_next;
if (ipil_p->ipil_ih_head == ih_p)
ipil_p->ipil_ih_head = ih_p->ih_next;
if (ipil_p->ipil_ih_tail == ih_p)
ipil_p->ipil_ih_tail = ih_lst;
ipil_p->ipil_ih_start = ipil_p->ipil_ih_head;
reset:
if (ih_p->ih_config_handle)
pci_config_teardown(&ih_p->ih_config_handle);
if (ih_p->ih_ksp != NULL)
kstat_delete(ih_p->ih_ksp);
kmem_free(ih_p, sizeof (px_ih_t));
ipil_p->ipil_ih_size--;
return (ret);
not_found:
DBG(DBG_R_INTX, ino_p->ino_ib_p->ib_px_p->px_dip,
"ino_p=%x does not have ih_p=%x\n", ino_p, ih_p);
return (DDI_FAILURE);
}
px_ih_t *
px_ib_intr_locate_ih(px_ino_pil_t *ipil_p, dev_info_t *rdip,
uint32_t inum, msiq_rec_type_t rec_type, msgcode_t msg_code)
{
px_ih_t *ih_p = ipil_p->ipil_ih_head;
int i;
for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
if ((ih_p->ih_dip == rdip) && (ih_p->ih_inum == inum) &&
(ih_p->ih_rec_type == rec_type) &&
(ih_p->ih_msg_code == msg_code))
return (ih_p);
}
return ((px_ih_t *)NULL);
}
px_ih_t *
px_ib_alloc_ih(dev_info_t *rdip, uint32_t inum,
uint_t (*int_handler)(caddr_t int_handler_arg1, caddr_t int_handler_arg2),
caddr_t int_handler_arg1, caddr_t int_handler_arg2,
msiq_rec_type_t rec_type, msgcode_t msg_code)
{
px_ih_t *ih_p;
ih_p = kmem_alloc(sizeof (px_ih_t), KM_SLEEP);
ih_p->ih_dip = rdip;
ih_p->ih_inum = inum;
ih_p->ih_intr_state = PX_INTR_STATE_DISABLE;
ih_p->ih_intr_flags = PX_INTR_IDLE;
ih_p->ih_handler = int_handler;
ih_p->ih_handler_arg1 = int_handler_arg1;
ih_p->ih_handler_arg2 = int_handler_arg2;
ih_p->ih_config_handle = NULL;
ih_p->ih_rec_type = rec_type;
ih_p->ih_msg_code = msg_code;
ih_p->ih_nsec = 0;
ih_p->ih_ticks = 0;
ih_p->ih_ksp = NULL;
return (ih_p);
}
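/*
* Illustrative caller sketch (hypothetical, simplified from how the px
* nexus interrupt code is expected to use these interfaces): a handler
* is allocated first, then linked onto an existing pil list under the
* ino list mutex:
*
*	ih_p = px_ib_alloc_ih(rdip, inum, handler, arg1, arg2, 0, 0);
*	mutex_enter(&ib_p->ib_ino_lst_mutex);
*	ret = px_ib_ino_add_intr(px_p, ipil_p, ih_p);
*	mutex_exit(&ib_p->ib_ino_lst_mutex);
*/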
int
px_ib_update_intr_state(px_t *px_p, dev_info_t *rdip,
uint_t inum, devino_t ino, uint_t pil,
uint_t new_intr_state, msiq_rec_type_t rec_type,
msgcode_t msg_code)
{
px_ib_t *ib_p = px_p->px_ib_p;
px_ino_t *ino_p;
px_ino_pil_t *ipil_p;
px_ih_t *ih_p;
int ret = DDI_FAILURE;
DBG(DBG_IB, px_p->px_dip, "px_ib_update_intr_state: %s%d "
"inum %x devino %x pil %x state %x\n", ddi_driver_name(rdip),
ddi_get_instance(rdip), inum, ino, pil, new_intr_state);
mutex_enter(&ib_p->ib_ino_lst_mutex);
ino_p = px_ib_locate_ino(ib_p, ino);
if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, pil))) {
if (ih_p = px_ib_intr_locate_ih(ipil_p, rdip, inum, rec_type,
msg_code)) {
ih_p->ih_intr_state = new_intr_state;
ret = DDI_SUCCESS;
}
}
mutex_exit(&ib_p->ib_ino_lst_mutex);
return (ret);
}
/*
* Get interrupt CPU for a given ino.
* Return info only for inos which are already mapped to devices.
*/
/*ARGSUSED*/
int
px_ib_get_intr_target(px_t *px_p, devino_t ino, cpuid_t *cpu_id_p)
{
dev_info_t *dip = px_p->px_dip;
sysino_t sysino;
int ret;
DBG(DBG_IB, px_p->px_dip, "px_ib_get_intr_target: devino %x\n", ino);
/* Convert leaf-wide intr to system-wide intr */
if (px_lib_intr_devino_to_sysino(dip, ino, &sysino) != DDI_SUCCESS)
return (DDI_FAILURE);
ret = px_lib_intr_gettarget(dip, sysino, cpu_id_p);
DBG(DBG_IB, px_p->px_dip, "px_ib_get_intr_target: cpu_id %x\n",
*cpu_id_p);
return (ret);
}
/*
* Associate a new CPU with a given ino.
* Operate only on INOs which are already mapped to devices.
*/
int
px_ib_set_intr_target(px_t *px_p, devino_t ino, cpuid_t cpu_id)
{
dev_info_t *dip = px_p->px_dip;
cpuid_t old_cpu_id;
sysino_t sysino;
int ret = DDI_SUCCESS;
extern const int _ncpu;
extern cpu_t *cpu[];
DBG(DBG_IB, px_p->px_dip, "px_ib_set_intr_target: devino %x "
"cpu_id %x\n", ino, cpu_id);
mutex_enter(&cpu_lock);
/* Convert leaf-wide intr to system-wide intr */
if (px_lib_intr_devino_to_sysino(dip, ino, &sysino) != DDI_SUCCESS) {
ret = DDI_FAILURE;
goto done;
}
if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) != DDI_SUCCESS) {
ret = DDI_FAILURE;
goto done;
}
/*
* The cpu lock is already held; validate the cpu and retarget
* the interrupt.
*/
if ((cpu_id < _ncpu) && (cpu[cpu_id] && cpu_is_online(cpu[cpu_id]))) {
DBG(DBG_IB, dip, "px_ib_set_intr_target: Enabling CPU %d\n",
cpu_id);
px_ib_intr_dist_en(dip, cpu_id, ino, B_TRUE);
px_ib_log_new_cpu(px_p->px_ib_p, old_cpu_id, cpu_id, ino);
} else { /* Invalid cpu */
DBG(DBG_IB, dip, "px_ib_set_intr_target: Invalid cpuid %x\n",
cpu_id);
ret = DDI_EINVAL;
}
done:
mutex_exit(&cpu_lock);
return (ret);
}
hrtime_t px_ib_msix_retarget_timeout = 120ll * NANOSEC; /* 120 seconds */
/*
* Associate a new CPU with a given MSI/X.
* Operate only on MSI/Xs which are already mapped to devices.
*/
int
px_ib_set_msix_target(px_t *px_p, ddi_intr_handle_impl_t *hdlp,
msinum_t msi_num, cpuid_t cpu_id)
{
px_ib_t *ib_p = px_p->px_ib_p;
px_msi_state_t *msi_state_p = &px_p->px_ib_p->ib_msi_state;
dev_info_t *dip = px_p->px_dip;
dev_info_t *rdip = hdlp->ih_dip;
msiqid_t msiq_id, old_msiq_id;
pci_msi_state_t msi_state;
msiq_rec_type_t msiq_rec_type;
msi_type_t msi_type;
px_ino_t *ino_p;
px_ih_t *ih_p, *old_ih_p;
cpuid_t old_cpu_id;
hrtime_t start_time, end_time;
int ret = DDI_SUCCESS;
extern const int _ncpu;
extern cpu_t *cpu[];
DBG(DBG_IB, dip, "px_ib_set_msix_target: msi_num %x new cpu_id %x\n",
msi_num, cpu_id);
mutex_enter(&cpu_lock);
/* Check for MSI64 support */
if ((hdlp->ih_cap & DDI_INTR_FLAG_MSI64) && msi_state_p->msi_addr64) {
msiq_rec_type = MSI64_REC;
msi_type = MSI64_TYPE;
} else {
msiq_rec_type = MSI32_REC;
msi_type = MSI32_TYPE;
}
if ((ret = px_lib_msi_getmsiq(dip, msi_num,
&old_msiq_id)) != DDI_SUCCESS) {
mutex_exit(&cpu_lock);
return (ret);
}
DBG(DBG_IB, dip, "px_ib_set_msix_target: current msiq 0x%x\n",
old_msiq_id);
if ((ret = px_ib_get_intr_target(px_p,
px_msiqid_to_devino(px_p, old_msiq_id),
&old_cpu_id)) != DDI_SUCCESS) {
mutex_exit(&cpu_lock);
return (ret);
}
DBG(DBG_IB, dip, "px_ib_set_msix_target: current cpuid 0x%x\n",
old_cpu_id);
if (cpu_id == old_cpu_id) {
mutex_exit(&cpu_lock);
return (DDI_SUCCESS);
}
/*
* The cpu lock is already held; validate the cpu before
* retargeting.
*/
if (!((cpu_id < _ncpu) && (cpu[cpu_id] &&
cpu_is_online(cpu[cpu_id])))) {
/* Invalid cpu */
DBG(DBG_IB, dip, "px_ib_set_msix_target: Invalid cpuid %x\n",
cpu_id);
mutex_exit(&cpu_lock);
return (DDI_EINVAL);
}
DBG(DBG_IB, dip, "px_ib_set_msix_target: Enabling CPU %d\n", cpu_id);
if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
msiq_rec_type, msi_num, cpu_id, &msiq_id)) != DDI_SUCCESS) {
DBG(DBG_IB, dip, "px_ib_set_msix_target: Add MSI handler "
"failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
mutex_exit(&cpu_lock);
return (ret);
}
if ((ret = px_lib_msi_setmsiq(dip, msi_num,
msiq_id, msi_type)) != DDI_SUCCESS) {
mutex_exit(&cpu_lock);
(void) px_rem_msiq_intr(dip, rdip,
hdlp, msiq_rec_type, msi_num, msiq_id);
return (ret);
}
if ((ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri,
PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num)) != DDI_SUCCESS) {
mutex_exit(&cpu_lock);
(void) px_rem_msiq_intr(dip, rdip,
hdlp, msiq_rec_type, msi_num, msiq_id);
return (ret);
}
mutex_exit(&cpu_lock);
/*
* Remove the old handler, but first ensure it is finished.
*
* Each handler sets its PENDING flag before it clears the MSI state.
* Then it clears that flag when finished. If a re-target occurs while
* the MSI state is DELIVERED, then it is not yet known which of the
* two handlers will take the interrupt. So the re-target operation
* sets a RETARGET flag on both handlers in that case. Monitoring both
* flags on both handlers then determines when the old handler can be
* safely removed.
*/
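/*
* Sketch of the protocol described above (illustrative only):
*
*	handler side:	ih_intr_flags |= PX_INTR_PENDING;
*			... clear MSI state, invoke ih_handler ...
*			ih_intr_flags &= ~PX_INTR_PENDING;
*
*	retarget side:	if the MSI state is DELIVERED, set
*			PX_INTR_RETARGET on both the old and new
*			handlers, then wait until one handler has
*			cleared RETARGET (i.e. has taken the interrupt)
*			and the old handler is no longer PENDING (see
*			the loop below).
*/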
mutex_enter(&ib_p->ib_ino_lst_mutex);
ino_p = px_ib_locate_ino(ib_p, px_msiqid_to_devino(px_p, old_msiq_id));
old_ih_p = px_ib_intr_locate_ih(px_ib_ino_locate_ipil(ino_p,
hdlp->ih_pri), rdip, hdlp->ih_inum, msiq_rec_type, msi_num);
ino_p = px_ib_locate_ino(ib_p, px_msiqid_to_devino(px_p, msiq_id));
ih_p = px_ib_intr_locate_ih(px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri),
rdip, hdlp->ih_inum, msiq_rec_type, msi_num);
if ((ret = px_lib_msi_getstate(dip, msi_num,
&msi_state)) != DDI_SUCCESS) {
(void) px_rem_msiq_intr(dip, rdip,
hdlp, msiq_rec_type, msi_num, msiq_id);
mutex_exit(&ib_p->ib_ino_lst_mutex);
return (ret);
}
if (msi_state == PCI_MSI_STATE_DELIVERED) {
ih_p->ih_intr_flags |= PX_INTR_RETARGET;
old_ih_p->ih_intr_flags |= PX_INTR_RETARGET;
}
start_time = gethrtime();
while (((ih_p->ih_intr_flags & PX_INTR_RETARGET) &&
(old_ih_p->ih_intr_flags & PX_INTR_RETARGET)) ||
(old_ih_p->ih_intr_flags & PX_INTR_PENDING)) {
/* Wait for one second */
delay(drv_usectohz(1000000));
end_time = gethrtime() - start_time;
if (end_time > px_ib_msix_retarget_timeout) {
cmn_err(CE_WARN, "MSIX retarget %x is not completed, "
"even after waiting %llx ticks\n",
msi_num, end_time);
break;
}
}
ih_p->ih_intr_flags &= ~(PX_INTR_RETARGET);
mutex_exit(&ib_p->ib_ino_lst_mutex);
ret = px_rem_msiq_intr(dip, rdip,
hdlp, msiq_rec_type, msi_num, old_msiq_id);
return (ret);
}
static void
px_fill_in_intr_devs(pcitool_intr_dev_t *dev, char *driver_name,
char *path_name, int instance)
{
(void) strncpy(dev->driver_name, driver_name, MAXMODCONFNAME-1);
dev->driver_name[MAXMODCONFNAME-1] = '\0';
(void) strncpy(dev->path, path_name, MAXPATHLEN-1);
dev->path[MAXPATHLEN-1] = '\0';
dev->dev_inst = instance;
}
/*
* Return the devices associated with a given interrupt block.
* The size of the devs array is passed in via the devs_ret arg;
* the number of devices returned is passed back in the same arg.
* The array of devices is returned in the devs argument.
* The function's return value is the number of devices existing
* for the given interrupt block.
*
* Note: this function assumes an enabled/valid INO, which is why it returns
* the px node and (Internal) when it finds no other devices (and *devs_ret > 0).
*/
uint8_t
pxtool_ib_get_ino_devs(px_t *px_p, uint32_t ino, uint32_t msi_num,
uint8_t *devs_ret, pcitool_intr_dev_t *devs)
{
px_ib_t *ib_p = px_p->px_ib_p;
px_ino_t *ino_p;
px_ino_pil_t *ipil_p;
px_ih_t *ih_p;
uint32_t num_devs = 0;
char pathname[MAXPATHLEN];
int i, j;
mutex_enter(&ib_p->ib_ino_lst_mutex);
ino_p = px_ib_locate_ino(ib_p, ino);
if (ino_p != NULL) {
for (j = 0, ipil_p = ino_p->ino_ipil_p; ipil_p;
ipil_p = ipil_p->ipil_next_p) {
num_devs += ipil_p->ipil_ih_size;
for (i = 0, ih_p = ipil_p->ipil_ih_head;
((i < ipil_p->ipil_ih_size) && (i < *devs_ret));
i++, j++, ih_p = ih_p->ih_next) {
(void) ddi_pathname(ih_p->ih_dip, pathname);
if (ih_p->ih_msg_code == msi_num) {
num_devs = *devs_ret = 1;
px_fill_in_intr_devs(&devs[0],
(char *)ddi_driver_name(
ih_p->ih_dip), pathname,
ddi_get_instance(ih_p->ih_dip));
goto done;
}
px_fill_in_intr_devs(&devs[j],
(char *)ddi_driver_name(ih_p->ih_dip),
pathname, ddi_get_instance(ih_p->ih_dip));
}
}
*devs_ret = j;
} else if (*devs_ret > 0) {
(void) ddi_pathname(px_p->px_dip, pathname);
(void) strlcat(pathname, " (Internal)", sizeof (pathname));
px_fill_in_intr_devs(&devs[0],
(char *)ddi_driver_name(px_p->px_dip), pathname,
ddi_get_instance(px_p->px_dip));
num_devs = *devs_ret = 1;
}
done:
mutex_exit(&ib_p->ib_ino_lst_mutex);
return (num_devs);
}
int
pxtool_ib_get_msi_info(px_t *px_p, devino_t ino, msinum_t msi_num,
ddi_intr_handle_impl_t *hdlp)
{
px_ib_t *ib_p = px_p->px_ib_p;
px_ino_t *ino_p;
px_ino_pil_t *ipil_p;
px_ih_t *ih_p;
int i;
mutex_enter(&ib_p->ib_ino_lst_mutex);
if ((ino_p = px_ib_locate_ino(ib_p, ino)) == NULL) {
mutex_exit(&ib_p->ib_ino_lst_mutex);
return (DDI_FAILURE);
}
for (ipil_p = ino_p->ino_ipil_p; ipil_p;
ipil_p = ipil_p->ipil_next_p) {
for (i = 0, ih_p = ipil_p->ipil_ih_head;
((i < ipil_p->ipil_ih_size) && ih_p);
i++, ih_p = ih_p->ih_next) {
if (ih_p->ih_msg_code != msi_num)
continue;
hdlp->ih_dip = ih_p->ih_dip;
hdlp->ih_inum = ih_p->ih_inum;
hdlp->ih_cb_func = ih_p->ih_handler;
hdlp->ih_cb_arg1 = ih_p->ih_handler_arg1;
hdlp->ih_cb_arg2 = ih_p->ih_handler_arg2;
if (ih_p->ih_rec_type == MSI64_REC)
hdlp->ih_cap = DDI_INTR_FLAG_MSI64;
hdlp->ih_pri = ipil_p->ipil_pil;
hdlp->ih_ver = DDI_INTR_VERSION;
mutex_exit(&ib_p->ib_ino_lst_mutex);
return (DDI_SUCCESS);
}
}
mutex_exit(&ib_p->ib_ino_lst_mutex);
return (DDI_FAILURE);
}
void
px_ib_log_new_cpu(px_ib_t *ib_p, cpuid_t old_cpu_id, cpuid_t new_cpu_id,
uint32_t ino)
{
px_ino_t *ino_p;
px_ino_pil_t *ipil_p;
px_ih_t *ih_p;
int i;
mutex_enter(&ib_p->ib_ino_lst_mutex);
/* Log in OS data structures the new CPU. */
if (ino_p = px_ib_locate_ino(ib_p, ino)) {
ino_p->ino_cpuid = new_cpu_id;
for (ipil_p = ino_p->ino_ipil_p; ipil_p;
ipil_p = ipil_p->ipil_next_p) {
for (i = 0, ih_p = ipil_p->ipil_ih_head;
(i < ipil_p->ipil_ih_size);
i++, ih_p = ih_p->ih_next) {
/*
* Account for any residual time
* to be logged for old cpu.
*/
px_ib_cpu_ticks_to_ih_nsec(ib_p,
ih_p, old_cpu_id);
}
}
}
mutex_exit(&ib_p->ib_ino_lst_mutex);
}