/* px_intr.c revision a195726fa33097e56cf1c25c31feddb827e140f0 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* PX nexus interrupt handling:
* PX device interrupt handler wrapper
* PIL lookup routine
* PX device interrupt related initchild code
*/
#include <sys/ddi_impldefs.h>
#include "px_obj.h"
/*
* interrupt jabber:
*
* When an interrupt line is jabbering, every time the state machine for the
* associated ino is idled, a new mondo will be sent and the ino will go into
* the pending state again. The mondo will cause a new call to
* px_intr_wrapper() which normally idles the ino's state machine which would
* precipitate another trip round the loop.
*
* The loop can be broken by preventing the ino's state machine from being
* idled when an interrupt line is jabbering. See the comment at the
* beginning of px_intr_wrapper() explaining how the 'interrupt jabber
* protection' code does this.
*/
/*LINTLIBRARY*/
/*
* If the unclaimed interrupt count has reached the limit set by
* pci_unclaimed_intr_max within the time limit, then all interrupts
* on this ino are blocked by not idling the interrupt state machine.
*/
/*
 * NOTE(review): heavily truncated fragment.  The function signature
 * (presumably "px_spurintr(px_ino_t *ino_p)" given the jabber-protection
 * comments above), the conditional headers for the unclaimed-count checks,
 * the "clear:" label targeted by the gotos below, and the cmn_err() call
 * under "warn:" are all missing.  Restore from the original px_intr.c
 * before attempting to build; documented here as-is.
 */
static int
{
char *err_fmt_str;
int i;
return (DDI_INTR_CLAIMED);
if (!ino_p->ino_unclaimed)
ino_p->ino_unclaimed++;
goto clear;
/* NOTE(review): dangling comparison tail — its left-hand side is missing */
> px_spurintr_duration) {
ino_p->ino_unclaimed = 0;
goto clear;
}
/* Unclaimed-interrupt limit reached: leave this ino blocked (jabber protection) */
err_fmt_str = "%s%d: ino 0x%x blocked";
goto warn;
/* Clear the pending state */
return (DDI_INTR_UNCLAIMED);
err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
warn:
return (DDI_INTR_CLAIMED);
}
extern uint64_t intr_get_time(void);
/*
* px_intx_intr (INTx or legacy interrupt handler)
*
* This routine is used as wrapper around interrupt handlers installed by child
* device drivers. This routine invokes the driver interrupt handlers and
* examines the return codes.
*
* There is a count of unclaimed interrupts kept on a per-ino basis. If at
* least one handler claims the interrupt then the counter is halved and the
* interrupt state machine is idled. If no handler claims the interrupt then
* the counter is incremented by one and the state machine is idled.
* If the count ever reaches the limit value set by pci_unclaimed_intr_max
* then the interrupt state machine is not idled thus preventing any further
* interrupts on that ino. The state machine will only be idled again if a
* handler is subsequently added or removed.
*
* return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
* DDI_INTR_UNCLAIMED otherwise.
*/
/*
 * NOTE(review): truncated fragment of px_intx_intr (per the block comment
 * above).  The signature, the DBG() calls that owned the dangling format
 * strings below, the walk over the ino's handler list, and the idle/clear
 * calls are missing.  Only the claim-accumulation skeleton remains.
 */
{
int i;
"ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
"px_intx_intr: %s%d interrupt %d is disabled\n",
continue;
}
"ino=%x handler=%p arg1 =%p arg2 = %p\n",
/*
* Account for time used by this interrupt. Protect against
* conflicting writes to ih_ticks from ib_intr_dist_all() by
* using atomic ops.
*/
result += r;
continue;
if (result)
break;
}
/* No handler claimed the interrupt: apply spurious-interrupt accounting */
if (!result && px_unclaimed_intr_block)
return (px_spurintr(ino_p));
ino_p->ino_unclaimed = 0;
/* Clear the pending state */
return (DDI_INTR_UNCLAIMED);
return (DDI_INTR_CLAIMED);
}
/*
* px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
*
* This routine is used as wrapper around interrupt handlers installed by child
* device drivers. This routine invokes the driver interrupt handlers and
* examines the return codes.
*
* There is a count of unclaimed interrupts kept on a per-ino basis. If at
* least one handler claims the interrupt then the counter is halved and the
* interrupt state machine is idled. If no handler claims the interrupt then
* the counter is incremented by one and the state machine is idled.
* If the count ever reaches the limit value set by pci_unclaimed_intr_max
* then the interrupt state machine is not idled thus preventing any further
* interrupts on that ino. The state machine will only be idled again if a
* handler is subsequently added or removed.
*
* return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
* DDI_INTR_UNCLAIMED otherwise.
*/
/*
 * NOTE(review): truncated fragment of px_msiq_intr (per the block comment
 * above).  The signature, the MSIQ head/tail reads, the record-pointer
 * advance, the handler-list scan whose "} else {" tail survives below,
 * the "next_rec:" label targeted by the goto, and the head-index update
 * are all missing.
 */
{
int ret;
/* Read current MSIQ head index */
curr_msiq_rec_cnt * sizeof (msiq_rec_t));
/* Read next MSIQ record */
/*
* Process current MSIQ record as long as record type
* field is non-zero.
*/
while (msiq_rec_p->msiq_rec_type) {
"msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
/* Get the pointer to the next EQ record */
/* Check for overflow condition */
/* Check MSIQ record type */
switch (msiq_rec_p->msiq_rec_type) {
case MSG_REC:
"record, msg type 0x%x\n", msg_code);
break;
case MSI32_REC:
case MSI64_REC:
"msi 0x%x\n", msg_code);
/* Clear MSI state */
break;
default:
msg_code = 0;
"record type is not supported",
goto next_rec;
}
/*
* Scan through px_ih_t linked list, searching for the
* right px_ih_t, matching MSIQ record data.
*/
/*
* Special case for PCIE Error Messages.
* The current framework doesn't fit PCIE Err Msgs.
* This should be fixed when PCIE MESSAGES as a whole
* is architected correctly.
*/
if ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
(msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
(msg_code == PCIE_MSG_CODE_ERR_FATAL)) {
} else
/*
* Account for time used by this interrupt. Protect
* against conflicting writes to ih_ticks from
* ib_intr_dist_all() by using atomic ops.
*/
} else {
"Not found matching MSIQ record\n");
/* px_spurintr(ino_p); */
ino_p->ino_unclaimed++;
}
/* Zero out msiq_rec_type field */
msiq_rec_p->msiq_rec_type = 0;
/* Read next MSIQ record */
}
/* Update MSIQ head index with no of MSIQ records processed */
if (new_msiq_rec_cnt > curr_msiq_rec_cnt) {
}
/* Clear the pending state */
return (DDI_INTR_UNCLAIMED);
return (DDI_INTR_CLAIMED);
}
/*
 * NOTE(review): fragment — the signature is missing.  It returns "cdip",
 * so this is presumably a child dev_info lookup helper (e.g. the
 * px_get_my_childs_dip() routine of the original file); the walk up the
 * devinfo tree to the immediate child is gone.
 */
{
;
return (cdip);
}
/* Default class to pil value mapping */
/*
 * Each entry is { class_code, mask, pil }: a device whose PCI config-space
 * class code matches (code & mask) is assigned the given priority level.
 * Consulted by px_class_to_pil() below, subject to override by the
 * "pci-class-priorities" property.
 */
px_class_val_t px_default_pil [] = {
{0x000000, 0xff0000, 0x1}, /* Class code for pre-2.0 devices */
{0x010000, 0xff0000, 0x4}, /* Mass Storage Controller */
{0x020000, 0xff0000, 0x6}, /* Network Controller */
{0x030000, 0xff0000, 0x9}, /* Display Controller */
{0x040000, 0xff0000, 0x9}, /* Multimedia Controller */
{0x050000, 0xff0000, 0xb}, /* Memory Controller */
{0x060000, 0xff0000, 0xb}, /* Bridge Controller */
{0x0c0000, 0xffff00, 0x9}, /* Serial Bus, FireWire (IEEE 1394) */
{0x0c0100, 0xffff00, 0x4}, /* Serial Bus, ACCESS.bus */
{0x0c0200, 0xffff00, 0x4}, /* Serial Bus, SSA */
{0x0c0300, 0xffff00, 0x9}, /* Serial Bus Universal Serial Bus */
{0x0c0400, 0xffff00, 0x6}, /* Serial Bus, Fibre Channel */
{0x0c0600, 0xffff00, 0x6} /* Serial Bus, Infiniband */
};
/*
* Default class to intr_weight value mapping (% of CPU). A driver.conf
* entry on or above the pci node like
*
* pci-class-intr-weights= 0x020000, 0xff0000, 30;
*
* can be used to augment or override entries in the default table below.
*
* NB: The values below give NICs preference on redistribution, and provide
* NICs some isolation from other interrupt sources. We need better interfaces
* that allow the NIC driver to identify a specific NIC instance as high
* bandwidth, and thus deserving of separation from other low-bandwidth
* NICs and of additional isolation from other interrupt sources.
*
* NB: We treat Infiniband like a NIC.
*/
/*
 * NOTE(review): the declaration line for this initializer (presumably
 * "px_class_val_t px_default_intr_weight [] = {", matching px_default_pil
 * above) is missing from this fragment; only the { class, mask, weight }
 * entries (weight in % of CPU) remain.
 */
{0x020000, 0xff0000, 35}, /* Network Controller */
{0x010000, 0xff0000, 10}, /* Mass Storage Controller */
{0x0c0400, 0xffff00, 10}, /* Serial Bus, Fibre Channel */
{0x0c0600, 0xffff00, 50} /* Serial Bus, Infiniband */
};
/*
 * NOTE(review): fragment — the signature and the lookup loop over a
 * px_class_val_t table are missing; only the default-value fall-through
 * survives, and the braces do not balance as written.  Presumably this is
 * the table-search helper used by px_class_to_val() below.
 */
static uint32_t
{
int i;
}
return (default_val);
}
/*
* px_class_to_val
*
* Return the configuration value, based on class code and sub class code,
* from the specified property based or default px_class_val_t table.
*/
/*
 * NOTE(review): fragment of px_class_to_val (per the comment above) — the
 * signature, the "class-code" property read that sets class_code, the
 * default-table lookup, and the property-table override lookup are
 * missing; only the guard returns remain.
 */
{
int property_len;
/*
* Use the "class-code" property to get the base and sub class
* codes for the requesting device.
*/
if (class_code == -1)
return (val);
/* look up the val from the default table */
/* see if there is a more specific property specified value */
return (val);
/* sanity check: the property must be a whole number of table entries */
if ((property_len % sizeof (px_class_val_t)) == 0)
return (val);
}
/* px_class_to_pil: return the pil for a given device. */
/*
 * NOTE(review): fragment — the signature and the px_class_to_val() call
 * that initializes "pil" are missing.  Also note the surviving range check
 * reads ">= 0xf", which would zero a PIL of exactly 0xf; verify against
 * the original source (likely "> 0xf", since 0xf is a valid PIL).
 */
{
/* default pil is 0 (uninitialized) */
"pci-class-priorities", px_default_pil,
sizeof (px_default_pil) / sizeof (px_class_val_t), 0);
/* range check the result */
if (pil >= 0xf)
pil = 0;
return (pil);
}
/* px_class_to_intr_weight: return the intr_weight for a given device. */
/*
 * NOTE(review): fragment — the parameter list and the px_class_to_val()
 * call that initializes "intr_weight" are missing.  The surviving code
 * clamps the result to the [0, 1000] range (weight is in tenths of %).
 */
static int32_t
{
/* default weight is 0% */
"pci-class-intr-weights", px_default_intr_weight,
sizeof (px_default_intr_weight) / sizeof (px_class_val_t), 0);
/* range check the result */
if (intr_weight < 0)
intr_weight = 0;
if (intr_weight > 1000)
intr_weight = 1000;
return (intr_weight);
}
/* ARGSUSED */
/*
 * NOTE(review): fragment — presumably the fixed (INTx) interrupt
 * bus_intr_op dispatcher.  The signature and every per-case body are
 * missing.  As written, DDI_INTROP_NINTRS falls through to NAVAIL's break,
 * and the "*(int *)result = DDI_INTR_TYPE_FIXED;" statement sits after a
 * break with its case label (presumably DDI_INTROP_SUPPORTED_TYPES)
 * missing, leaving it unreachable.
 */
int
{
int ret = DDI_SUCCESS;
switch (intr_op) {
case DDI_INTROP_GETCAP:
break;
case DDI_INTROP_SETCAP:
ret = DDI_ENOTSUP;
break;
case DDI_INTROP_ALLOC:
break;
case DDI_INTROP_FREE:
break;
case DDI_INTROP_GETPRI:
break;
case DDI_INTROP_SETPRI:
break;
case DDI_INTROP_ADDISR:
break;
case DDI_INTROP_REMISR:
break;
case DDI_INTROP_ENABLE:
break;
case DDI_INTROP_DISABLE:
break;
case DDI_INTROP_SETMASK:
break;
case DDI_INTROP_CLRMASK:
break;
case DDI_INTROP_GETPENDING:
break;
case DDI_INTROP_NINTRS:
case DDI_INTROP_NAVAIL:
break;
*(int *)result = DDI_INTR_TYPE_FIXED;
break;
default:
ret = DDI_ENOTSUP;
break;
}
return (ret);
}
/* ARGSUSED */
/*
 * NOTE(review): fragment — presumably the MSI/MSI-X bus_intr_op
 * dispatcher.  The signature and most per-case bodies (the px_msi_alloc /
 * px_msi_free / mapping-register calls whose error-check tails survive
 * below) are missing; braces in ADDISR/ENABLE do not balance as written,
 * and several "return (ret)" lines have lost the failing call they
 * guarded.
 */
int
{
int i, ret = DDI_SUCCESS;
switch (intr_op) {
case DDI_INTROP_GETCAP:
break;
case DDI_INTROP_SETCAP:
ret = DDI_ENOTSUP;
break;
case DDI_INTROP_ALLOC:
/*
* We need to restrict this allocation in future
* based on Resource Management policies.
*/
(int *)result)) != DDI_SUCCESS) {
"failed, rdip 0x%p inum 0x%x count 0x%x\n",
return (ret);
}
break;
case DDI_INTROP_FREE:
hdlp->ih_scratch1);
break;
case DDI_INTROP_GETPRI:
break;
case DDI_INTROP_SETPRI:
break;
case DDI_INTROP_ADDISR:
return (ret);
return (ret);
}
return (ret);
}
PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
return (ret);
}
break;
case DDI_INTROP_DUPVEC:
ret = DDI_ENOTSUP;
break;
case DDI_INTROP_REMISR:
&msiq_id)) != DDI_SUCCESS)
return (ret);
return (ret);
break;
case DDI_INTROP_ENABLE:
PCI_MSI_VALID)) != DDI_SUCCESS)
return (ret);
return (ret);
return (ret);
}
break;
case DDI_INTROP_DISABLE:
return (ret);
break;
case DDI_INTROP_BLOCKENABLE:
return (ret);
PCI_MSI_VALID)) != DDI_SUCCESS)
return (ret);
}
break;
case DDI_INTROP_BLOCKDISABLE:
return (ret);
PCI_MSI_INVALID)) != DDI_SUCCESS)
return (ret);
}
break;
case DDI_INTROP_SETMASK:
break;
case DDI_INTROP_CLRMASK:
break;
case DDI_INTROP_GETPENDING:
break;
case DDI_INTROP_NINTRS:
break;
case DDI_INTROP_NAVAIL:
/* XXX - a new interface may be needed */
break;
break;
default:
ret = DDI_ENOTSUP;
break;
}
return (ret);
}
/*
 * NOTE(review): the field declarations of this kstat template struct are
 * missing from the fragment; only the kstat_named_t-style initializers
 * survive.  Presumably it declares one kstat_named_t per initializer
 * (name/type/cpu/pil/time/ino/cookie/devpath/buspath) for the
 * pci_intrs::: interrupt kstats created below.
 */
static struct {
} pxintr_ks_template = {
{ "name", KSTAT_DATA_CHAR },
{ "type", KSTAT_DATA_CHAR },
{ "cpu", KSTAT_DATA_UINT64 },
{ "pil", KSTAT_DATA_UINT64 },
{ "time", KSTAT_DATA_UINT64 },
{ "ino", KSTAT_DATA_UINT64 },
{ "cookie", KSTAT_DATA_UINT64 },
{ "devpath", KSTAT_DATA_STRING },
{ "buspath", KSTAT_DATA_STRING },
};
/* Monotonically increasing instance number for the kstats above */
static uint32_t pxintr_ks_instance;
/*
 * NOTE(review): fragment — presumably the kstat update callback
 * (px_ks_update in the original).  The signature and the code that fills
 * the template fields from the interrupt handler record are missing; only
 * the device/bus path buffers and the success return remain.
 */
int
{
char ih_devpath[MAXPATHLEN];
char ih_buspath[MAXPATHLEN];
return (0);
}
/*
 * NOTE(review): fragment — presumably the interrupt-kstat creation
 * routine.  The signature, the surrounding conditional, and the
 * kstat_create() call that owned the dangling argument lines below are
 * missing; braces do not balance as written.
 */
void
{
/*
* Create pci_intrs::: kstats for all ih types except messages,
* which represent unusual conditions and don't need to be tracked.
*/
"interrupts", KSTAT_TYPE_NAMED,
sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
}
}
}
/*
* px_add_intx_intr:
*
* This function is called to register INTx and legacy hardware
* interrupt pins interrupts.
*/
/*
 * NOTE(review): fragment of px_add_intx_intr (per the comment above) —
 * the signature, the ino/ih lookup and allocation, the i_ddi_add_ivintr()
 * registration, and the "fail1:"/"fail2:" labels targeted by the gotos
 * below are all missing.  The two trailing "return (ret)" lines appear to
 * belong to the missing failure-cleanup path.
 */
int
{
int ret = DDI_SUCCESS;
"dup intr #%d\n", intr_index);
ret = DDI_FAILURE;
goto fail1;
}
/* Save mondo value in hdlp */
!= DDI_SUCCESS)
goto fail1;
} else {
/* Save mondo value in hdlp */
/*
* Restore original interrupt handler
* and arguments in interrupt handle.
*/
if (ret != DDI_SUCCESS)
goto fail2;
/* Save the pil for this ino */
/* select cpu, saving it for sharing and removal */
/* Enable interrupt */
}
/* add weight to the cpu that we are already targeting */
return (ret);
if (ih_p->ih_config_handle)
return (ret);
}
/*
* px_rem_intx_intr:
*
* This function is called to unregister INTx and legacy hardware
* interrupt pins interrupts.
*/
/*
 * NOTE(review): fragment of px_rem_intx_intr (per the comment above) —
 * the signature, the current-cpu query whose argument tail survives below,
 * the handler-list removal, and the interrupt disable/re-enable calls are
 * missing.
 */
int
{
int ret = DDI_SUCCESS;
/* Get the current cpu */
&curr_cpu)) != DDI_SUCCESS)
goto fail;
goto fail;
/* Last handler on this ino: tear the mapping down entirely */
if (ino_p->ino_ih_size == 0) {
goto fail;
} else {
/* Re-enable interrupt only if mapping register is still shared */
}
fail:
return (ret);
}
/*
* px_add_msiq_intr:
*
*/
/*
 * NOTE(review): fragment of px_add_msiq_intr (per the comment above) —
 * the signature, the msiq allocation call whose failure message survives
 * below, the handler registration, and the "fail1:"/"fail2:" labels
 * targeted by the gotos are all missing.  Structure parallels
 * px_add_intx_intr.
 */
int
{
int ret = DDI_SUCCESS;
"msiq allocation failed\n");
return (ret);
}
"dup intr #%d\n", intr_index);
ret = DDI_FAILURE;
goto fail1;
}
!= DDI_SUCCESS)
goto fail1;
} else {
/* Save mondo value in hdlp */
/*
* Restore original interrupt handler
* and arguments in interrupt handle.
*/
if (ret != DDI_SUCCESS)
goto fail2;
/* Save the pil for this ino */
/* Enable MSIQ */
/* select cpu, saving it for sharing and removal */
/* Enable interrupt */
}
/* add weight to the cpu that we are already targeting */
return (ret);
if (ih_p->ih_config_handle)
return (ret);
}
/*
* px_rem_msiq_intr:
*
*/
/*
 * NOTE(review): fragment of px_rem_msiq_intr (per the comment above) —
 * the signature, the current-cpu query, the handler removal, and the
 * msiq release/re-enable calls are missing.  Structure parallels
 * px_rem_intx_intr.
 */
int
{
int ret = DDI_SUCCESS;
/* Get the current cpu */
&curr_cpu)) != DDI_SUCCESS)
goto fail;
goto fail;
/* Last handler on this ino: tear the MSIQ mapping down entirely */
if (ino_p->ino_ih_size == 0) {
goto fail;
} else {
/* Re-enable interrupt only if mapping register is still shared */
}
fail:
return (ret);
}