cnex.c revision 0de66c941512d3ab0d15397815d8c9c552bcbf8d
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Logical domain channel devices are devices implemented entirely
* in software; cnex is the nexus for channel-devices. They use
* the HV channel interfaces via the LDC transport module to send
* and receive data and to register callbacks.
*/
#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/devops.h>
#include <sys/instance.h>
#include <sys/modctl.h>
#include <sys/open.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/systm.h>
#include <sys/mkdev.h>
#include <sys/machsystm.h>
#include <sys/intreg.h>
#include <sys/intr.h>
#include <sys/ddi_intr_impl.h>
#include <sys/ivintr.h>
#include <sys/hypervisor_api.h>
#include <sys/ldc.h>
#include <sys/cnex.h>
#include <sys/mach_descrip.h>
#include <sys/hsvc.h>
#include <sys/sdt.h>
/*
* Internal functions/information
*/
static struct cnex_intr_map cnex_class_to_intr[] = {
{LDC_DEV_GENERIC, PIL_3, 0},
{LDC_DEV_BLK, PIL_4, 10},
{LDC_DEV_BLK_SVC, PIL_3, 10},
{LDC_DEV_NT, PIL_6, 35},
{LDC_DEV_NT_SVC, PIL_4, 35},
{LDC_DEV_SERIAL, PIL_6, 0}
};
#define CNEX_MAX_DEVS (sizeof (cnex_class_to_intr) / \
sizeof (cnex_class_to_intr[0]))
#define CNEX_TX_INTR_WEIGHT 0
#define SUN4V_REG_SPEC2CFG_HDL(x) ((x >> 32) & ~(0xfull << 28))
static clock_t cnex_wait_usecs = 1000; /* wait time in usecs */
static int cnex_wait_retries = 3;
static void *cnex_state;
static uint_t cnex_intr_wrapper(caddr_t arg);
static dev_info_t *cnex_find_chan_dip(dev_info_t *dip, uint64_t chan_id,
md_t *mdp, mde_cookie_t mde);
/*
* Channel Interrupt Distribution
*
* In order to balance interrupts among available CPUs, we use
* the intr_dist_cpuid_{add,remove}_device_weight() interface to
* assign weights to channel interrupts. These weights, which are
* defined in the cnex_intr_map structure, influence which CPU
* is returned by intr_dist_cpuid() when called via the cnex
* interrupt redistribution callback cnex_intr_redist().
* Interrupts for VIO devclass channels are given more weight than
* other interrupts because they are expected to occur more
* frequently and have a larger impact on overall performance.
* Transmit interrupts are given a zero weight because they are
* not used.
*
* The interrupt weights influence the target CPU selection when
* interrupts are redistributed and when they are added. However,
* removal of interrupts can unbalance the distribution even if
* they are removed in converse order--compared to the order they
* are added. This can occur when interrupts are removed after
* redistribution occurs.
*
* Channel interrupt weights affect interrupt-CPU distribution
* relative to other weighted interrupts on the system. For VIO
* devclass channels, values are chosen to match those used by
* the PCI express nexus driver for net and storage devices.
*/
static void cnex_intr_redist(void *arg, int32_t weight_max, int32_t weight);
static int cnex_intr_new_cpu(cnex_soft_state_t *ssp, cnex_intr_t *iinfo);
static int cnex_intr_dis_wait(cnex_soft_state_t *ssp, cnex_intr_t *iinfo);
static int32_t cnex_class_weight(ldc_dev_t devclass);
/*
* Debug info
*/
#ifdef DEBUG
/*
* Print debug messages
*
* set cnexdbg to 0xf for enabling all msgs
* 0x8 - Errors
* 0x4 - Warnings
* 0x2 - All debug messages
* 0x1 - Minimal debug messages
*/
int cnexdbg = 0x8;
static void
cnexdebug(const char *fmt, ...)
{
char buf[512];
va_list ap;
va_start(ap, fmt);
(void) vsprintf(buf, fmt, ap);
va_end(ap);
cmn_err(CE_CONT, "%s\n", buf);
}
#define D1 \
if (cnexdbg & 0x01) \
cnexdebug
#define D2 \
if (cnexdbg & 0x02) \
cnexdebug
#define DWARN \
if (cnexdbg & 0x04) \
cnexdebug
#define DERR \
if (cnexdbg & 0x08) \
cnexdebug
#else
#define D1
#define D2
#define DWARN
#define DERR
#endif
/*
* Config information
*/
static int cnex_attach(dev_info_t *, ddi_attach_cmd_t);
static int cnex_detach(dev_info_t *, ddi_detach_cmd_t);
static int cnex_open(dev_t *, int, int, cred_t *);
static int cnex_close(dev_t, int, int, cred_t *);
static int cnex_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int cnex_ctl(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
void *);
static struct bus_ops cnex_bus_ops = {
BUSO_REV,
nullbusmap, /* bus_map */
NULL, /* bus_get_intrspec */
NULL, /* bus_add_intrspec */
NULL, /* bus_remove_intrspec */
i_ddi_map_fault, /* bus_map_fault */
ddi_no_dma_map, /* bus_dma_map */
ddi_no_dma_allochdl, /* bus_dma_allochdl */
NULL, /* bus_dma_freehdl */
NULL, /* bus_dma_bindhdl */
NULL, /* bus_dma_unbindhdl */
NULL, /* bus_dma_flush */
NULL, /* bus_dma_win */
NULL, /* bus_dma_ctl */
cnex_ctl, /* bus_ctl */
ddi_bus_prop_op, /* bus_prop_op */
0, /* bus_get_eventcookie */
0, /* bus_add_eventcall */
0, /* bus_remove_eventcall */
0, /* bus_post_event */
NULL, /* bus_intr_ctl */
NULL, /* bus_config */
NULL, /* bus_unconfig */
NULL, /* bus_fm_init */
NULL, /* bus_fm_fini */
NULL, /* bus_fm_access_enter */
NULL, /* bus_fm_access_exit */
NULL, /* bus_power */
NULL /* bus_intr_op */
};
static struct cb_ops cnex_cb_ops = {
cnex_open, /* open */
cnex_close, /* close */
nodev, /* strategy */
nodev, /* print */
nodev, /* dump */
nodev, /* read */
nodev, /* write */
cnex_ioctl, /* ioctl */
nodev, /* devmap */
nodev, /* mmap */
nodev, /* segmap */
nochpoll, /* poll */
ddi_prop_op, /* cb_prop_op */
0, /* streamtab */
D_MP | D_NEW | D_HOTPLUG /* Driver compatibility flag */
};
static struct dev_ops cnex_ops = {
DEVO_REV, /* devo_rev, */
0, /* refcnt */
ddi_getinfo_1to1, /* info */
nulldev, /* identify */
nulldev, /* probe */
cnex_attach, /* attach */
cnex_detach, /* detach */
nodev, /* reset */
&cnex_cb_ops, /* driver operations */
&cnex_bus_ops, /* bus operations */
nulldev, /* power */
ddi_quiesce_not_needed, /* quiesce */
};
/*
* Module linkage information for the kernel.
*/
static struct modldrv modldrv = {
&mod_driverops,
"sun4v channel-devices nexus",
&cnex_ops,
};
static struct modlinkage modlinkage = {
MODREV_1, (void *)&modldrv, NULL
};
int
_init(void)
{
int err;
uint64_t majornum;
uint64_t minornum;
/*
* Check HV intr group api versioning.
* Note that cnex assumes interrupt cookies is
* in version 1.0 of the intr group api.
*/
if ((err = hsvc_version(HSVC_GROUP_INTR, &majornum, &minornum)) != 0) {
cmn_err(CE_WARN, "cnex: failed to get intr api "
"group versioning errno=%d", err);
return (err);
} else if ((majornum != 1) && (majornum != 2)) {
cmn_err(CE_WARN, "cnex: unsupported intr api group: "
"maj:0x%lx, min:0x%lx", majornum, minornum);
return (ENOTSUP);
}
if ((err = ddi_soft_state_init(&cnex_state,
sizeof (cnex_soft_state_t), 0)) != 0) {
return (err);
}
if ((err = mod_install(&modlinkage)) != 0) {
ddi_soft_state_fini(&cnex_state);
return (err);
}
return (0);
}
int
_fini(void)
{
int err;
if ((err = mod_remove(&modlinkage)) != 0)
return (err);
ddi_soft_state_fini(&cnex_state);
return (0);
}
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}
/*
* Callback function invoked by the interrupt redistribution
* framework. This will redirect interrupts at CPUs that are
* currently available in the system.
*
* Note: any interrupts with weight greater than or equal to
* weight_max must be redistributed when this callback is
* invoked with (weight == weight_max) which will be once per
* redistribution.
*/
/*ARGSUSED*/
static void
cnex_intr_redist(void *arg, int32_t weight_max, int32_t weight)
{
cnex_ldc_t *cldcp;
cnex_soft_state_t *cnex_ssp = arg;
ASSERT(cnex_ssp != NULL);
mutex_enter(&cnex_ssp->clist_lock);
cldcp = cnex_ssp->clist;
while (cldcp != NULL) {
mutex_enter(&cldcp->lock);
if (cldcp->tx.hdlr && (cldcp->tx.weight == weight ||
(weight_max == weight && cldcp->tx.weight > weight))) {
(void) cnex_intr_new_cpu(cnex_ssp, &cldcp->tx);
}
if (cldcp->rx.hdlr && (cldcp->rx.weight == weight ||
(weight_max == weight && cldcp->rx.weight > weight))) {
(void) cnex_intr_new_cpu(cnex_ssp, &cldcp->rx);
}
mutex_exit(&cldcp->lock);
/* next channel */
cldcp = cldcp->next;
}
mutex_exit(&cnex_ssp->clist_lock);
}
/*
* Internal function to replace the CPU used by an interrupt
* during interrupt redistribution.
*/
static int
cnex_intr_new_cpu(cnex_soft_state_t *ssp, cnex_intr_t *iinfo)
{
int intr_state;
int rv;
/* Determine if the interrupt is enabled */
rv = hvldc_intr_getvalid(ssp->cfghdl, iinfo->ino, &intr_state);
if (rv) {
DWARN("cnex_intr_new_cpu: rx ino=0x%llx, can't get valid\n",
iinfo->ino);
return (rv);
}
/* If it is enabled, disable it */
if (intr_state == HV_INTR_VALID) {
rv = cnex_intr_dis_wait(ssp, iinfo);
if (rv) {
return (rv);
}
}
/* Target the interrupt at a new CPU. */
iinfo->cpuid = intr_dist_cpuid();
(void) hvldc_intr_settarget(ssp->cfghdl, iinfo->ino, iinfo->cpuid);
intr_dist_cpuid_add_device_weight(iinfo->cpuid, iinfo->dip,
iinfo->weight);
/* Re-enable the interrupt if it was enabled */
if (intr_state == HV_INTR_VALID) {
(void) hvldc_intr_setvalid(ssp->cfghdl, iinfo->ino,
HV_INTR_VALID);
}
return (0);
}
/*
* Internal function to disable an interrupt and wait
* for any pending interrupts to finish.
*/
static int
cnex_intr_dis_wait(cnex_soft_state_t *ssp, cnex_intr_t *iinfo)
{
int rv, intr_state, retries;
/* disable interrupts */
rv = hvldc_intr_setvalid(ssp->cfghdl, iinfo->ino, HV_INTR_NOTVALID);
if (rv) {
DWARN("cnex_intr_dis_wait: ino=0x%llx, can't set valid\n",
iinfo->ino);
return (ENXIO);
}
/*
* Make a best effort to wait for pending interrupts
* to finish. There is not much we can do if we timeout.
*/
retries = 0;
do {
rv = hvldc_intr_getstate(ssp->cfghdl, iinfo->ino, &intr_state);
if (rv) {
DWARN("cnex_intr_dis_wait: ino=0x%llx, can't get "
"state\n", iinfo->ino);
return (ENXIO);
}
if (intr_state != HV_INTR_DELIVERED_STATE)
break;
drv_usecwait(cnex_wait_usecs);
} while (!panicstr && ++retries <= cnex_wait_retries);
return (0);
}
/*
* Returns the interrupt weight to use for the specified devclass.
*/
static int32_t
cnex_class_weight(ldc_dev_t devclass)
{
int idx;
for (idx = 0; idx < CNEX_MAX_DEVS; idx++) {
if (devclass == cnex_class_to_intr[idx].devclass) {
return (cnex_class_to_intr[idx].weight);
}
}
/*
* If this code is reached, the specified devclass is
* invalid. New devclasses should be added to
* cnex_class_to_intr.
*/
ASSERT(0);
return (0);
}
/*
* Exported interface to register a LDC endpoint with
* the channel nexus
*/
static int
cnex_reg_chan(dev_info_t *dip, uint64_t id, ldc_dev_t devclass)
{
int idx;
cnex_ldc_t *cldcp;
cnex_ldc_t *new_cldcp;
int listsz, num_nodes, num_channels;
md_t *mdp = NULL;
mde_cookie_t rootnode, *listp = NULL;
uint64_t tmp_id;
uint64_t rxino = (uint64_t)-1;
uint64_t txino = (uint64_t)-1;
cnex_soft_state_t *cnex_ssp;
int status, instance;
dev_info_t *chan_dip = NULL;
/* Get device instance and structure */
instance = ddi_get_instance(dip);
cnex_ssp = ddi_get_soft_state(cnex_state, instance);
/* Check to see if channel is already registered */
mutex_enter(&cnex_ssp->clist_lock);
cldcp = cnex_ssp->clist;
while (cldcp) {
if (cldcp->id == id) {
DWARN("cnex_reg_chan: channel 0x%llx exists\n", id);
mutex_exit(&cnex_ssp->clist_lock);
return (EINVAL);
}
cldcp = cldcp->next;
}
mutex_exit(&cnex_ssp->clist_lock);
/* Get the Tx/Rx inos from the MD */
if ((mdp = md_get_handle()) == NULL) {
DWARN("cnex_reg_chan: cannot init MD\n");
return (ENXIO);
}
num_nodes = md_node_count(mdp);
ASSERT(num_nodes > 0);
listsz = num_nodes * sizeof (mde_cookie_t);
listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
rootnode = md_root_node(mdp);
/* search for all channel_endpoint nodes */
num_channels = md_scan_dag(mdp, rootnode,
md_find_name(mdp, "channel-endpoint"),
md_find_name(mdp, "fwd"), listp);
if (num_channels <= 0) {
DWARN("cnex_reg_chan: invalid channel id\n");
kmem_free(listp, listsz);
(void) md_fini_handle(mdp);
return (EINVAL);
}
for (idx = 0; idx < num_channels; idx++) {
/* Get the channel ID */
status = md_get_prop_val(mdp, listp[idx], "id", &tmp_id);
if (status) {
DWARN("cnex_reg_chan: cannot read LDC ID\n");
kmem_free(listp, listsz);
(void) md_fini_handle(mdp);
return (ENXIO);
}
if (tmp_id != id)
continue;
/* Get the Tx and Rx ino */
status = md_get_prop_val(mdp, listp[idx], "tx-ino", &txino);
if (status) {
DWARN("cnex_reg_chan: cannot read Tx ino\n");
kmem_free(listp, listsz);
(void) md_fini_handle(mdp);
return (ENXIO);
}
status = md_get_prop_val(mdp, listp[idx], "rx-ino", &rxino);
if (status) {
DWARN("cnex_reg_chan: cannot read Rx ino\n");
kmem_free(listp, listsz);
(void) md_fini_handle(mdp);
return (ENXIO);
}
chan_dip = cnex_find_chan_dip(dip, id, mdp, listp[idx]);
ASSERT(chan_dip != NULL);
}
kmem_free(listp, listsz);
(void) md_fini_handle(mdp);
/*
* check to see if we looped through the list of channel IDs without
* matching one (i.e. an 'ino' has not been initialised).
*/
if ((rxino == -1) || (txino == -1)) {
DERR("cnex_reg_chan: no ID matching '%llx' in MD\n", id);
return (ENOENT);
}
/* Allocate a new channel structure */
new_cldcp = kmem_zalloc(sizeof (*new_cldcp), KM_SLEEP);
/* Initialize the channel */
mutex_init(&new_cldcp->lock, NULL, MUTEX_DRIVER, NULL);
new_cldcp->id = id;
new_cldcp->tx.ino = txino;
new_cldcp->rx.ino = rxino;
new_cldcp->devclass = devclass;
new_cldcp->tx.weight = CNEX_TX_INTR_WEIGHT;
new_cldcp->rx.weight = cnex_class_weight(devclass);
new_cldcp->dip = chan_dip;
/*
* Add channel to nexus channel list.
* Check again to see if channel is already registered since
* clist_lock was dropped above.
*/
mutex_enter(&cnex_ssp->clist_lock);
cldcp = cnex_ssp->clist;
while (cldcp) {
if (cldcp->id == id) {
DWARN("cnex_reg_chan: channel 0x%llx exists\n", id);
mutex_exit(&cnex_ssp->clist_lock);
mutex_destroy(&new_cldcp->lock);
kmem_free(new_cldcp, sizeof (*new_cldcp));
return (EINVAL);
}
cldcp = cldcp->next;
}
new_cldcp->next = cnex_ssp->clist;
cnex_ssp->clist = new_cldcp;
mutex_exit(&cnex_ssp->clist_lock);
return (0);
}
/*
* Add Tx/Rx interrupt handler for the channel
*/
static int
cnex_add_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype,
uint_t (*hdlr)(), caddr_t arg1, caddr_t arg2)
{
int rv, idx, pil;
cnex_ldc_t *cldcp;
cnex_intr_t *iinfo;
cnex_soft_state_t *cnex_ssp;
int instance;
/* Get device instance and structure */
instance = ddi_get_instance(dip);
cnex_ssp = ddi_get_soft_state(cnex_state, instance);
/* get channel info */
mutex_enter(&cnex_ssp->clist_lock);
cldcp = cnex_ssp->clist;
while (cldcp) {
if (cldcp->id == id)
break;
cldcp = cldcp->next;
}
if (cldcp == NULL) {
DWARN("cnex_add_intr: channel 0x%llx does not exist\n", id);
mutex_exit(&cnex_ssp->clist_lock);
return (EINVAL);
}
mutex_exit(&cnex_ssp->clist_lock);
/* get channel lock */
mutex_enter(&cldcp->lock);
/* get interrupt type */
if (itype == CNEX_TX_INTR) {
iinfo = &(cldcp->tx);
} else if (itype == CNEX_RX_INTR) {
iinfo = &(cldcp->rx);
} else {
DWARN("cnex_add_intr: invalid interrupt type\n", id);
mutex_exit(&cldcp->lock);
return (EINVAL);
}
/* check if a handler is already added */
if (iinfo->hdlr != 0) {
DWARN("cnex_add_intr: interrupt handler exists\n");
mutex_exit(&cldcp->lock);
return (EINVAL);
}
/* save interrupt handler info */
iinfo->hdlr = hdlr;
iinfo->arg1 = arg1;
iinfo->arg2 = arg2;
/* save data for DTrace probes used by intrstat(1m) */
iinfo->dip = cldcp->dip;
iinfo->id = cldcp->id;
iinfo->icookie = MINVINTR_COOKIE + iinfo->ino;
/*
* Verify that the ino does not generate a cookie which
* is outside the (MINVINTR_COOKIE, MAXIVNUM) range of the
* system interrupt table.
*/
if (iinfo->icookie >= MAXIVNUM || iinfo->icookie < MINVINTR_COOKIE) {
DWARN("cnex_add_intr: invalid cookie %x ino %x\n",
iinfo->icookie, iinfo->ino);
mutex_exit(&cldcp->lock);
return (EINVAL);
}
D1("cnex_add_intr: add hdlr, cfghdl=0x%llx, ino=0x%llx, "
"cookie=0x%llx\n", cnex_ssp->cfghdl, iinfo->ino, iinfo->icookie);
/* Pick a PIL on the basis of the channel's devclass */
for (idx = 0, pil = PIL_3; idx < CNEX_MAX_DEVS; idx++) {
if (cldcp->devclass == cnex_class_to_intr[idx].devclass) {
pil = cnex_class_to_intr[idx].pil;
break;
}
}
/* add interrupt to solaris ivec table */
if (add_ivintr(iinfo->icookie, pil, (intrfunc)cnex_intr_wrapper,
(caddr_t)iinfo, NULL, NULL) != 0) {
DWARN("cnex_add_intr: add_ivintr fail cookie %x ino %x\n",
iinfo->icookie, iinfo->ino);
mutex_exit(&cldcp->lock);
return (EINVAL);
}
/* set the cookie in the HV */
rv = hvldc_intr_setcookie(cnex_ssp->cfghdl, iinfo->ino, iinfo->icookie);
/* pick next CPU in the domain for this channel */
iinfo->cpuid = intr_dist_cpuid();
/* set the target CPU and then enable interrupts */
rv = hvldc_intr_settarget(cnex_ssp->cfghdl, iinfo->ino, iinfo->cpuid);
if (rv) {
DWARN("cnex_add_intr: ino=0x%llx, cannot set target cpu\n",
iinfo->ino);
goto hv_error;
}
rv = hvldc_intr_setstate(cnex_ssp->cfghdl, iinfo->ino,
HV_INTR_IDLE_STATE);
if (rv) {
DWARN("cnex_add_intr: ino=0x%llx, cannot set state\n",
iinfo->ino);
goto hv_error;
}
rv = hvldc_intr_setvalid(cnex_ssp->cfghdl, iinfo->ino, HV_INTR_VALID);
if (rv) {
DWARN("cnex_add_intr: ino=0x%llx, cannot set valid\n",
iinfo->ino);
goto hv_error;
}
intr_dist_cpuid_add_device_weight(iinfo->cpuid, iinfo->dip,
iinfo->weight);
mutex_exit(&cldcp->lock);
return (0);
hv_error:
(void) rem_ivintr(iinfo->icookie, pil);
mutex_exit(&cldcp->lock);
return (ENXIO);
}
/*
* Exported interface to unregister a LDC endpoint with
* the channel nexus
*/
static int
cnex_unreg_chan(dev_info_t *dip, uint64_t id)
{
cnex_ldc_t *cldcp, *prev_cldcp;
cnex_soft_state_t *cnex_ssp;
int instance;
/* Get device instance and structure */
instance = ddi_get_instance(dip);
cnex_ssp = ddi_get_soft_state(cnex_state, instance);
/* find and remove channel from list */
mutex_enter(&cnex_ssp->clist_lock);
prev_cldcp = NULL;
cldcp = cnex_ssp->clist;
while (cldcp) {
if (cldcp->id == id)
break;
prev_cldcp = cldcp;
cldcp = cldcp->next;
}
if (cldcp == 0) {
DWARN("cnex_unreg_chan: invalid channel %d\n", id);
mutex_exit(&cnex_ssp->clist_lock);
return (EINVAL);
}
if (cldcp->tx.hdlr || cldcp->rx.hdlr) {
DWARN("cnex_unreg_chan: handlers still exist: chan %lx\n", id);
mutex_exit(&cnex_ssp->clist_lock);
return (ENXIO);
}
if (prev_cldcp)
prev_cldcp->next = cldcp->next;
else
cnex_ssp->clist = cldcp->next;
mutex_exit(&cnex_ssp->clist_lock);
/* destroy mutex */
mutex_destroy(&cldcp->lock);
/* free channel */
kmem_free(cldcp, sizeof (*cldcp));
return (0);
}
/*
* Remove Tx/Rx interrupt handler for the channel
*/
static int
cnex_rem_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype)
{
int rv, idx, pil;
cnex_ldc_t *cldcp;
cnex_intr_t *iinfo;
cnex_soft_state_t *cnex_ssp;
int instance, istate;
/* Get device instance and structure */
instance = ddi_get_instance(dip);
cnex_ssp = ddi_get_soft_state(cnex_state, instance);
/* get channel info */
mutex_enter(&cnex_ssp->clist_lock);
cldcp = cnex_ssp->clist;
while (cldcp) {
if (cldcp->id == id)
break;
cldcp = cldcp->next;
}
if (cldcp == NULL) {
DWARN("cnex_rem_intr: channel 0x%llx does not exist\n", id);
mutex_exit(&cnex_ssp->clist_lock);
return (EINVAL);
}
mutex_exit(&cnex_ssp->clist_lock);
/* get rid of the channel intr handler */
mutex_enter(&cldcp->lock);
/* get interrupt type */
if (itype == CNEX_TX_INTR) {
iinfo = &(cldcp->tx);
} else if (itype == CNEX_RX_INTR) {
iinfo = &(cldcp->rx);
} else {
DWARN("cnex_rem_intr: invalid interrupt type\n");
mutex_exit(&cldcp->lock);
return (EINVAL);
}
D1("cnex_rem_intr: interrupt ino=0x%x\n", iinfo->ino);
/* check if a handler is already added */
if (iinfo->hdlr == 0) {
DWARN("cnex_rem_intr: interrupt handler does not exist\n");
mutex_exit(&cldcp->lock);
return (EINVAL);
}
D1("cnex_rem_intr: set intr to invalid ino=0x%x\n", iinfo->ino);
rv = hvldc_intr_setvalid(cnex_ssp->cfghdl,
iinfo->ino, HV_INTR_NOTVALID);
if (rv) {
DWARN("cnex_rem_intr: cannot set valid ino=%x\n", iinfo->ino);
mutex_exit(&cldcp->lock);
return (ENXIO);
}
/*
* Check if there are pending interrupts. If interrupts are
* pending return EAGAIN.
*/
rv = hvldc_intr_getstate(cnex_ssp->cfghdl, iinfo->ino, &istate);
if (rv) {
DWARN("cnex_rem_intr: ino=0x%llx, cannot get state\n",
iinfo->ino);
mutex_exit(&cldcp->lock);
return (ENXIO);
}
/* if interrupts are still pending print warning */
if (istate != HV_INTR_IDLE_STATE) {
DWARN("cnex_rem_intr: cannot remove intr busy ino=%x\n",
iinfo->ino);
mutex_exit(&cldcp->lock);
return (EAGAIN);
}
/* Pick a PIL on the basis of the channel's devclass */
for (idx = 0, pil = PIL_3; idx < CNEX_MAX_DEVS; idx++) {
if (cldcp->devclass == cnex_class_to_intr[idx].devclass) {
pil = cnex_class_to_intr[idx].pil;
break;
}
}
intr_dist_cpuid_rem_device_weight(iinfo->cpuid, iinfo->dip);
/* remove interrupt */
(void) rem_ivintr(iinfo->icookie, pil);
/* clear interrupt info */
bzero(iinfo, sizeof (*iinfo));
mutex_exit(&cldcp->lock);
return (0);
}
/*
* Clear pending Tx/Rx interrupt
*/
static int
cnex_clr_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype)
{
int rv;
cnex_ldc_t *cldcp;
cnex_intr_t *iinfo;
cnex_soft_state_t *cnex_ssp;
int instance;
/* Get device instance and structure */
instance = ddi_get_instance(dip);
cnex_ssp = ddi_get_soft_state(cnex_state, instance);
/* get channel info */
mutex_enter(&cnex_ssp->clist_lock);
cldcp = cnex_ssp->clist;
while (cldcp) {
if (cldcp->id == id)
break;
cldcp = cldcp->next;
}
if (cldcp == NULL) {
DWARN("cnex_clr_intr: channel 0x%llx does not exist\n", id);
mutex_exit(&cnex_ssp->clist_lock);
return (EINVAL);
}
mutex_exit(&cnex_ssp->clist_lock);
mutex_enter(&cldcp->lock);
/* get interrupt type */
if (itype == CNEX_TX_INTR) {
iinfo = &(cldcp->tx);
} else if (itype == CNEX_RX_INTR) {
iinfo = &(cldcp->rx);
} else {
DWARN("cnex_clr_intr: invalid interrupt type\n");
mutex_exit(&cldcp->lock);
return (EINVAL);
}
D1("%s: interrupt ino=0x%x\n", __func__, iinfo->ino);
/* check if a handler is already added */
if (iinfo->hdlr == 0) {
DWARN("cnex_clr_intr: interrupt handler does not exist\n");
mutex_exit(&cldcp->lock);
return (EINVAL);
}
rv = hvldc_intr_setstate(cnex_ssp->cfghdl, iinfo->ino,
HV_INTR_IDLE_STATE);
if (rv) {
DWARN("cnex_clr_intr: cannot clear interrupt state\n");
mutex_exit(&cldcp->lock);
return (ENXIO);
}
mutex_exit(&cldcp->lock);
return (0);
}
/*
* Channel nexus interrupt handler wrapper
*/
static uint_t
cnex_intr_wrapper(caddr_t arg)
{
int res;
uint_t (*handler)();
caddr_t handler_arg1;
caddr_t handler_arg2;
cnex_intr_t *iinfo = (cnex_intr_t *)arg;
ASSERT(iinfo != NULL);
handler = iinfo->hdlr;
handler_arg1 = iinfo->arg1;
handler_arg2 = iinfo->arg2;
/*
* The 'interrupt__start' and 'interrupt__complete' probes
* are provided to support 'intrstat' command. These probes
* help monitor the interrupts on a per device basis only.
* In order to provide the ability to monitor the
* activity on a per channel basis, two additional
* probes('channelintr__start','channelintr__complete')
* are provided here.
*/
DTRACE_PROBE4(channelintr__start, uint64_t, iinfo->id,
cnex_intr_t *, iinfo, void *, handler, caddr_t, handler_arg1);
DTRACE_PROBE4(interrupt__start, dev_info_t, iinfo->dip,
void *, handler, caddr_t, handler_arg1, caddr_t, handler_arg2);
D1("cnex_intr_wrapper:ino=0x%llx invoke client handler\n", iinfo->ino);
res = (*handler)(handler_arg1, handler_arg2);
DTRACE_PROBE4(interrupt__complete, dev_info_t, iinfo->dip,
void *, handler, caddr_t, handler_arg1, int, res);
DTRACE_PROBE4(channelintr__complete, uint64_t, iinfo->id,
cnex_intr_t *, iinfo, void *, handler, caddr_t, handler_arg1);
return (res);
}
/*ARGSUSED*/
static int
cnex_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
int rv, instance, reglen;
cnex_regspec_t *reg_p;
ldc_cnex_t cinfo;
cnex_soft_state_t *cnex_ssp;
switch (cmd) {
case DDI_ATTACH:
break;
case DDI_RESUME:
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
/*
* Get the instance specific soft state structure.
* Save the devi for this instance in the soft_state data.
*/
instance = ddi_get_instance(devi);
if (ddi_soft_state_zalloc(cnex_state, instance) != DDI_SUCCESS)
return (DDI_FAILURE);
cnex_ssp = ddi_get_soft_state(cnex_state, instance);
cnex_ssp->devi = devi;
cnex_ssp->clist = NULL;
if (ddi_getlongprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
"reg", (caddr_t)&reg_p, &reglen) != DDI_SUCCESS) {
return (DDI_FAILURE);
}
/* get the sun4v config handle for this device */
cnex_ssp->cfghdl = SUN4V_REG_SPEC2CFG_HDL(reg_p->physaddr);
kmem_free(reg_p, reglen);
D1("cnex_attach: cfghdl=0x%llx\n", cnex_ssp->cfghdl);
/* init channel list mutex */
mutex_init(&cnex_ssp->clist_lock, NULL, MUTEX_DRIVER, NULL);
/* Register with LDC module */
cinfo.dip = devi;
cinfo.reg_chan = cnex_reg_chan;
cinfo.unreg_chan = cnex_unreg_chan;
cinfo.add_intr = cnex_add_intr;
cinfo.rem_intr = cnex_rem_intr;
cinfo.clr_intr = cnex_clr_intr;
/*
* LDC register will fail if an nexus instance had already
* registered with the LDC framework
*/
rv = ldc_register(&cinfo);
if (rv) {
DWARN("cnex_attach: unable to register with LDC\n");
ddi_soft_state_free(cnex_state, instance);
mutex_destroy(&cnex_ssp->clist_lock);
return (DDI_FAILURE);
}
if (ddi_create_minor_node(devi, "devctl", S_IFCHR, instance,
DDI_NT_NEXUS, 0) != DDI_SUCCESS) {
ddi_remove_minor_node(devi, NULL);
ddi_soft_state_free(cnex_state, instance);
mutex_destroy(&cnex_ssp->clist_lock);
return (DDI_FAILURE);
}
/* Add interrupt redistribution callback. */
intr_dist_add_weighted(cnex_intr_redist, cnex_ssp);
ddi_report_dev(devi);
return (DDI_SUCCESS);
}
/*ARGSUSED*/
static int
cnex_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
int instance;
ldc_cnex_t cinfo;
cnex_soft_state_t *cnex_ssp;
switch (cmd) {
case DDI_DETACH:
break;
case DDI_SUSPEND:
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
instance = ddi_get_instance(devi);
cnex_ssp = ddi_get_soft_state(cnex_state, instance);
/* check if there are any channels still registered */
if (cnex_ssp->clist) {
cmn_err(CE_WARN, "?cnex_dettach: channels registered %d\n",
ddi_get_instance(devi));
return (DDI_FAILURE);
}
/* Unregister with LDC module */
cinfo.dip = devi;
(void) ldc_unregister(&cinfo);
/* Remove interrupt redistribution callback. */
intr_dist_rem_weighted(cnex_intr_redist, cnex_ssp);
/* destroy mutex */
mutex_destroy(&cnex_ssp->clist_lock);
/* free soft state structure */
ddi_soft_state_free(cnex_state, instance);
return (DDI_SUCCESS);
}
/*ARGSUSED*/
static int
cnex_open(dev_t *devp, int flags, int otyp, cred_t *credp)
{
int instance;
if (otyp != OTYP_CHR)
return (EINVAL);
instance = getminor(*devp);
if (ddi_get_soft_state(cnex_state, instance) == NULL)
return (ENXIO);
return (0);
}
/*ARGSUSED*/
static int
cnex_close(dev_t dev, int flags, int otyp, cred_t *credp)
{
int instance;
if (otyp != OTYP_CHR)
return (EINVAL);
instance = getminor(dev);
if (ddi_get_soft_state(cnex_state, instance) == NULL)
return (ENXIO);
return (0);
}
/*ARGSUSED*/
static int
cnex_ioctl(dev_t dev,
int cmd, intptr_t arg, int mode, cred_t *cred_p, int *rval_p)
{
int instance;
cnex_soft_state_t *cnex_ssp;
instance = getminor(dev);
if ((cnex_ssp = ddi_get_soft_state(cnex_state, instance)) == NULL)
return (ENXIO);
ASSERT(cnex_ssp->devi);
return (ndi_devctl_ioctl(cnex_ssp->devi, cmd, arg, mode, 0));
}
static int
cnex_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
void *arg, void *result)
{
char name[MAXNAMELEN];
uint32_t reglen;
int *cnex_regspec;
switch (ctlop) {
case DDI_CTLOPS_REPORTDEV:
if (rdip == NULL)
return (DDI_FAILURE);
cmn_err(CE_CONT, "?channel-device: %s%d\n",
ddi_driver_name(rdip), ddi_get_instance(rdip));
return (DDI_SUCCESS);
case DDI_CTLOPS_INITCHILD:
{
dev_info_t *child = (dev_info_t *)arg;
if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, child,
DDI_PROP_DONTPASS, "reg",
&cnex_regspec, &reglen) != DDI_SUCCESS) {
return (DDI_FAILURE);
}
(void) snprintf(name, sizeof (name), "%x", *cnex_regspec);
ddi_set_name_addr(child, name);
ddi_set_parent_data(child, NULL);
ddi_prop_free(cnex_regspec);
return (DDI_SUCCESS);
}
case DDI_CTLOPS_UNINITCHILD:
{
dev_info_t *child = (dev_info_t *)arg;
NDI_CONFIG_DEBUG((CE_NOTE,
"DDI_CTLOPS_UNINITCHILD(%s, instance=%d)",
ddi_driver_name(child), DEVI(child)->devi_instance));
ddi_set_name_addr(child, NULL);
return (DDI_SUCCESS);
}
case DDI_CTLOPS_DMAPMAPC:
case DDI_CTLOPS_REPORTINT:
case DDI_CTLOPS_REGSIZE:
case DDI_CTLOPS_NREGS:
case DDI_CTLOPS_SIDDEV:
case DDI_CTLOPS_SLAVEONLY:
case DDI_CTLOPS_AFFINITY:
case DDI_CTLOPS_POKE:
case DDI_CTLOPS_PEEK:
/*
* These ops correspond to functions that "shouldn't" be called
* by a channel-device driver. So we whine when we're called.
*/
cmn_err(CE_WARN, "%s%d: invalid op (%d) from %s%d\n",
ddi_driver_name(dip), ddi_get_instance(dip), ctlop,
ddi_driver_name(rdip), ddi_get_instance(rdip));
return (DDI_FAILURE);
case DDI_CTLOPS_ATTACH:
case DDI_CTLOPS_BTOP:
case DDI_CTLOPS_BTOPR:
case DDI_CTLOPS_DETACH:
case DDI_CTLOPS_DVMAPAGESIZE:
case DDI_CTLOPS_IOMIN:
case DDI_CTLOPS_POWER:
case DDI_CTLOPS_PTOB:
default:
/*
* Everything else (e.g. PTOB/BTOP/BTOPR requests) we pass up
*/
return (ddi_ctlops(dip, rdip, ctlop, arg, result));
}
}
/*
* cnex_find_chan_dip -- Find the dip of a device that is corresponding
* to the specific channel. Below are the details on how the dip
* is derived.
*
* - In the MD, the cfg-handle is expected to be unique for
* virtual-device nodes that have the same 'name' property value.
* This value is expected to be the same as that of "reg" property
* of the corresponding OBP device node.
*
* - The value of the 'name' property of a virtual-device node
* in the MD is expected to be the same for the corresponding
* OBP device node.
*
* - Find the virtual-device node corresponding to a channel-endpoint
* by walking backwards. Then obtain the values for the 'name' and
* 'cfg-handle' properties.
*
* - Walk all the children of the cnex, find a matching dip which
* has the same 'name' and 'reg' property values.
*
* - The channels that have no corresponding device driver are
* treated as if they correspond to the cnex driver,
* that is, return cnex dip for them. This means, the
* cnex acts as an umbrella device driver. Note, this is
* for 'intrstat' statistics purposes only. As a result of this,
* the 'intrstat' shows cnex as the device that is servicing the
* interrupts corresponding to these channels.
*
* For now, only one such case is known, that is, the channels that
* are used by the "domain-services".
*/
static dev_info_t *
cnex_find_chan_dip(dev_info_t *dip, uint64_t chan_id,
md_t *mdp, mde_cookie_t mde)
{
int listsz;
int num_nodes;
int num_devs;
uint64_t cfghdl;
char *md_name;
mde_cookie_t *listp;
dev_info_t *cdip = NULL;
num_nodes = md_node_count(mdp);
ASSERT(num_nodes > 0);
listsz = num_nodes * sizeof (mde_cookie_t);
listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
num_devs = md_scan_dag(mdp, mde, md_find_name(mdp, "virtual-device"),
md_find_name(mdp, "back"), listp);
ASSERT(num_devs <= 1);
if (num_devs <= 0) {
DWARN("cnex_find_chan_dip:channel(0x%llx): "
"No virtual-device found\n", chan_id);
goto fdip_exit;
}
if (md_get_prop_str(mdp, listp[0], "name", &md_name) != 0) {
DWARN("cnex_find_chan_dip:channel(0x%llx): "
"name property not found\n", chan_id);
goto fdip_exit;
}
D1("cnex_find_chan_dip: channel(0x%llx): virtual-device "
"name property value = %s\n", chan_id, md_name);
if (md_get_prop_val(mdp, listp[0], "cfg-handle", &cfghdl) != 0) {
DWARN("cnex_find_chan_dip:channel(0x%llx): virtual-device's "
"cfg-handle property not found\n", chan_id);
goto fdip_exit;
}
D1("cnex_find_chan_dip:channel(0x%llx): virtual-device cfg-handle "
" property value = 0x%x\n", chan_id, cfghdl);
for (cdip = ddi_get_child(dip); cdip != NULL;
cdip = ddi_get_next_sibling(cdip)) {
int *cnex_regspec;
uint32_t reglen;
char *dev_name;
if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
DDI_PROP_DONTPASS, "name",
&dev_name) != DDI_PROP_SUCCESS) {
DWARN("cnex_find_chan_dip: name property not"
" found for dip(0x%p)\n", cdip);
continue;
}
if (strcmp(md_name, dev_name) != 0) {
ddi_prop_free(dev_name);
continue;
}
ddi_prop_free(dev_name);
if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, cdip,
DDI_PROP_DONTPASS, "reg",
&cnex_regspec, &reglen) != DDI_SUCCESS) {
DWARN("cnex_find_chan_dip: reg property not"
" found for dip(0x%p)\n", cdip);
continue;
}
if (*cnex_regspec == cfghdl) {
D1("cnex_find_chan_dip:channel(0x%llx): found "
"dip(0x%p) drvname=%s\n", chan_id, cdip,
ddi_driver_name(cdip));
ddi_prop_free(cnex_regspec);
break;
}
ddi_prop_free(cnex_regspec);
}
fdip_exit:
if (cdip == NULL) {
/*
* If a virtual-device node exists but no dip found,
* then for now print a DEBUG error message only.
*/
if (num_devs > 0) {
DERR("cnex_find_chan_dip:channel(0x%llx): "
"No device found\n", chan_id);
}
/* If no dip was found, return cnex device's dip. */
cdip = dip;
}
kmem_free(listp, listsz);
D1("cnex_find_chan_dip:channel(0x%llx): returning dip=0x%p\n",
chan_id, cdip);
return (cdip);
}
/* -------------------------------------------------------------------------- */