evtchn_dev.c revision 193974072f41a843678abf5f61979c748687e66b
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* evtchn.c
*
* Driver for receiving and demuxing event-channel signals.
*
* Copyright (c) 2004-2005, K A Fraser
* Multi-process extensions Copyright (c) 2004, Steven Smith
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <sys/types.h>
#include <sys/hypervisor.h>
#include <sys/machsystm.h>
#include <sys/mutex.h>
#include <sys/evtchn_impl.h>
#include <sys/ddi_impldefs.h>
#include <sys/avintr.h>
#include <sys/cpuvar.h>
#include <sys/smp_impldefs.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>
#include <sys/fcntl.h>
#include <sys/open.h>
#include <sys/stat.h>
#include <sys/psm.h>
#include <sys/cpu.h>
#include <sys/cmn_err.h>
#include <sys/xen_errno.h>
#include <sys/policy.h>
#include <xen/sys/evtchn.h>
/*
 * Some handy macros.
 *
 * EVTCHNDRV_MINOR2INST		maps a minor number to a soft-state instance
 *				number; the mapping is the identity.
 * EVTCHNDRV_DEFAULT_NCLONES	default size of the clone table (initial
 *				value of evtchndrv_nclones).
 * EVTCHNDRV_INST2SOFTS		looks up the soft state registered under
 *				evtchndrv_statep for an instance number.
 */
#define EVTCHNDRV_MINOR2INST(minor) ((int)(minor))
#define EVTCHNDRV_DEFAULT_NCLONES 256
#define EVTCHNDRV_INST2SOFTS(inst) \
(ddi_get_soft_state(evtchndrv_statep, (inst)))
/*
 * Soft state data structure for evtchn driver.
 *
 * One of these is allocated per successful open (see evtchndrv_open) and
 * released again in evtchndrv_close.
 */
struct evtsoftdata {
/* Driver's devinfo node; copied from evtchndrv_dip at open time. */
dev_info_t *dip;
/* Notification ring, accessed via /dev/xen/evtchn. */
/* Ring capacity in ports; the ring buffer is one page. */
#define EVTCHN_RING_SIZE (PAGESIZE / sizeof (evtchn_port_t))
/*
 * Reduce a free-running index to a ring slot.  Masking is only equivalent
 * to a modulo when EVTCHN_RING_SIZE is a power of two.
 */
#define EVTCHN_RING_MASK(_i) ((_i) & (EVTCHN_RING_SIZE - 1))
evtchn_port_t *ring;
/*
 * ring_cons is advanced by evtchndrv_read, ring_prod by
 * evtchn_device_upcall.  ring_overflow is set by the upcall when an event
 * arrives while the ring is full; nothing in this file clears it.
 */
unsigned int ring_cons, ring_prod, ring_overflow;
/* Processes wait on this queue when ring is empty. */
kcondvar_t evtchn_wait;
/* Held around accesses to the ring indices in upcall, read and poll. */
kmutex_t evtchn_lock;
/* Poll head handed out by evtchndrv_poll and woken by the upcall. */
struct pollhead evtchn_pollhead;
/* last pid to bind to this event channel. debug aid. */
pid_t pid;
};
/* Soft-state anchor; set up in _init, one entry per open clone. */
static void *evtchndrv_statep;
/* Number of clone-table slots; slot 0 is the clone device itself. */
int evtchndrv_nclones = EVTCHNDRV_DEFAULT_NCLONES;
/* Clone table: non-zero means the minor with that index is in use. */
static int *evtchndrv_clone_tab;
/* devinfo node saved by attach for getinfo and for new soft states. */
static dev_info_t *evtchndrv_dip;
/* Protects evtchndrv_clone_tab. */
static kmutex_t evtchndrv_clone_tab_mutex;
/* Forward declaration: attach refers to detach before its definition. */
static int evtchndrv_detach(dev_info_t *, ddi_detach_cmd_t);
/* Who's bound to each port? */
static struct evtsoftdata *port_user[NR_EVENT_CHANNELS];
/* Held around updates to port_user[] and around the upcall's lookup. */
static kmutex_t port_user_lock;
/*
 * Interrupt-side handler for event channels owned by this driver.
 *
 * Picks up the pending port from the global mailbox, clears it, and, if
 * an open instance is bound to the port, appends the port number to that
 * instance's notification ring.  A reader and any pollers are woken only
 * on the empty -> non-empty transition.  If the ring is already full the
 * event is dropped and the instance is flagged as overflowed.
 */
void
evtchn_device_upcall()
{
	struct evtsoftdata *owner;
	int evtchn;
	int was_empty;
	int wake_pollers = 0;

	/*
	 * The port that triggered this invocation was left in a global
	 * mailbox because the generic interrupt service layer gives us no
	 * way to receive a dynamic argument.  The mailbox is safe since we
	 * either run with interrupts disabled or non-preemptable until we
	 * reach this point.
	 */
	evtchn = ec_dev_mbox;
	ASSERT(evtchn != 0);
	ec_dev_mbox = 0;
	ec_clear_evtchn(evtchn);

	mutex_enter(&port_user_lock);
	owner = port_user[evtchn];
	if (owner != NULL) {
		mutex_enter(&owner->evtchn_lock);
		if ((owner->ring_prod - owner->ring_cons) >=
		    EVTCHN_RING_SIZE) {
			/* No room left: remember that an event was lost. */
			owner->ring_overflow = 1;
		} else {
			owner->ring[EVTCHN_RING_MASK(owner->ring_prod)] =
			    evtchn;
			was_empty = (owner->ring_cons == owner->ring_prod);
			owner->ring_prod++;
			if (was_empty) {
				/* Ring just went non-empty: wake the reader. */
				cv_signal(&owner->evtchn_wait);
				wake_pollers = 1;
			}
		}
		mutex_exit(&owner->evtchn_lock);

		/* Pollers are woken after the per-instance lock is dropped. */
		if (wake_pollers) {
			pollwakeup(&owner->evtchn_pollhead,
			    POLLIN | POLLRDNORM);
		}
	}
	mutex_exit(&port_user_lock);
}
/*
 * read(9E): copy pending event-channel port numbers to the caller.
 *
 * The request is rounded down to a whole number of evtchn_port_t entries
 * and capped at one page.  When the ring is empty the call blocks unless
 * O_NONBLOCK is set.
 *
 * Returns 0 on success (including a request too small for one port),
 * EPERM if the policy check fails, EFBIG if the ring has overflowed,
 * EAGAIN for an empty ring in non-blocking mode, EINTR if the wait was
 * interrupted, and EFAULT if the copy-out fails.
 */
/* ARGSUSED */
static int
evtchndrv_read(dev_t dev, struct uio *uio, cred_t *cr)
{
	struct evtsoftdata *ep;
	minor_t minor = getminor(dev);
	unsigned int cons, prod;
	unsigned int head_bytes = 0, tail_bytes = 0;
	ssize_t limit;
	int err = 0;

	if (secpolicy_xvm_control(cr))
		return (EPERM);

	ep = EVTCHNDRV_INST2SOFTS(EVTCHNDRV_MINOR2INST(minor));

	/* Only ever transfer a whole number of ports. */
	limit = uio->uio_resid;
	limit &= ~(sizeof (evtchn_port_t) - 1);
	if (limit == 0)
		return (0);
	if (limit > PAGESIZE)
		limit = PAGESIZE;

	mutex_enter(&ep->evtchn_lock);

	/* Wait until there is something to hand back. */
	for (;;) {
		if (ep->ring_overflow) {
			err = EFBIG;
			goto done;
		}

		cons = ep->ring_cons;
		prod = ep->ring_prod;
		if (cons != prod)
			break;

		if (uio->uio_fmode & O_NONBLOCK) {
			err = EAGAIN;
			goto done;
		}

		if (cv_wait_sig(&ep->evtchn_wait, &ep->evtchn_lock) == 0) {
			err = EINTR;
			goto done;
		}
	}

	/*
	 * Work out the byte lengths of the (up to) two contiguous chunks.
	 * A second chunk exists only when the pending data crosses the
	 * ring wrap point.
	 */
	if (((cons ^ prod) & EVTCHN_RING_SIZE) == 0) {
		head_bytes = (prod - cons) * sizeof (evtchn_port_t);
		tail_bytes = 0;
	} else {
		head_bytes = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(cons)) *
		    sizeof (evtchn_port_t);
		tail_bytes = EVTCHN_RING_MASK(prod) * sizeof (evtchn_port_t);
	}

	/* Trim the chunks to the caller's byte limit. */
	if (head_bytes > limit) {
		head_bytes = limit;
		tail_bytes = 0;
	} else if ((head_bytes + tail_bytes) > limit) {
		tail_bytes = limit - head_bytes;
	}

	if (uiomove(&ep->ring[EVTCHN_RING_MASK(cons)], head_bytes,
	    UIO_READ, uio) != 0) {
		err = EFAULT;
		goto done;
	}
	if (tail_bytes != 0 &&
	    uiomove(&ep->ring[0], tail_bytes, UIO_READ, uio) != 0) {
		err = EFAULT;
		goto done;
	}

	/* Consume only what was actually copied out. */
	ep->ring_cons += (head_bytes + tail_bytes) / sizeof (evtchn_port_t);

done:
	mutex_exit(&ep->evtchn_lock);
	return (err);
}
/*
 * write(9E): the caller writes an array of port numbers; every port in
 * the array that is bound to this open instance is unmasked.  Ports that
 * are out of range or bound to someone else are silently skipped.
 *
 * The request is rounded down to a whole number of evtchn_port_t entries
 * and capped at one page.  Returns 0 on success, EPERM if the policy
 * check fails, or the error from uiomove().
 */
/* ARGSUSED */
static int
evtchndrv_write(dev_t dev, struct uio *uio, cred_t *cr)
{
	struct evtsoftdata *ep;
	evtchn_port_t *ports;
	minor_t minor = getminor(dev);
	ssize_t nbytes;
	ulong_t saved;
	int err = 0;
	int idx;

	if (secpolicy_xvm_control(cr))
		return (EPERM);

	ep = EVTCHNDRV_INST2SOFTS(EVTCHNDRV_MINOR2INST(minor));

	/* Staging buffer for the caller's port list. */
	ports = kmem_alloc(PAGESIZE, KM_SLEEP);

	/* Only ever consume a whole number of ports. */
	nbytes = uio->uio_resid;
	nbytes &= ~(sizeof (evtchn_port_t) - 1);
	if (nbytes == 0)
		goto out;
	if (nbytes > PAGESIZE)
		nbytes = PAGESIZE;

	err = uiomove(ports, nbytes, UIO_WRITE, uio);
	if (err != 0)
		goto out;

	mutex_enter(&port_user_lock);
	for (idx = 0; idx < (nbytes / sizeof (evtchn_port_t)); idx++) {
		evtchn_port_t port = ports[idx];

		if (port >= NR_EVENT_CHANNELS || port_user[port] != ep)
			continue;

		/* Unmask with interrupts disabled. */
		saved = intr_clear();
		ec_unmask_evtchn(port);
		intr_restore(saved);
	}
	mutex_exit(&port_user_lock);

out:
	kmem_free(ports, PAGESIZE);
	return (err);
}
/*
 * Make open instance `u' the owner of event channel `port': record the
 * ownership in port_user[], add the port to the driver's IRQ and unmask
 * it.  The port must not already have an owner.
 */
static void
evtchn_bind_to_user(struct evtsoftdata *u, int port)
{
	ulong_t saved;

	/*
	 * Remember which process bound last; this is kept purely as a
	 * debugging aid.
	 */
	u->pid = ddi_get_pid();

	mutex_enter(&port_user_lock);

	ASSERT(port_user[port] == NULL);
	port_user[port] = u;
	ec_irq_add_evtchn(ec_dev_irq, port);

	/* Unmask with interrupts disabled. */
	saved = intr_clear();
	ec_unmask_evtchn(port);
	intr_restore(saved);

	mutex_exit(&port_user_lock);
}
/*
 * Release the binding between event channel `port' and its owning open
 * instance.  The caller must hold port_user_lock and the port must
 * currently have an owner.
 */
static void
evtchndrv_close_evtchn(int port)
{
	ASSERT(MUTEX_HELD(&port_user_lock));
	ASSERT(port_user[port] != NULL);

	(void) ec_mask_evtchn(port);

	/*
	 * An event may still be in transit to us.  If it has already been
	 * placed in the ring buffer, a client may see a spurious event
	 * notification on its next read of the evtchn device, so clients
	 * have to tolerate spurious notifications.
	 */
	port_user[port] = NULL;

	/*
	 * The port is masked and is expected to stay that way; detach it
	 * from the driver's IRQ.
	 */
	ec_irq_rm_evtchn(ec_dev_irq, port);
}
/*
 * ioctl(9E): bind, unbind and notify event channels on behalf of the
 * calling open instance.
 *
 * For the three BIND commands the newly bound port number is returned
 * through *rvalp; for all other outcomes *rvalp is 0.
 *
 * Returns 0 on success, EPERM if the policy check fails, EFAULT if the
 * argument cannot be copied in, ENOTCONN if the named port is not bound
 * to this instance, ENOSYS for an unknown command, or the error from the
 * underlying Xen bind/alloc call.
 */
/* ARGSUSED */
static int
evtchndrv_ioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cr,
    int *rvalp)
{
	struct evtsoftdata *ep;
	minor_t minor = getminor(dev);
	int err = 0;

	if (secpolicy_xvm_control(cr))
		return (EPERM);

	ep = EVTCHNDRV_INST2SOFTS(EVTCHNDRV_MINOR2INST(minor));

	*rvalp = 0;

	switch (cmd) {
	case IOCTL_EVTCHN_BIND_VIRQ: {
		struct ioctl_evtchn_bind_virq req;

		if (copyin((void *)data, &req, sizeof (req)) != 0) {
			err = EFAULT;
		} else {
			err = xen_bind_virq(req.virq, 0, rvalp);
			if (err == 0)
				evtchn_bind_to_user(ep, *rvalp);
		}
		break;
	}

	case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
		struct ioctl_evtchn_bind_interdomain req;

		if (copyin((void *)data, &req, sizeof (req)) != 0) {
			err = EFAULT;
		} else {
			err = xen_bind_interdomain(req.remote_domain,
			    req.remote_port, rvalp);
			if (err == 0) {
				ec_bind_vcpu(*rvalp, 0);
				evtchn_bind_to_user(ep, *rvalp);
			}
		}
		break;
	}

	case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
		struct ioctl_evtchn_bind_unbound_port req;

		if (copyin((void *)data, &req, sizeof (req)) != 0) {
			err = EFAULT;
		} else {
			err = xen_alloc_unbound_evtchn(req.remote_domain,
			    rvalp);
			if (err == 0)
				evtchn_bind_to_user(ep, *rvalp);
		}
		break;
	}

	case IOCTL_EVTCHN_UNBIND: {
		struct ioctl_evtchn_unbind req;

		/*
		 * NOTE(review): an out-of-range port yields EFAULT here but
		 * EINVAL for IOCTL_EVTCHN_NOTIFY; confirm whether callers
		 * depend on the difference before unifying.
		 */
		if (copyin((void *)data, &req, sizeof (req)) != 0 ||
		    req.port >= NR_EVENT_CHANNELS) {
			err = EFAULT;
			break;
		}

		mutex_enter(&port_user_lock);
		if (port_user[req.port] == ep)
			evtchndrv_close_evtchn(req.port);
		else
			err = ENOTCONN;
		mutex_exit(&port_user_lock);
		break;
	}

	case IOCTL_EVTCHN_NOTIFY: {
		struct ioctl_evtchn_notify req;

		/*
		 * NOTE(review): port_user[] is examined here without
		 * port_user_lock, unlike the UNBIND path.
		 */
		if (copyin((void *)data, &req, sizeof (req)) != 0)
			err = EFAULT;
		else if (req.port >= NR_EVENT_CHANNELS)
			err = EINVAL;
		else if (port_user[req.port] != ep)
			err = ENOTCONN;
		else
			ec_notify_via_evtchn(req.port);
		break;
	}

	default:
		err = ENOSYS;
		break;
	}

	return (err);
}
/*
 * chpoll(9E): report readiness of an open instance.
 *
 * POLLOUT is reported whenever it is requested, POLLERR whenever the
 * ring has overflowed, and POLLIN/POLLRDNORM (as requested) when the
 * ring holds at least one event.  The poll head is handed back only when
 * readable events were requested, the ring is empty, nothing else is
 * being reported and `anyyet' is zero.  Always returns 0.
 */
static int
evtchndrv_poll(dev_t dev, short ev, int anyyet, short *revp, pollhead_t **phpp)
{
	struct evtsoftdata *ep;
	minor_t minor = getminor(dev);
	short revents = 0;

	ep = EVTCHNDRV_INST2SOFTS(EVTCHNDRV_MINOR2INST(minor));
	*phpp = (struct pollhead *)NULL;

	if (ev & POLLOUT)
		revents |= POLLOUT;
	if (ep->ring_overflow)
		revents |= POLLERR;

	if (ev & (POLLIN | POLLRDNORM)) {
		mutex_enter(&ep->evtchn_lock);
		if (ep->ring_cons == ep->ring_prod) {
			/* Nothing to read: let the caller sleep on us. */
			if (revents == 0 && !anyyet)
				*phpp = &ep->evtchn_pollhead;
		} else {
			revents |= (POLLIN | POLLRDNORM) & ev;
		}
		mutex_exit(&ep->evtchn_lock);
	}

	*revp = revents;
	return (0);
}
/*
 * open(9E): clone open.  Only minor 0 may be opened; each successful open
 * claims a free slot in the clone table, allocates and initialises a soft
 * state for it, and rewrites *devp to the new minor.
 *
 * Returns 0 on success, ENXIO for a block open or a non-zero minor, and
 * EAGAIN when no clone slot is free or the soft state cannot be
 * allocated.
 */
/* ARGSUSED */
static int
evtchndrv_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	struct evtsoftdata *ep;
	minor_t minor = getminor(*devp);
	int slot;
	int claimed = 0;

	if (otyp == OTYP_BLK)
		return (ENXIO);

	/*
	 * only allow open on minor = 0 - the clone device
	 */
	if (minor != 0)
		return (ENXIO);

	/*
	 * find a free slot and grab it
	 *
	 * The index is deliberately signed, and success is tracked
	 * explicitly: evtchndrv_nclones is a tunable, and with a value
	 * below 2 the scan must simply find nothing instead of running
	 * off the table or handing out a slot it never claimed.
	 */
	mutex_enter(&evtchndrv_clone_tab_mutex);
	for (slot = 1; slot < evtchndrv_nclones; slot++) {
		if (evtchndrv_clone_tab[slot] == 0) {
			evtchndrv_clone_tab[slot] = 1;
			claimed = 1;
			break;
		}
	}
	mutex_exit(&evtchndrv_clone_tab_mutex);

	if (!claimed)
		return (EAGAIN);

	minor = (minor_t)slot;

	/* Allocate softstate structure */
	if (ddi_soft_state_zalloc(evtchndrv_statep,
	    EVTCHNDRV_MINOR2INST(minor)) != DDI_SUCCESS) {
		/* Give the slot back so a later open can use it. */
		mutex_enter(&evtchndrv_clone_tab_mutex);
		evtchndrv_clone_tab[minor] = 0;
		mutex_exit(&evtchndrv_clone_tab_mutex);
		return (EAGAIN);
	}
	ep = EVTCHNDRV_INST2SOFTS(EVTCHNDRV_MINOR2INST(minor));

	/* ... and init it */
	ep->dip = evtchndrv_dip;

	cv_init(&ep->evtchn_wait, NULL, CV_DEFAULT, NULL);
	mutex_init(&ep->evtchn_lock, NULL, MUTEX_DEFAULT, NULL);

	/* One page of ring; the indices start at zero from the zalloc. */
	ep->ring = kmem_alloc(PAGESIZE, KM_SLEEP);

	/* clone driver */
	*devp = makedevice(getmajor(*devp), minor);

	return (0);
}
/*
 * close(9E): tear down an open instance.  Every event channel still bound
 * to the instance is unbound, then the ring, the synchronisation objects
 * and the soft state are released and the clone slot is returned.
 *
 * Returns 0 on success or ENXIO if no soft state exists for the minor.
 */
/* ARGSUSED */
static int
evtchndrv_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	struct evtsoftdata *ep;
	minor_t minor = getminor(dev);
	int i;

	ep = EVTCHNDRV_INST2SOFTS(EVTCHNDRV_MINOR2INST(minor));
	if (ep == NULL)
		return (ENXIO);

	/*
	 * Drop every binding owned by this instance.  Once port_user[] no
	 * longer refers to ep, the upcall can no longer reach it.
	 */
	mutex_enter(&port_user_lock);
	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
		if (port_user[i] != ep)
			continue;

		evtchndrv_close_evtchn(i);
	}
	mutex_exit(&port_user_lock);

	kmem_free(ep->ring, PAGESIZE);

	/*
	 * Undo the cv_init()/mutex_init() done in evtchndrv_open before
	 * the memory holding them goes away with the soft state.
	 */
	cv_destroy(&ep->evtchn_wait);
	mutex_destroy(&ep->evtchn_lock);

	ddi_soft_state_free(evtchndrv_statep, EVTCHNDRV_MINOR2INST(minor));

	/*
	 * free clone tab slot
	 */
	mutex_enter(&evtchndrv_clone_tab_mutex);
	evtchndrv_clone_tab[minor] = 0;
	mutex_exit(&evtchndrv_clone_tab_mutex);

	return (0);
}
/*
 * getinfo(9E): translate a dev_t (passed in `arg') to either the driver's
 * devinfo node or its instance number.
 *
 * DDI_INFO_DEVT2DEVINFO succeeds only for minor 0 and only once attach
 * has saved the devinfo node; otherwise *result is set to NULL and
 * DDI_FAILURE is returned.  DDI_INFO_DEVT2INSTANCE always reports
 * instance 0.  Any other command fails without touching *result.
 */
/* ARGSUSED */
static int
evtchndrv_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	dev_t dev = (dev_t)arg;
	minor_t minor = getminor(dev);

	if (cmd == DDI_INFO_DEVT2INSTANCE) {
		*result = (void *)0;
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_INFO_DEVT2DEVINFO)
		return (DDI_FAILURE);

	if (minor == 0 && evtchndrv_dip != NULL) {
		*result = (void *)evtchndrv_dip;
		return (DDI_SUCCESS);
	}

	*result = (void *)NULL;
	return (DDI_FAILURE);
}
/*
 * attach(9E): bring up the single driver instance.
 *
 * For DDI_ATTACH this initialises the clone-table mutex, creates the
 * "evtchn" minor node, records the devinfo node, initialises the
 * port-ownership table and its lock, and registers
 * evtchn_device_upcall as the handler for the driver's IRQ.
 * DDI_RESUME is accepted as a no-op.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE for an unknown command or an
 * instance number above 0, or the ddi_create_minor_node() status if the
 * minor node cannot be created.
 */
static int
evtchndrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int error;
	int unit = ddi_get_instance(dip);

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		cmn_err(CE_WARN, "evtchn_attach: unknown cmd 0x%x\n", cmd);
		return (DDI_FAILURE);
	}

	/* DDI_ATTACH */

	/*
	 * only one instance - but we clone using the open routine
	 */
	if (unit > 0)
		return (DDI_FAILURE);

	mutex_init(&evtchndrv_clone_tab_mutex, NULL, MUTEX_DRIVER,
	    NULL);

	error = ddi_create_minor_node(dip, "evtchn", S_IFCHR, unit,
	    DDI_PSEUDO, NULL);
	if (error != DDI_SUCCESS) {
		/*
		 * Undo exactly what has been set up so far.  This is done
		 * here rather than via evtchndrv_detach(), which refuses
		 * every request and releases nothing.
		 */
		ddi_remove_minor_node(dip, NULL);
		mutex_destroy(&evtchndrv_clone_tab_mutex);
		return (error);
	}

	/*
	 * save dip for getinfo
	 */
	evtchndrv_dip = dip;
	ddi_report_dev(dip);

	mutex_init(&port_user_lock, NULL, MUTEX_DRIVER, NULL);
	(void) memset(port_user, 0, sizeof (port_user));

	/* Hook the upcall up to the IRQ shared by all user-bound ports. */
	ec_dev_irq = ec_dev_alloc_irq();
	(void) add_avintr(NULL, IPL_EVTCHN, (avfunc)evtchn_device_upcall,
	    "evtchn_driver", ec_dev_irq, NULL, NULL, NULL, dip);

	return (DDI_SUCCESS);
}
/*
 * detach(9E) entry point.  Detach is not supported: the request is
 * refused with DDI_FAILURE whatever `cmd' is, and nothing is released.
 */
/*ARGSUSED*/
static int
evtchndrv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
/*
 * Don't allow detach for now.
 */
return (DDI_FAILURE);
}
/* Solaris driver framework */
/*
 * Character-device entry points.  The driver implements open, close,
 * read, write, ioctl and chpoll; the remaining entry points are filled
 * with nodev.
 */
static struct cb_ops evtchndrv_cb_ops = {
evtchndrv_open, /* cb_open */
evtchndrv_close, /* cb_close */
nodev, /* cb_strategy */
nodev, /* cb_print */
nodev, /* cb_dump */
evtchndrv_read, /* cb_read */
evtchndrv_write, /* cb_write */
evtchndrv_ioctl, /* cb_ioctl */
nodev, /* cb_devmap */
nodev, /* cb_mmap */
nodev, /* cb_segmap */
evtchndrv_poll, /* cb_chpoll */
ddi_prop_op, /* cb_prop_op */
0, /* cb_stream */
D_NEW | D_MP | D_64BIT /* cb_flag */
};
/*
 * Device operations: getinfo, attach and detach are provided by this
 * file; character entry points are reached through evtchndrv_cb_ops.
 */
static struct dev_ops evtchndrv_dev_ops = {
DEVO_REV, /* devo_rev */
0, /* devo_refcnt */
evtchndrv_info, /* devo_getinfo */
nulldev, /* devo_identify */
nulldev, /* devo_probe */
evtchndrv_attach, /* devo_attach */
evtchndrv_detach, /* devo_detach */
nodev, /* devo_reset */
&evtchndrv_cb_ops, /* devo_cb_ops */
NULL, /* devo_bus_ops */
NULL, /* devo_power */
ddi_quiesce_not_needed, /* devo_quiesce */
};
/* Module linkage element describing this module as a device driver. */
static struct modldrv modldrv = {
&mod_driverops, /* Type of module. This one is a driver */
"Evtchn driver", /* Name of the module. */
&evtchndrv_dev_ops /* driver ops */
};
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info();
 * the list holds the single driver element and is NULL-terminated.
 */
static struct modlinkage modlinkage = {
MODREV_1,
&modldrv,
NULL
};
int
_init(void)
{
int err;
err = ddi_soft_state_init(&evtchndrv_statep,
sizeof (struct evtsoftdata), 1);
if (err)
return (err);
err = mod_install(&modlinkage);
if (err)
ddi_soft_state_fini(&evtchndrv_statep);
else
evtchndrv_clone_tab = kmem_zalloc(
sizeof (int) * evtchndrv_nclones, KM_SLEEP);
return (err);
}
int
_fini(void)
{
int e;
e = mod_remove(&modlinkage);
if (e)
return (e);
ddi_soft_state_fini(&evtchndrv_statep);
return (0);
}
/*
 * Module information entry point: hands the request straight to
 * mod_info() with this module's linkage and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}