/* evtchn.c revision a43153bf6cee5e5307b15ed4f77dafc5c56f333d */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* evtchn.c
*
* Communication via hypervisor event channels.
*
* Copyright (c) 2002-2005, K A Fraser
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/* some parts derived from netbsd's hypervisor_machdep.c 1.2.2.2 */
/*
*
* Copyright (c) 2004 Christian Limpach.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* 3. This section intentionally left blank.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Section 3 of the above license was updated in response to bug 6379571.
*/
#include <sys/hypervisor.h>
#include <sys/machsystm.h>
#include <sys/evtchn_impl.h>
#include <sys/ddi_impldefs.h>
#include <sys/smp_impldefs.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>
#include <sys/privregs.h>
#include <sys/traptrace.h>
/*
* This file manages our association between hypervisor event channels and
* Solaris's IRQs. This is a one-to-one mapping, with the exception of
* IPI IRQs, for which there is one event channel per CPU participating
* in the IPI, and the clock VIRQ which also has an event channel per cpu
*
* IRQT_VIRQ:
* The hypervisor's standard virtual IRQ, used for the clock timer, for
* example. This code allows any cpu to bind to one of these, although
* some are treated specially (i.e. VIRQ_DEBUG).
* Event channel binding is done via EVTCHNOP_bind_virq.
*
* IRQT_PIRQ:
* These associate a physical IRQ with an event channel via
* EVTCHNOP_bind_pirq.
*
* IRQT_IPI:
* A cross-call IRQ. Maps to "ncpus" event channels, each of which is
* bound to exactly one of the vcpus. We do not currently support
* unbinding of IPIs (since Solaris doesn't need it). Uses
* EVTCHNOP_bind_ipi.
*
* IRQT_EVTCHN:
* A "normal" binding to an event channel, typically used by the frontend
* drivers to bind to their backend event channel.
*
* IRQT_DEV_EVTCHN:
* have a one-IRQ to many-evtchn mapping. We only track evtchn->irq for
* these event channels, which are managed via ec_irq_add/rm_evtchn().
* We enforce that IRQT_DEV_EVTCHN's representative evtchn (->ii_evtchn)
* is zero, and make any calls to irq_evtchn() an error, to prevent
* accidentally attempting to use the illegal evtchn 0.
*
*
* All other mapping data is kept. The drivers will remove their own event
* channels via xendev on receiving a DDI_SUSPEND. This leaves us with
* the IPIs and VIRQs, which we handle in ec_suspend() and ec_resume()
* below.
*
* CPU binding
*
* When an event channel is bound to a CPU, we set a bit in a mask present
* in the machcpu (evt_affinity) to indicate that this CPU can accept this
* event channel. For both IPIs and VIRQs, this binding is fixed at
* allocation time and we never modify it. All other event channels are
* bound via the PSM either as part of add_avintr(), or interrupt
* redistribution (xen_psm_dis/enable_intr()) as a result of CPU
* offline/online operations.
*
* Locking
*
* Updates are done holding the ec_lock. The xen_callback_handler()
* routine reads the mapping data in a lockless fashion. Additionally,
* some mapping data is read without the lock during the suspend/resume
* cycle. The IPI info is also examined without the lock; this is OK
* since we only ever change IPI info during initial setup and resume.
*/
/*
 * NOTE(review): the EVTCHN_MASKED() macro body appears to be missing from
 * this copy -- the trailing backslash splices the following declaration
 * into the macro.  Confirm against the upstream evtchn.c before use.
 */
#define EVTCHN_MASKED(ev) \
static short evtchn_to_irq[NR_EVENT_CHANNELS];
static int evtchn_owner[NR_EVENT_CHANNELS];
#ifdef DEBUG
#endif
/*
 * Mailbox for communication with the evtchn device driver.
 * We rely on only cpu 0 servicing the event channels associated
 * with the driver. i.e. all evtchn driver evtchns are bound to cpu 0.
 */
volatile int ec_dev_mbox; /* mailbox for evtchn device driver */
/*
 * See the locking description above.
 */
/*
 * Bitmap indicating which PIRQs require the hypervisor to be notified
 * on unmask.
 */
/*
 * NOTE(review): the declarations described by the two comments above (the
 * lock and the PIRQ needs-EOI bitmap) are not present in this copy --
 * presumably lost in extraction; verify against upstream.
 */
static int ec_debug_irq = INVALID_IRQ;	/* set by the debug-irq setup below */
int ec_dev_irq = INVALID_IRQ;		/* irq used by the evtchn device driver */
/*
 * NOTE(review): the function fragments in this section are truncated --
 * most are missing their name lines and their EVTCHNOP_* hypercall bodies.
 * From the surviving text they presumably wrap EVTCHNOP_bind_virq,
 * EVTCHNOP_bind_interdomain, EVTCHNOP_alloc_unbound, EVTCHNOP_close,
 * EVTCHNOP_bind_ipi, EVTCHNOP_bind_vcpu, PHYSDEVOP pirq binding and
 * EVTCHNOP_unmask, in that order; confirm against the upstream evtchn.c.
 */
int
{
int err;
else
return (err);
}
int
{
int err;
&bind)) == 0)
else
return (err);
}
int
{
int err;
&alloc)) == 0) {
/* ensure evtchn is masked till we're ready to use it */
(void) ec_mask_evtchn(*evtchnp);
} else {
}
return (err);
}
/* Close an event channel via the hypervisor; body appears truncated. */
static int
xen_close_evtchn(int evtchn)
{
int err;
if (err)
return (err);
}
/* Presumably binds an IPI event channel; panics on hypercall failure. */
static int
{
panic("xen_bind_ipi() failed");
}
/* Send future instances of this interrupt to other vcpu. */
static void
{
panic("xen_bind_vcpu() failed");
}
/* Bind a physical IRQ to an event channel; body appears truncated. */
static int
xen_bind_pirq(int pirq)
{
int ret;
}
/* unmask an evtchn and send upcall to appropriate vcpu if pending bit is set */
static void
xen_evtchn_unmask(int evtchn)
{
panic("xen_evtchn_unmask() failed");
}
/* NOTE(review): likely updates per-cpu evtchn affinity data -- confirm. */
static void
{
struct xen_evt_data *cpe;
/*
 * Use lockless search of cpu_list, similar to mutex_vector_enter().
 */
do {
else
}
static void
{
}
static void
{
}
/*
 * NOTE(review): the following fragments are truncated.  The switch
 * statements over IRQT_IPI/IRQT_VIRQ suggest irq-info setup and evtchn
 * allocation keyed by irq type; confirm against the upstream evtchn.c.
 */
static void
{
case IRQT_IPI:
break;
case IRQT_VIRQ:
break;
default:
break;
}
/*
* If a CPU is not specified, we expect to bind it to a CPU later via
* the PSM.
*/
if (cpu != -1) {
}
}
/* Presumably scans for a free IRQ slot; panics when the table is full. */
static int
{
int irq;
break;
}
panic("No available IRQ to bind to: increase NR_IRQS!\n");
/*
* Set irq/has_handler field to zero which means handler not installed
*/
return (irq);
}
/* Presumably allocates/binds an event channel according to irq type. */
static int
{
int evtchn;
case IRQT_IPI:
break;
case IRQT_VIRQ:
break;
default:
break;
}
return (evtchn);
}
/* Presumably closes *evtchnp via the hypervisor, then clears it. */
static void
{
int err;
*evtchnp = 0;
}
/* Notify the hypervisor (EOI) on unmask of a PIRQ; body truncated. */
static void
pirq_unmask_notify(int pirq)
{
struct physdev_eoi eoi;
}
}
/* Query whether a PIRQ needs unmask notification; body truncated. */
static void
pirq_query_unmask(int pirq)
{
struct physdev_irq_status_query irq_status;
}
static void
{
}
/*
 * probe if a pirq is available to bind to, return 1 if available
 * else return 0.
 * Note that for debug versions of xen this probe may cause an in use IRQ
 * warning message from xen.
 */
/* NOTE(review): probe body is truncated; only the return paths survive. */
int
ec_probe_pirq(int pirq)
{
return (0);
} else {
return (1);
}
}
/*
 * Bind an event channel to a vcpu
 */
void
{
}
/*
 * Set up a physical device irq to be associated with an event channel.
 */
void
{
int evtchn;
/*
 * Test if this PIRQ is already bound to an evtchn,
 * which means it is a shared IRQ and we don't want to
 * bind and do some initial setup that has already been
 * done for this irq on a previous trip through this code.
 */
} else {
}
}
/*
 * Tear down the evtchn(s) behind an irq.  NOTE(review): body is truncated;
 * the per-type unbinding logic (single evtchn vs. per-cpu VIRQ evtchns vs.
 * PIRQ) is only partially visible -- confirm against upstream.
 */
void
ec_unbind_irq(int irq)
{
int drop_lock = 0;
int type, i;
/*
 * Nasty, but we need this during suspend.
 */
drop_lock = 1;
}
/* There's only one event channel associated with this irq */
/*
 * Each cpu on the system can have its own event channel
 * associated with a virq. Unbind them all.
 */
for (i = 0; i < NCPU; i++) {
if (virqp->mi_evtchns[i] != 0)
}
/* Mark the virq structure as invalid. */
}
/* Re-reserve PIRQ. */
if (drop_lock)
}
/*
 * Rebind an event channel for delivery to a CPU.
 */
/* NOTE(review): function name line is missing; body truncated. */
void
{
/*
 * Binding is done at allocation time for these types, so we should
 * never modify them.
 */
return;
}
/*
 * Now send the new target processor a NOP IPI.
 * It will check for any pending interrupts, and so service any that
 * got delivered to the wrong processor by mistake.
 */
if (ncpus > 1)
}
/* NOTE(review): likely sets an irq priority; only error returns survive. */
int
{
return (-1);
return (-1);
return (0);
}
void
ec_clear_irq_priority(int irq)
{
}
/* Presumably maps an event channel back to its irq via evtchn_to_irq[]. */
int
{
return (evtchn_to_irq[evtchn]);
}
int
{
int err;
int evtchn;
} else {
}
}
/* NOTE(review): presumably binds an IPI evtchn to an irq, then unmasks it. */
int
{
int evtchn;
} else {
}
/*
 * Unmask the new evtchn so that it can be seen by the target cpu
 */
flags = intr_clear();
}
/*
 * When bringing up a CPU, bind to all the IPIs that CPU0 bound.
 */
void
ec_bind_cpu_ipis(int cpu)
{
int i;
/* One IPI per priority level; skip levels with no IPI bound (truncated). */
for (i = 0; i < MAXIPL; i++) {
continue;
(void) ec_bind_ipi_to_irq(i, cpu);
}
}
/*
 * Can this IRQ be rebound to another CPU?
 */
int
ec_irq_rebindable(int irq)
{
return (0);
}
/*
 * Should this IRQ be unbound from this CPU (which is being offlined) to
 * another?
 */
/* NOTE(review): name line missing; condition expression truncated. */
int
{
return (ec_irq_rebindable(irq) &&
}
void
{
}
void
{
return;
}
/* NOTE(review): presumably ec_irq_add_evtchn() for IRQT_DEV_EVTCHN irqs. */
void
{
/*
 * See description of IRQT_DEV_EVTCHN above.
 */
/*
 * We enforce that the representative event channel for IRQT_DEV_EVTCHN
 * is zero, so PSM operations on it have no effect.
 */
}
/* NOTE(review): presumably ec_irq_rm_evtchn(); only the unbind survives. */
void
{
unbind_evtchn(&ec);
}
/*
 * for an explanation.
 */
/*
 * NOTE(review): the first half of the comment above was lost; it likely
 * referred the reader to the IRQT_DEV_EVTCHN description at the top of
 * this file.
 */
int
ec_dev_alloc_irq(void)
{
int i;
for (i = 0; i < NR_IRQS; i++) {
break;
}
/*
 * Force the evtchn to zero for the special evtchn device irq
 */
return (i);
}
void
ec_enable_irq(unsigned int irq)
{
return;
flag = intr_clear();
}
void
ec_disable_irq(unsigned int irq)
{
return;
/*
 * Spin till we are the one to mask the evtchn
 * Ensures no one else can be servicing this evtchn.
 */
SMT_PAUSE();
}
/* NOTE(review): presumably ec_evtchn_pending(); body truncated. */
static int
{
}
/* Return whether the evtchn behind this irq has a pending event. */
int
ec_pending_irq(unsigned int irq)
{
return (ec_evtchn_pending(evtchn));
}
void
ec_clear_irq(int irq)
{
int evtchn;
return;
}
/*
 * Unmask an irq's evtchn with interrupts disabled.  The PIRQ case
 * presumably requires an unmask-notify to the hypervisor (truncated).
 */
void
ec_unmask_irq(int irq)
{
flags = intr_clear();
case IRQT_PIRQ:
break;
case IRQT_DEV_EVTCHN:
break;
default:
break;
}
}
/* Like ec_unmask_irq() but conditional; details truncated. */
void
ec_try_unmask_irq(int irq)
{
int evtchn;
flags = intr_clear();
case IRQT_PIRQ:
break;
case IRQT_DEV_EVTCHN:
break;
default:
break;
}
}
/*
 * Poll until an event channel is ready or 'check_func' returns true. This can
 * only be used in a situation where interrupts are masked, otherwise we have a
 * classic time-of-check vs. time-of-use race.
 */
/* NOTE(review): name line missing; the non-initdomain poll body is truncated. */
void
{
if (DOMAIN_IS_INITDOMAIN(xen_info)) {
/* dom0: just yield to the hypervisor until the condition holds */
while (!check_func(arg))
(void) HYPERVISOR_yield();
return;
}
for (;;) {
if (check_func(arg))
return;
}
}
void
{
return;
}
/*
 * Unbind the IPI and VIRQ event channels in preparation for suspend
 * (see the block comment at the top of the file).  NOTE(review): many
 * statements are missing from this copy; the surviving IRQT_EVTCHN /
 * IRQT_DEV_EVTCHN / IRQT_PIRQ cases crash the domain, presumably because
 * drivers should already have removed those bindings before suspend.
 */
void
ec_suspend(void)
{
int i;
int c;
for (i = 0; i < MAXIPL; i++) {
continue;
for (c = 0; c < NCPU; c++) {
continue;
if (CPU_IN_SET(cpu_suspend_lost_set, c))
continue;
}
}
for (i = 0; i < NR_VIRQS; i++) {
continue;
/*
 * If we're sharing a single event channel across all CPUs, we
 * should only unbind once.
 */
for (c = 1; c < NCPU; c++)
virq_info[i].mi_evtchns[c] = 0;
} else {
for (c = 0; c < NCPU; c++) {
continue;
if (*evtchnp != 0)
}
}
}
for (i = 0; i < NR_IRQS; i++) {
case IRQT_EVTCHN:
case IRQT_DEV_EVTCHN:
(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
break;
case IRQT_PIRQ:
(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
break;
default:
break;
}
}
}
/*
 * The debug irq is special, we only have one evtchn and irq but we allow all
 * cpus to service it. It's marked as shared and we propagate the event
 * channel into all CPUs by hand.
 */
/* NOTE(review): name line missing; per-cpu propagation body truncated. */
static void
{
int i;
for (i = 1; i < NCPU; i++)
}
/*
 * Re-bind a VIRQ's event channel(s) after resume.  Only VIRQ_TIMER is
 * bound on every cpu; other VIRQs bind on the first cpu only.
 */
static void
virq_resume(int virq)
{
int evtchn;
int i, err;
for (i = 0; i < NCPU; i++) {
continue;
CPUSET_ONLY(tcpus, i);
/*
 * only timer VIRQ is bound to all cpus
 */
if (virq != VIRQ_TIMER)
break;
}
}
/* Re-bind the per-cpu IPI event channels for one priority level. */
static void
ipi_resume(int ipl)
{
int i;
for (i = 0; i < NCPU; i++) {
int evtchn;
continue;
evtchn = xen_bind_ipi(i);
CPUSET_ONLY(tcpus, i);
}
}
/*
 * Re-establish IPI and VIRQ bindings after a resume.  All event channels
 * are masked first because the new event-channel space is not live yet.
 */
void
ec_resume(void)
{
int i;
/* New event-channel space is not 'live' yet. */
for (i = 0; i < NR_EVENT_CHANNELS; i++)
(void) ec_mask_evtchn(i);
for (i = 0; i < MAXIPL; i++) {
continue;
ipi_resume(i);
}
for (i = 0; i < NR_VIRQS; i++) {
continue;
virq_resume(i);
}
}
/*
 * Boot-time initialization of the evtchn/irq tables: every channel starts
 * masked, unowned, and mapped to INVALID_IRQ.  NOTE(review): loop bodies
 * for the IPI/VIRQ tables and the PIRQ pre-binding are missing here, and
 * the braces below are unbalanced in this copy.
 */
void
ec_init(void)
{
int i;
for (i = 0; i < NR_EVENT_CHANNELS; i++) {
CPUSET_ZERO(evtchn_cpus[i]);
evtchn_to_irq[i] = INVALID_IRQ;
(void) ec_mask_evtchn(i);
}
for (i = 0; i < MAXIPL; i++)
for (i = 0; i < NR_VIRQS; i++)
/*
 * Phys IRQ space is statically bound (1:1 mapping), grab the IRQs
 * now.
 */
}
}
/* NOTE(review): presumably binds VIRQ_DEBUG and records its irq. */
void
{
int irq;
ec_debug_irq = irq;
}
/*
 * This is the entry point for processing events from xen
 *
 * (See the commentary associated with the shared_info_st structure
 * in hypervisor-if.h)
 *
 * Since the event channel mechanism doesn't really implement the
 * concept of priority like hardware interrupt controllers, we simulate
 * that in software here using the cpu priority field and the pending
 * interrupts field. Events/interrupts that are not able to be serviced
 * now because they are at a lower priority than the current cpu priority
 * cause a level bit to be recorded in the pending interrupts word. When
 * the priority is lowered (either by spl or interrupt exit code) the pending
 * levels are checked and an upcall is scheduled if there are events/interrupts
 * that have become deliverable.
 */
/*
 * NOTE(review): the name line is missing -- the locking comment at the
 * top of this file suggests this is xen_callback_handler().  Large parts
 * of the body (selector/word scanning, irq lookup, dispatch) are missing
 * from this copy; compare against upstream before relying on it.
 */
void
{
vci->evtchn_upcall_pending = 0;
/*
 * To expedite scanning of pending notifications, any 0->1
 * pending transition on an unmasked channel causes a
 * corresponding bit in evtchn_pending_sel to be set.
 * Each bit in the selector covers a 32-bit word in
 * the evtchn_pending[] array.
 */
membar_enter();
do {
pending_sels, 0) != pending_sels);
pending_ints = *cpu_ipp;
/* First pass: gather pending events into the per-cpu pending word. */
while ((i = ffs(pending_sels)) != 0) {
i--;
selbit = 1ul << i;
pending_sels &= ~selbit;
membar_enter();
pe &= ~(1ul << j);
port = (i << EVTCHN_SHIFT) + j;
/*
 * If no irq set, just ignore the event.
 * On e.g. netbsd they call evtchn_device_upcall(port)
 * We require the evtchn driver to install a handler
 * so there will be an irq associated with user mode
 * evtchns.
 */
if (irq == INVALID_IRQ) {
continue;
}
/*
 * If there's no handler, it could be a poke, so just
 * accept the event and continue.
 */
#ifdef TRAPTRACE
if (IRQ_IS_CPUPOKE(irq)) {
}
#endif /* TRAPTRACE */
if (ec_mask_evtchn(port)) {
continue;
}
}
/*
 * If we are the cpu that successfully masks
 * the event, then record it as a pending event
 * for this cpu to service
 */
if (ec_mask_evtchn(port)) {
if (ec_evtchn_pending(port)) {
} else {
/*
 * another cpu serviced this event
 * before us, clear the mask.
 */
}
}
}
}
*cpu_ipp = pending_ints;
if (pending_ints == 0)
return;
/*
 * We have gathered all the pending events/interrupts,
 * go service all the ones we can from highest priority to lowest.
 * Note: This loop may not actually complete and service all
 * pending interrupts since one of the interrupt threads may
 * block and the pinned thread runs. In that case, when we
 * exit the interrupt thread that blocked we will check for
 * any unserviced interrupts and re-post an upcall to process
 * any unserviced pending events.
 */
pe &= ~(1ul << j);
if (pe == 0) {
/*
 * Must reload pending selector bits
 * here as they could have changed on
 * a previous trip around the inner loop
 * while we were interrupt enabled
 * in a interrupt service routine.
 */
pending_sels &= ~(1ul << i);
if (pending_sels == 0)
}
port = (i << EVTCHN_SHIFT) + j;
if (irq == INVALID_IRQ) {
/*
 * No longer a handler for this event
 * channel. Clear the event and
 * ignore it, unmask the event.
 */
continue;
}
if (irq == ec_dev_irq) {
volatile int *tptr = &ec_dev_mbox;
ASSERT(ec_dev_mbox == 0);
/*
 * NOTE: this gross store thru a pointer
 * is necessary because of a Sun C
 * compiler bug that does not properly
 * honor a volatile declaration.
 * we really should just be able to say
 * ec_dev_mbox = port;
 * here
 */
}
/*
 * Set up the regs struct to
 * look like a normal hardware int
 * and do normal interrupt handling.
 */
/*
 * Check for cpu priority change
 * Can happen if int thread blocks
 */
return;
}
}
}
}
/*
 * Unmask an event channel.  Must be called with interrupts disabled.
 * NOTE(review): the fast-path test and the re-trigger logic are missing
 * from this copy; only the slow-path comment and the final barrier remain.
 */
void
ec_unmask_evtchn(unsigned int ev)
{
ASSERT(!interrupts_enabled());
/*
 * Check if we need to take slow path
 */
return;
}
/*
 * The following is basically the equivalent of
 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose the
 * interrupt edge' if the channel is masked.
 * XXPV - slight race if upcall was about to be set, we may get
 * an extra upcall.
 */
membar_enter();
}
}
}
/*
 * Set a bit in an evtchan mask word, return true if we are the cpu that
 * set the bit.
 */
int
ec_mask_evtchn(unsigned int ev)
{
int masked;
/* presumably an atomic test-and-set retry loop -- body truncated */
do {
if (masked) {
#ifdef DEBUG
#endif
}
return (masked);
}
/* Clear an event channel's pending bit; body truncated in this copy. */
void
ec_clear_evtchn(unsigned int ev)
{
}
/* Send an event notification to a port; body truncated in this copy. */
void
ec_notify_via_evtchn(unsigned int port)
{
}
/*
 * Mask the evtchn behind an irq and return its recorded owner
 * (from the evtchn_owner[] table).
 */
int
ec_block_irq(int irq)
{
int evtchn;
(void) ec_mask_evtchn(evtchn);
return (evtchn_owner[evtchn]);
}
/*
 * Make an event that is pending for delivery on the current cpu "go away"
 * without servicing the interrupt.
 */
void
ec_unpend_irq(int irq)
{
unsigned long pe, pending_sels;
struct xen_evt_data *cpe;
/*
 * The evtchn must be masked
 */
flags = intr_clear();
if (pe == 0) {
if (pending_sels == 0)
}
}