/* xpv_psm.c revision e8ed0869d5c65afe0c37c4755bf81f7381d1f43c */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#define PSMI_1_6
#include <sys/machlock.h>
#include <sys/smp_impldefs.h>
#include <sys/psm_common.h>
#include <sys/archsystm.h>
#include <sys/mach_intr.h>
#include <sys/hypervisor.h>
#include <sys/evtchn_impl.h>
/*
* Global Data
*/
int xen_psm_verbose = 0;
/* As of now we don't support x2apic in xVM */
int apic_error = 0;
int apic_verbose = 0;
int apic_forceload = 0;
/*
 * NOTE(review): the two lines below are the orphaned tail of an array
 * initializer whose declaration line is missing in this copy -- restore
 * from revision history.
 */
3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
};
/* use to make sure only one cpu handles the nmi */
static lock_t xen_psm_nmi_lock;
int xen_psm_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */
int xen_psm_panic_on_nmi = 0;
int xen_psm_num_nmis = 0;
int xen_psm_next_bind_cpu; /* next cpu to bind an interrupt to */
/*
 * XXPV we flag MSI as not supported, since the hypervisor currently doesn't
 * support MSI at all. Change this initialization to zero when MSI is
 * supported.
 */
int xen_support_msi = -1;
/* irq bound to the clock VIRQ; INVALID_IRQ until bound. */
static int xen_clock_irq = INVALID_IRQ;
/* flag definitions for xen_psm_verbose */
#define XEN_PSM_VERBOSE_IRQ_FLAG 0x00000001
#define XEN_PSM_VERBOSE_POWEROFF_FLAG 0x00000002
#define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000004
/*
 * NOTE(review): the bodies of the two verbose-logging macros below are
 * truncated in this copy; their trailing backslashes splice the
 * following lines (including the next #define and the comment block)
 * into a single, non-compiling macro definition.  Restore from
 * revision history before building.
 */
#define XEN_PSM_VERBOSE_IRQ(fmt) \
if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \
#define XEN_PSM_VERBOSE_POWEROFF(fmt) \
/*
 * Dummy apic array to point common routines at that want to do some apic
 * manipulation. Xen doesn't allow guest apic access so we point at these
 * memory locations to fake out those who want to do apic fiddling.
 */
static struct psm_info xen_psm_info;
static void xen_psm_setspl(int);
static int apic_alloc_vectors(dev_info_t *, int, int, int, int, int);
/*
* Local support routines
*/
/*
* Select vcpu to bind xen virtual device interrupt to.
*/
/*ARGSUSED*/
int
xen_psm_bind_intr(int irq)
{
/*
 * NOTE(review): body truncated in this copy -- the first statement
 * returns IRQ_UNBOUND unconditionally, so the cpu-selection logic and
 * the done: label below are unreachable, bind_cpu is used undeclared,
 * and the braces no longer balance.  Restore from revision history.
 */
return (IRQ_UNBOUND);
if (irq <= APIC_MAX_VECTOR)
else
bind_cpu = 0;
goto done;
}
do {
if (xen_psm_next_bind_cpu >= xen_psm_ncpus)
} else {
bind_cpu = 0;
}
done:
/* Selected vcpu id (or 0) for binding this interrupt. */
return (bind_cpu);
}
/*
* Autoconfiguration Routines
*/
/*
 * PSM probe entry point.  Reports whether this module should be used on
 * the current platform.
 *
 * Fix: the original fell off the end of this non-void function when not
 * running as the init domain (undefined behavior if the caller uses the
 * value); now ret is returned on every path.
 */
static int
xen_psm_probe(void)
{
	int ret = PSM_SUCCESS;

	/*
	 * NOTE(review): the init-domain-only probe work that presumably
	 * preceded this return appears truncated in this copy -- confirm
	 * against revision history.
	 */
	if (DOMAIN_IS_INITDOMAIN(xen_info))
		return (ret);
	return (ret);
}
static void
xen_psm_softinit(void)
{
/* LINTED logical expression always true: op "||" */
if (DOMAIN_IS_INITDOMAIN(xen_info)) {
/*
 * NOTE(review): the init-domain branch is empty in this copy;
 * presumably it performed dom0-only initialization -- confirm
 * against revision history.
 */
}
}
/*
 * Clock initialization for the PSM.  Returns the timer resolution in
 * nanoseconds per tick; the hertz argument is unused on this platform.
 */
/*ARGSUSED*/
static int
xen_psm_clkinit(int hertz)
{
	extern int dosynctodr;

	/*
	 * A domU has no write access to the TOD hardware.  Mark the TOD
	 * clock as faulted up front and disable the periodic attempts to
	 * sync the TOD hardware with the hires timer.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		(void) tod_fault(TOD_RDONLY, 0);
		dosynctodr = 0;
	}

	/*
	 * The hypervisor's timer is built on the local APIC timer and
	 * accepts requests at nanosecond resolution.  A common apic clock
	 * frequency is 100 MHz, i.e. 10 nsec per tick; what we would
	 * really like is a way to obtain the actual ns-per-tick value
	 * from xen.
	 * XXPV - This is an assumption that needs checking and may change.
	 */
	return (XEN_NSEC_PER_TICK);
}
/*
 * High-resolution time initialization: the hypervisor always provides a
 * hires time source, so simply flag gethrtime() as high resolution.
 */
static void
xen_psm_hrtimeinit(void)
{
	extern int gethrtime_hires;

	gethrtime_hires = 1;
}
/* xen_psm NMI handler */
/*ARGSUSED*/
static void
/*
 * NOTE(review): the function name/parameter line is missing in this
 * copy, so this does not parse as written -- restore from revision
 * history.
 */
{
/* Only one cpu at a time gets to report/handle the NMI. */
if (!lock_try(&xen_psm_nmi_lock))
return;
if (xen_psm_kmdb_on_nmi && psm_debugger()) {
debug_enter("NMI received: entering kmdb\n");
} else if (xen_psm_panic_on_nmi) {
/* Keep panic from entering kmdb. */
nopanicdebug = 1;
panic("NMI received\n");
} else {
/*
 * prom_printf is the best shot we have of something which is
 * safe to call from NMI context.
 */
prom_printf("NMI received\n");
}
}
static void
/*
 * NOTE(review): the function name line is missing in this copy; the
 * psm_ops table below installs xen_psm_picinit for psm_picinit, so this
 * is presumably that routine.  Several statement bodies are truncated
 * (irqno used undeclared, empty if/else arms, unbalanced braces) --
 * restore from revision history.
 */
{
if (DOMAIN_IS_INITDOMAIN(xen_info)) {
/* set a flag so we know we have run xen_psm_picinit() */
apic_picinit_called = 1;
/* XXPV - do we need to do this? */
picsetup(); /* initialise the 8259 */
/* enable apic mode if imcr present */
/* XXPV - do we need to do this either? */
if (apic_imcrp) {
}
/*
 * We never called xen_psm_addspl() when the SCI
 * interrupt was added because that happened before the
 * PSM module was loaded. Fix that up here by doing
 * any missed operations (e.g. bind to CPU)
 */
if ((irqno = apic_sci_vect) > 0) {
} else {
}
}
}
/* add nmi handler - least priority nmi handler */
}
/*
 * generates an interprocessor interrupt to another CPU
 */
static void
/*
 * NOTE(review): the name/parameter line is missing (xen_psm_send_ipi
 * per the ops table) and the body is empty in this copy -- restore from
 * revision history.
 */
{
}
/*ARGSUSED*/
static int
/*
 * NOTE(review): the name/parameter line is missing (likely
 * xen_psm_addspl per the ops table) and most of the body is truncated
 * -- the dangling "} else {" fragments and empty comments below do not
 * parse, and ret is used undeclared.  Restore from revision history.
 */
{
/*
 * We are called at splhi() so we can't call anything that might end
 * up trying to context switch.
 */
/*
 */
} else {
/*
 */
} else {
}
}
return (ret);
}
/*
 * Acquire ownership of this irq on this cpu
 */
void
xen_psm_acquire_irq(int irq)
{
int cpuid;
/*
 * If the irq is currently being serviced by another cpu
 * we busy-wait for the other cpu to finish. Take any
 * pending interrupts before retrying.
 */
do {
flags = intr_clear();
/*
 * NOTE(review): the loop body and its terminating "} while (...)" are
 * missing in this copy, and flags is used undeclared.  Restore from
 * revision history.
 */
}
/*ARGSUSED*/
static int
/*
 * NOTE(review): the name/parameter line is missing (likely
 * xen_psm_delspl per the ops table) and the body is truncated -- the
 * braces do not balance and there are two return statements with a
 * dangling "} else {" between them.  Restore from revision history.
 */
{
int err = PSM_SUCCESS;
/*
 */
}
/*
 * If still in use reset priority
 */
return (err);
}
} else {
}
return (err);
}
static processorid_t
/*
 * NOTE(review): the name/parameter line is missing
 * (xen_psm_get_next_processorid per the ops table) and the switch
 * heading is truncated -- the bare case/default labels below have no
 * enclosing switch statement in this copy.  Restore from revision
 * history.
 */
{
if (id == -1)
return (0);
case 0: /* yeah, that one's there */
return (id);
default:
case X_EINVAL: /* out of range */
return (-1);
case X_ENOENT: /* not present in the domain */
/*
 * It's not clear that we -need- to keep looking
 * at this point, if, e.g., we can guarantee
 * the hypervisor always keeps a contiguous range
 * of vcpus around this is equivalent to "out of range".
 *
 * But it would be sad to miss a vcpu we're
 * supposed to be using ..
 */
break;
}
}
/* No further vcpu found. */
return (-1);
}
/*
 * XXPV - undo the start cpu op change; return to ignoring this value
 * - also tweak error handling in main startup loop
 */
/*ARGSUSED*/
static int
/*
 * NOTE(review): the name/parameter line is missing (xen_psm_cpu_start
 * per the ops table) and the body is truncated -- the dangling "else"
 * has no matching "if" and ret is never assigned.  Restore from
 * revision history.
 */
{
int ret;
else
return (ret);
}
/*
 * Allocate an irq for inter cpu signaling
 */
/*ARGSUSED*/
static int
/*
 * NOTE(review): the name/parameter line is missing (xen_psm_get_ipivect
 * per the ops table); ipl is used undeclared here.  Binds an IPI event
 * channel to an irq on vcpu 0.
 */
{
return (ec_bind_ipi_to_irq(ipl, 0));
}
/*ARGSUSED*/
static int
xen_psm_get_clockirq(int ipl)
{
/* Return the cached clock irq if already bound. */
if (xen_clock_irq != INVALID_IRQ)
return (xen_clock_irq);
/*
 * NOTE(review): the statement that binds the clock VIRQ to an irq and
 * stores it in xen_clock_irq appears to be missing in this copy; as
 * written both paths return the same (possibly INVALID_IRQ) value.
 * Restore from revision history.
 */
return (xen_clock_irq);
}
/*ARGSUSED*/
static void
/*
 * NOTE(review): the name/parameter line is missing -- the ops table
 * below installs xen_psm_shutdown for psm_shutdown; cmd and fcn are
 * used undeclared here.  Maps the shutdown/reboot request onto the
 * appropriate HYPERVISOR_shutdown reason code.
 */
{
switch (cmd) {
case A_SHUTDOWN:
switch (fcn) {
case AD_BOOT:
case AD_IBOOT:
(void) HYPERVISOR_shutdown(SHUTDOWN_reboot);
break;
case AD_POWEROFF:
/* fall through if domU or if poweroff fails */
if (DOMAIN_IS_INITDOMAIN(xen_info))
if (apic_enable_acpi)
(void) acpi_poweroff();
/* FALLTHRU */
case AD_HALT:
default:
(void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
break;
}
break;
case A_REBOOT:
(void) HYPERVISOR_shutdown(SHUTDOWN_reboot);
break;
default:
return;
}
}
static int
/*
 * NOTE(review): the name/parameter line is missing
 * (xen_psm_translate_irq per the ops table) and the body is truncated
 * -- the string fragment below is the tail of a debug-print call whose
 * opening line is gone, and the braces do not balance.  Restore from
 * revision history.
 */
{
" dip = NULL\n", irqno));
return (irqno);
}
return (irqno);
}
/*
 * xen_psm_intr_enter() acks the event that triggered the interrupt and
 * returns the new priority level,
 */
/*ARGSUSED*/
static int
/*
 * NOTE(review): the name/parameter line is missing (xen_psm_intr_enter
 * per the ops table) and the event-ack code that computes newipl is
 * gone -- as written newipl is read uninitialized and the bodies of
 * both commented branches are missing.  Restore from revision history.
 */
{
int newipl;
if (newipl == 0) {
/*
 * (newipl == 0) means we have no service routines for this
 * vector. We will treat this as a spurious interrupt.
 * We have cleared the pending bit already, clear the event
 * mask and return a spurious interrupt. This case can happen
 * when an interrupt delivery is racing with the removal of
 * of the service routine for that interrupt.
 */
/*
 * (newipl <= cpu->cpu_pri) means that we must be trying to
 * service a vector that was shared with a higher priority
 * isr. The higher priority handler has been removed and
 * we need to service this int. We can't return a lower
 * priority than current cpu priority. Just synthesize a
 * priority to return that should be acceptable.
 */
}
return (newipl);
}
/*
 * xen_psm_intr_exit() restores the old interrupt
 * priority level after processing an interrupt.
 * It is called with interrupts disabled, and does not enable interrupts.
 */
/* ARGSUSED */
static void
/*
 * NOTE(review): xen_psm_intr_exit's name/parameter line and body are
 * missing in this copy, and the return-type line of psm_intr_exit_fn()
 * below is also gone.  Restore from revision history.
 */
{
}
/* Hand the common code this PSM's interrupt-exit routine. */
psm_intr_exit_fn(void)
{
return (xen_psm_intr_exit);
}
/*
 * Check if new ipl level allows delivery of previously unserviced events
 */
static void
xen_psm_setspl(int ipl)
{
/*
 * If new ipl level will enable any pending interrupts, setup so the
 * upcoming sti will cause us to get an upcall.
 */
/*
 * NOTE(review): the code computing "pending" is missing in this copy --
 * it is used undeclared below, and the body of the if statement is
 * truncated.  Restore from revision history.
 */
if (pending) {
int i;
ulong_t pending_sels = 0;
}
}
/*
 * This function provides external interface to the nexus for all
 * functionality related to the new DDI interrupt framework.
 *
 * Input:
 * dip - pointer to the dev_info structure of the requested device
 * hdlp - pointer to the internal interrupt handle structure for the
 * requested interrupt
 * intr_op - opcode for this call
 * result - pointer to the integer that will hold the result to be
 * passed back if return value is PSM_SUCCESS
 *
 * Output:
 * return value is either PSM_SUCCESS or PSM_FAILURE
 */
int
/*
 * NOTE(review): the name/parameter line is missing here -- the ops
 * table below installs xen_intr_ops for the advanced DDI interrupt
 * framework slot, so this is presumably that routine.  Many case bodies
 * are truncated (dangling "} else {", a stray "== NULL" continuation,
 * assignments to *result gone).  Restore from revision history.
 */
{
int cap;
int err;
int new_priority;
switch (intr_op) {
case PSM_INTR_OP_CHECK_MSI:
if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
break;
}
/*
 * Check MSI/X is supported or not at APIC level and
 * masked off the MSI/X bits in hdlp->ih_type if not
 * supported before return. If MSI/X is supported,
 * leave the ih_type unchanged and return.
 *
 * hdlp->ih_type passed in from the nexus has all the
 * interrupt types supported by the device.
 */
if (xen_support_msi == 0) {
/*
 * if xen_support_msi is not set, call
 * apic_check_msi_support() to check whether msi
 * is supported first
 */
if (apic_check_msi_support() == PSM_SUCCESS)
xen_support_msi = 1;
else
xen_support_msi = -1;
}
/* NOTE(review): the *result assignments for both arms are missing. */
if (xen_support_msi == 1)
else
break;
break;
case PSM_INTR_OP_FREE_VECTORS:
break;
/*
 * XXPV - maybe we should make this be:
 * min(APIC_VECTOR_PER_IPL, count of all avail vectors);
 */
if (DOMAIN_IS_INITDOMAIN(xen_info))
else
*result = 1;
break;
case PSM_INTR_OP_XLATE_VECTOR:
} else {
}
break;
case PSM_INTR_OP_GET_PENDING:
/* XXPV - is this enough for dom0 or do we need to ref ioapic */
break;
case PSM_INTR_OP_CLEAR_MASK:
/* XXPV - is this enough for dom0 or do we need to set ioapic */
return (PSM_FAILURE);
break;
case PSM_INTR_OP_SET_MASK:
/* XXPV - is this enough for dom0 or do we need to set ioapic */
return (PSM_FAILURE);
break;
case PSM_INTR_OP_GET_CAP:
break;
case PSM_INTR_OP_GET_SHARED:
if (DOMAIN_IS_INITDOMAIN(xen_info)) {
return (PSM_FAILURE);
== NULL)
return (PSM_FAILURE);
} else {
return (PSM_FAILURE);
}
break;
case PSM_INTR_OP_SET_PRI:
new_priority = *(int *)result;
if (err != 0)
return (PSM_FAILURE);
break;
case PSM_INTR_OP_GET_INTR:
if (!DOMAIN_IS_INITDOMAIN(xen_info))
return (PSM_FAILURE);
/*
 * The interrupt handle given here has been allocated
 * specifically for this command, and ih_private carries
 * a pointer to a apic_get_intr_t.
 */
return (PSM_FAILURE);
break;
case PSM_INTR_OP_SET_CAP:
/* FALLTHRU */
default:
return (PSM_FAILURE);
}
return (PSM_SUCCESS);
}
static void
xen_psm_rebind_irq(int irq)
{
/*
 * NOTE(review): body truncated -- newcpu is used undeclared and the
 * branch bodies that select a cpu and rebind the event channel are
 * missing.  Restore from revision history.
 */
if (newcpu == IRQ_UNBOUND) {
} else {
}
if (irq <= APIC_MAX_VECTOR) {
}
}
/*
 * Disable all device interrupts for the given cpu.
 * High priority interrupts are not disabled and will still be serviced.
 */
static int
/*
 * NOTE(review): the name/parameter line is missing
 * (xen_psm_disable_intr per the ops table); cpun is used undeclared and
 * the loop over irqs is truncated -- the bare "continue;" has no
 * enclosing loop in this copy.  Restore from revision history.
 */
{
int irq;
/*
 * Can't offline VCPU 0 on this hypervisor. There's no reason
 * anyone would want to given that the CPUs are virtual. Also note
 */
if (cpun == 0)
return (PSM_FAILURE);
continue;
}
return (PSM_SUCCESS);
}
static void
/*
 * NOTE(review): the name/parameter line is missing (xen_psm_enable_intr
 * per the ops table); cpun is used undeclared and the loop heading over
 * irqs is truncated.  Restore from revision history.
 */
{
int irq;
if (cpun == 0)
return;
/*
 * Rebalance device interrupts among online processors
 */
if (!ec_irq_rebindable(irq))
continue;
}
if (DOMAIN_IS_INITDOMAIN(xen_info)) {
}
}
static int
/*
 * NOTE(review): the name line is missing (xen_psm_post_cpu_start per
 * the ops table); cpun is assigned but undeclared, and the init-domain
 * branch body is missing in this copy.  Restore from revision history.
 */
{
cpun = psm_get_cpu_id();
if (DOMAIN_IS_INITDOMAIN(xen_info)) {
/*
 * Non-virtualized environments can call psm_post_cpu_start
 * xen_psm_post_cpu_start() is only called from boot.
 */
}
return (PSM_SUCCESS);
}
/*
 * This function will reprogram the timer.
 *
 * When in oneshot mode the argument is the absolute time in future at which to
 * generate the interrupt.
 *
 * When in periodic mode, the argument is the interval at which the
 * interrupts should be generated. There is no need to support the periodic
 * mode timer change at this time.
 *
 * Note that we must be careful to convert from hrtime to Xen system time (see
 * xpv_timestamp.c).
 */
static void
/*
 * NOTE(review): the name/parameter line is missing
 * (xen_psm_timer_reprogram per the ops table); flags, now, xen_time and
 * timer_new are all used undeclared, and the deadline computation
 * between the two branches is truncated -- the dangling "} else" does
 * not parse.  Restore from revision history.
 */
{
flags = intr_clear();
/*
 * We should be called from high PIL context (CBE_HIGH_PIL),
 * so kpreempt is disabled.
 */
now = xpv_gethrtime();
xen_time = xpv_getsystime();
/*
 * requested to generate an interrupt in the past
 * generate an interrupt as soon as possible
 */
} else
if (HYPERVISOR_set_timer_op(timer_new) != 0)
panic("can't set hypervisor timer?");
}
/*
 * This function will enable timer interrupts.
 */
static void
xen_psm_timer_enable(void)
{
/*
 * NOTE(review): body empty in this copy; expected to unblock the clock
 * event channel (the counterpart of the ec_block_irq() call in
 * xen_psm_timer_disable below) -- confirm against revision history.
 */
}
/*
 * This function will disable timer interrupts on the current cpu.
 */
static void
xen_psm_timer_disable(void)
{
/* Mask the clock event channel so no further timer upcalls arrive. */
(void) ec_block_irq(xen_clock_irq);
/*
 * If the clock irq is pending on this cpu then we need to
 * clear the pending interrupt.
 */
/* NOTE(review): the pending-clear code is missing in this copy. */
}
/*
 *
 * The following functions are in the platform specific file so that they
 * can be different functions depending on whether we are running on
 * bare metal or a hypervisor.
 */
/*
 * Allocate a free vector for irq at ipl.
 */
/* ARGSUSED */
/*
 * NOTE(review): the return-type and name/parameter lines are missing in
 * this copy (presumably apic_allocate_vector, which is called below);
 * vector is used undeclared and the hypervisor allocation call that the
 * panic guards is gone.  Restore from revision history.
 */
{
panic("Hypervisor alloc vector failed");
/*
 * No need to worry about vector colliding with our reserved vectors
 * e.g. T_FASTTRAP, xen can differentiate between hardware and software
 * generated traps and handle them properly.
 */
return (vector);
}
/* Mark vector as not being used by any irq */
void
/*
 * NOTE(review): the function name/parameter line and body are missing
 * in this copy -- restore from revision history.
 */
{
}
/*
 * NOTE(review): the comment body and the leading signature lines of
 * apic_alloc_vectors() (forward-declared above as taking
 * (dev_info_t *, int, int, int, int, int)) are truncated here; rcount
 * is used before being computed, irqptr is undeclared, and several
 * debug-print calls are split mid-statement.  Restore from revision
 * history.
 */
/*
 */
static int
int behavior)
{
int rcount, i;
int irqno;
/* only supports MSI at the moment, will add MSI-X support later */
if (type != DDI_INTR_TYPE_MSI)
return (0);
"inum=0x%x pri=0x%x count=0x%x behavior=%d\n",
if (count > 1) {
if (behavior == DDI_INTR_ALLOC_STRICT &&
return (0);
if (apic_multi_msi_enable == 0)
count = 1;
else if (count > apic_multi_msi_max)
}
/*
 * XXPV - metal version takes all vectors avail at given pri.
 * Why do that? For now just allocate count vectors.
 */
/*
 * XXPV - currently the hypervisor does not support MSI at all.
 * It doesn't return consecutive vectors. This code is a first
 * cut for the (future) time that MSI is supported.
 */
for (i = 0; i < rcount; i++) {
INVALID_IRQ) {
"apic_allocate_irq failed\n"));
return (i);
}
#ifdef DEBUG
"apic_vector_to_irq is not APIC_RESV_IRQ\n"));
#endif
irqptr->airq_share_id = 0;
if (i == 0) /* they all bound to the same cpu */
0xff, 0xff);
else
"dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
}
/* Number of vectors actually allocated. */
return (rcount);
}
/*
 * The hypervisor doesn't permit access to local apics directly
 */
/* ARGSUSED */
uint32_t *
/*
 * NOTE(review): the function name/parameter line is missing in this
 * copy, as is the definition of xen_psm_dummy_apic returned below --
 * restore from revision history.
 */
{
/*
 * Return a pointer to a memory area to fake out the
 * probe code that wants to read apic registers.
 * The dummy values will end up being ignored by xen
 * later on when they are used anyway.
 */
return (xen_psm_dummy_apic);
}
/* ARGSUSED */
uint32_t *
/*
 * NOTE(review): the function name/parameter line is missing in this
 * copy -- restore from revision history.
 */
{
/*
 * Return non-null here to fake out configure code that calls this.
 * The i86xpv platform will not reference through the returned value..
 */
return ((uint32_t *)0x1);
}
/* ARGSUSED */
void
/* NOTE(review): stub function name line missing in this copy. */
{
}
/* ARGSUSED */
void
/* NOTE(review): stub function name line missing in this copy. */
{
}
/* NOTE(review): entire stub header (return type and name) missing. */
{
}
void
/* NOTE(review): stub function name line missing in this copy. */
{
}
/*
 * This function was added as part of x2APIC support in pcplusmp.
 */
void
/* NOTE(review): stub function name line missing in this copy. */
{
}
/*
 * This function was added as part of x2APIC support in pcplusmp to resolve
 * undefined symbol in xpv_psm.
 */
void
/* NOTE(review): stub function name line missing in this copy. */
{
}
/*
 * This function was added as part of x2APIC support in pcplusmp to resolve
 * undefined symbol in xpv_psm.
 *
 * Intentionally empty: exists only to satisfy the symbol reference.
 * Fix: declare as (void) -- an empty parameter list () leaves the
 * parameters unspecified in C (obsolescent prototype form).
 */
void
apic_ret(void)
{
}
/*
 * Call rebind to do the actual programming.
 */
int
/*
 * NOTE(review): the name/parameter line is missing in this copy (the
 * body reads deferred/p/irqptr/drep/rv, which it does not declare) and
 * several statements are truncated -- the dangling "} else {" near the
 * top does not parse.  Restore from revision history.
 */
{
/*
 * Set cpu based on xen idea of online cpu's not apic tables.
 * target cpu field when programming ioapic anyway.
 */
} else {
}
if (deferred) {
drep = (struct ioapic_reprogram_data *)p;
} else {
irqptr = (apic_irq_t *)p;
}
if (rv) {
/* CPU is not up or interrupt is disabled. Fall back to 0 */
cpu = 0;
}
/*
 * If rebind successful bind the irq to an event channel
 */
if (rv == 0) {
}
return (rv);
}
/*
 * Allocate a new vector for the given irq
 */
/* ARGSUSED */
/*
 * NOTE(review): the return-type and name/parameter lines are missing in
 * this copy; irq is used undeclared below.  Thin wrapper around
 * apic_allocate_vector().
 */
{
return (apic_allocate_vector(0, irq, 0));
}
/*
 * The rest of the file is just generic psm module boilerplate
 */
/*
 * Dispatch table wiring this module's entry points into the common PSM
 * framework; the slot each entry fills is noted alongside it.
 */
static struct psm_ops xen_psm_ops = {
xen_psm_probe, /* psm_probe */
xen_psm_softinit, /* psm_init */
xen_psm_picinit, /* psm_picinit */
xen_psm_intr_enter, /* psm_intr_enter */
xen_psm_intr_exit, /* psm_intr_exit */
xen_psm_setspl, /* psm_setspl */
xen_psm_addspl, /* psm_addspl */
xen_psm_delspl, /* psm_delspl */
xen_psm_disable_intr, /* psm_disable_intr */
xen_psm_enable_intr, /* psm_enable_intr */
(int (*)(int))NULL, /* psm_softlvl_to_irq */
(void (*)(int))NULL, /* psm_set_softintr */
xen_psm_clkinit, /* psm_clkinit */
xen_psm_get_clockirq, /* psm_get_clockirq */
xen_psm_hrtimeinit, /* psm_hrtimeinit */
xpv_gethrtime, /* psm_gethrtime */
xen_psm_get_next_processorid, /* psm_get_next_processorid */
xen_psm_cpu_start, /* psm_cpu_start */
xen_psm_post_cpu_start, /* psm_post_cpu_start */
xen_psm_shutdown, /* psm_shutdown */
xen_psm_get_ipivect, /* psm_get_ipivect */
xen_psm_send_ipi, /* psm_send_ipi */
xen_psm_translate_irq, /* psm_translate_irq */
(void (*)(int, char *))NULL, /* psm_notify_error */
xen_psm_timer_reprogram, /* psm_timer_reprogram */
xen_psm_timer_enable, /* psm_timer_enable */
xen_psm_timer_disable, /* psm_timer_disable */
(void (*)(int, int))NULL, /* psm_preshutdown */
xen_intr_ops, /* Advanced DDI Interrupt framework */
};
/* PSM registration record: identity, ownership and the ops table above. */
static struct psm_info xen_psm_info = {
PSM_INFO_VER01_5, /* version */
PSM_OWN_EXCLUSIVE, /* ownership */
&xen_psm_ops, /* operation */
"xVM_psm", /* machine name */
"platform module" /* machine descriptions */
};
static void *xen_psm_hdlp;
int
_init(void)
{
/*
 * NOTE(review): body missing in this copy -- a non-void function that
 * returns no value.  Restore from revision history.
 */
}
int
_fini(void)
{
/* NOTE(review): body missing in this copy; no value is returned. */
}
int
/*
 * NOTE(review): the _info() name/parameter line and body are missing in
 * this copy -- restore from revision history.
 */
{
}