/* xen_machdep.c revision 349b53dd4e695e3d833b5380540385145b2d3ae8 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* derived from netbsd's xen_machdep.c 1.1.2.1 */
/*
*
* Copyright (c) 2004 Christian Limpach.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
* 3. This section intentionally left blank.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Section 3 of the above license was updated in response to bug 6379571.
*/
#include <sys/xpv_user.h>
/* XXX 3.3. TODO remove this include */
#include <sys/segments.h>
#include <sys/hypervisor.h>
#include <sys/machsystm.h>
#include <sys/bootconf.h>
#include <sys/bootinfo.h>
#include <sys/evtchn_impl.h>
#include <sys/archsystm.h>
#include <sys/mach_mmu.h>
#include <sys/processor.h>
#include <sys/xen_errno.h>
#include <sys/xpv_panic.h>
#include <sys/smp_impldefs.h>
#include <sys/balloon_impl.h>
/*
 * SUSPEND_DEBUG(): suspend/resume trace output; expands to nothing in
 * non-DEBUG builds.
 * NOTE(review): the DEBUG branch of this #ifdef is empty in this copy —
 * the debug definition of SUSPEND_DEBUG appears to be missing (truncated
 * source), so a DEBUG build would fail to compile.  Confirm against the
 * full file.
 */
#ifdef DEBUG
#else
#define SUSPEND_DEBUG(...)
#endif
/* Debug knob; its consumers are not visible in this chunk — verify. */
int cpr_debug;
/* When set to 1, xen_hypervisor_supports_solaris() checks are bypassed. */
static int xen_suspend_debug;
/* When nonzero, startup_xen_mca() walks/dumps per-CPU physical info. */
int xen_physinfo_debug = 0;
/*
* Determine helpful version information.
*
* (And leave copies in the data segment so we can look at them later
* with e.g. kmdb.)
*/
/*
 * NOTE(review): the enum body, the struct xenver members, and the
 * updater function's name and signature are all missing from this copy
 * (extraction damage).  This region is not compilable as shown —
 * recover the original xen_machdep.c before making code changes.
 */
typedef enum xen_version {
struct xenver {
} xenver[2];
/*
* Update the xenver data. We maintain two copies, boot and
* current. If we are setting the boot, then also set current.
*/
static void
{
/*
* The revision is buried in the extraversion information that is
* maintained by the hypervisor. For our purposes we expect that
* the revision number is:
* - the second character in the extraversion information
* - one character long
* - numeric digit
* If it isn't then we can't extract the revision and we leave it
* set to 0.
*/
else
"version: v%s, unexpected version format",
"-xvm", 4) == 0)
/*
* Capabilities are a set of space separated ascii strings
* e.g. 'xen-3.1-x86_32p' or 'hvm-3.2-x86_64'
*/
/* Setting the boot copy also seeds the current copy (see L87-89 intent). */
if (idx == XENVER_BOOT_IDX)
sizeof (xenver[XENVER_BOOT_IDX]));
}
typedef enum xen_hypervisor_check {
/*
* we need 3.0.4 or better and if it is 3.0.4. then it must be provided
* by the Solaris xVM project.
* Checking can be disabled for testing purposes by setting the
* xen_suspend_debug variable.
*/
/*
 * NOTE(review): the function name and the conditions guarding each of
 * the return statements below are missing (truncated source).  Callers
 * at L250, L367 and L527 name this xen_hypervisor_supports_solaris();
 * returns nonzero when the running hypervisor is acceptable.
 */
static int
{
/* Debug override: accept any hypervisor when xen_suspend_debug is set. */
if (xen_suspend_debug == 1)
return (1);
return (0);
return (1);
if (XENVER_CURRENT(xv_minor) > 0)
return (1);
return (0);
return (0);
return (1);
}
/*
* If the hypervisor is -xvm, or 3.1.2 or higher, we don't need the
* workaround.
*/
/*
 * NOTE(review): the version-check conditions in front of the first three
 * returns, and the code that actually applies the PTE workaround, are
 * missing from this copy.  Do not rely on this body as shown.
 */
static void
xen_pte_workaround(void)
{
#if defined(__amd64)
extern int pt_kern;
return;
return;
return;
/* A -xvm hypervisor never needs the workaround. */
if (XENVER_CURRENT(xv_is_xvm))
return;
#endif
}
/*
 * NOTE(review): wrapper around HYPERVISOR_callback_op(); its name,
 * parameters, the cb initialization, and the hypercall itself are
 * missing from this copy (truncated source).  Panics on registration
 * failure for every callback type except NMI.
 */
void
{
struct callback_register cb;
#if defined(__amd64)
#endif
/*
* XXPV always ignore return value for NMI
*/
type != CALLBACKTYPE_nmi)
panic("HYPERVISOR_callback_op failed");
}
/*
 * Register the kernel's hypervisor callbacks (event delivery, failsafe,
 * NMI, and on amd64 the system-call entry).
 * NOTE(review): every actual registration call has been lost from this
 * copy — only the section comments remain.  As shown, this function is
 * a no-op; recover the original before use.
 */
void
xen_init_callbacks(void)
{
/*
* register event (interrupt) handler.
*/
/*
* failsafe handler.
*/
/*
* NMI handler.
*/
/*
* system call handler
* XXPV move to init_cpu_syscall?
*/
#if defined(__amd64)
#endif /* __amd64 */
}
/*
* cmn_err() followed by a 1/4 second delay; this gives the
* logging service a chance to flush messages and helps avoid
* intermixing output from prom_printf().
* XXPV: doesn't exactly help us on UP though.
*/
/*PRINTFLIKE2*/
/*
 * NOTE(review): the function name, parameters, and body (the cmn_err
 * call and the delay) are missing from this copy (truncated source).
 */
void
{
}
/*
 * Suspend all devices ahead of a domain suspend/migrate.
 * NOTE(review): the device-tree walk that used `rc` is missing from this
 * copy — as shown, `rc` is declared but never used and the function only
 * emits a trace message.  Recover the original body before relying on it.
 */
void
xen_suspend_devices(void)
{
int rc;
SUSPEND_DEBUG("xen_suspend_devices\n");
}
/*
 * Resume all devices after a domain suspend/migrate.
 * NOTE(review): the resume walk that used `rc` is missing from this copy —
 * as shown, `rc` is declared but never used and the function only emits a
 * trace message.  Recover the original body before relying on it.
 */
void
xen_resume_devices(void)
{
int rc;
SUSPEND_DEBUG("xen_resume_devices\n");
}
/*
* The list of mfn pages is out of date. Recompute it.
*/
/*
 * NOTE(review): the loop headers and the pfn/mfn bookkeeping around the
 * surviving statements are missing; the braces below do not balance as
 * shown.  This fragment refreshes mfn_list_pages[] page pointers and
 * repopulates the pfn->mfn translation list after resume — recover the
 * full body from the original file.
 */
static void
rebuild_mfn_list(void)
{
int i = 0;
SUSPEND_DEBUG("rebuild_mfn_list\n");
/* Start of a new page of the mfn list (page-aligned byte offset). */
if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
(caddr_t)&mfn_list_pages[j]);
}
}
= pfn_to_mfn(pfn);
}
/*
 * Take the secondary vcpus down ahead of a domain suspend.
 *
 * For each vcpu other than 0: if it is not already recorded as lost
 * (cpu_suspend_lost_set), power it down via xen_vcpu_down(); in either
 * case discard its saved hypervisor context with
 * mach_cpucontext_reset() so it is rebuilt on resume.
 */
static void
suspend_cpus(void)
{
	int id;

	SUSPEND_DEBUG("suspend_cpus\n");

	for (id = 1; id < ncpus; id++) {
		if (CPU_IN_SET(cpu_suspend_lost_set, id) == 0) {
			/* Still online: bring the vcpu down first. */
			SUSPEND_DEBUG("xen_vcpu_down %d\n", id);
			(void) xen_vcpu_down(id);
		}
		/* Always reset the saved context; resume rebuilds it. */
		mach_cpucontext_reset(cpu[id]);
	}
}
/*
 * Bring the secondary vcpus back up after a domain resume: any vcpu not
 * in cpu_suspend_lost_set is restarted via xen_vcpu_up().
 * NOTE(review): the bare `continue;` below is the residue of a missing
 * `if (...)` guard (truncated source) — as shown it makes the rest of
 * the loop body unreachable.  Recover the original before use.
 */
static void
resume_cpus(void)
{
int i;
for (i = 1; i < ncpus; i++) {
continue;
if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
SUSPEND_DEBUG("xen_vcpu_up %d\n", i);
(void) xen_vcpu_up(i);
}
}
}
/*
*/
/*
 * Suspend this domain (for live migration or checkpoint/resume) and
 * carry on after the hypervisor resumes us: verify the hypervisor is
 * acceptable, quiesce xenbus/devices/CPUs/event channels/grant tables,
 * call HYPERVISOR_suspend(), then unwind everything in reverse order.
 *
 * NOTE(review): a large number of source lines are missing from this
 * copy (hypercall invocations, lock operations, mfn bookkeeping, several
 * cmn_err calls and their conditions); the surviving statements and the
 * brace structure do not compile as shown.  Treat the comments below as
 * the surviving design notes, and recover the original file before any
 * code change.
 */
void
xen_suspend_domain(void)
{
extern void rtcsync(void);
extern hrtime_t hres_last_tick;
int i;
/*
* Check that we are happy to suspend on this hypervisor.
*/
if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
"version: v%lu.%lu%s, need at least version v3.0.4 or "
return;
}
/*
* XXPV - Are we definitely OK to suspend by the time we've connected
* the handler?
*/
SUSPEND_DEBUG("xen_suspend_domain\n");
/*
* suspend interrupts and devices
* cpr) and for migration. Would be nice to know the difference if
* may want to do more of the things that cpr does. (i.e. notify user
* processes, shrink memory footprint for faster restore, etc.)
*/
SUSPEND_DEBUG("xenbus_suspend\n");
/*
* XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
* wrt xenbus being suspended here?
*/
/*
* Suspend must be done on vcpu 0, as no context for other CPUs is
* saved.
*
* XXPV - add to taskq API ?
*/
SUSPEND_DEBUG("xen_start_migrate\n");
if (ncpus > 1)
suspend_cpus();
/*
* We can grab the ec_lock as it's a spinlock with a high SPL. Hence
* any holder would have dropped it to get through suspend_cpus().
*/
/*
* From here on in, we can't take locks.
*/
SUSPEND_DEBUG("ec_suspend\n");
ec_suspend();
SUSPEND_DEBUG("gnttab_suspend\n");
/* NOTE(review): `flags` has no visible declaration in this copy. */
flags = intr_clear();
/*
* Currently, the hypervisor incorrectly fails to bring back
* powered-down VCPUs. Thus we need to record any powered-down VCPUs
* to prevent any attempts to operate on them. But we have to do this
* *after* the very first time we do ec_suspend().
*/
for (i = 1; i < ncpus; i++) {
continue;
}
/*
* these into PFNs, but expects them to be, so we do it here.
* We don't use mfn_to_pfn() because so many OS services have
* been disabled at this point.
*/
prom_printf("xen_suspend_domain(): "
"CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
}
0, UVMF_INVLPG)) {
prom_printf("xen_suspend_domain(): "
"HYPERVISOR_update_va_mapping() failed\n");
(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
}
SUSPEND_DEBUG("HYPERVISOR_suspend\n");
/*
* At this point we suspend and sometime later resume.
*/
if (HYPERVISOR_suspend(start_info_mfn)) {
prom_printf("xen_suspend_domain(): "
"HYPERVISOR_suspend() failed\n");
(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
}
/*
* Point HYPERVISOR_shared_info to its new value.
*/
(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
prom_printf("xen_suspend_domain(): number of pages"
" changed, was 0x%lx, now 0x%lx\n", mfn_count,
(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
}
/* Invalidate the cached maximum mfn; it may differ after migration. */
cached_max_mfn = 0;
SUSPEND_DEBUG("gnttab_resume\n");
/* XXPV: add a note that this must be lockless. */
SUSPEND_DEBUG("ec_resume\n");
ec_resume();
if (ncpus > 1)
resume_cpus();
/*
* Now we can take locks again.
*/
/*
* Force the tick value used for tv_nsec in hres_tick() to be up to
* date. rtcsync() will reset the hrestime value appropriately.
*/
/*
* XXPV: we need to have resumed the CPUs since this takes locks, but
* can remote CPUs see bad state? Presumably yes. Should probably nest
* taking of todlock inside of cpu_lock, or vice versa, then provide an
* unlocked version. Probably need to call clkinitf to reset cpu freq
* and re-calibrate if we migrated to a different speed cpu. Also need
* to make a (re)init_cpu_info call to update processor info structs
* and device tree info. That remains to be written at the moment.
*/
rtcsync();
SUSPEND_DEBUG("xenbus_resume\n");
SUSPEND_DEBUG("xenbus_resume_devices\n");
SUSPEND_DEBUG("finished xen_suspend_domain\n");
/*
* We have restarted our suspended domain, update the hypervisor
* details. NB: This must be done at the end of this function,
* since we need the domain to be completely resumed before
* these functions will work correctly.
*/
/*
* We can check and report a warning, but we don't stop the
* process.
*/
if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
"but need at least version v3.0.4",
}
/*ARGSUSED*/
/*
 * Handler for the hypervisor "debug" virq: enter the kernel debugger,
 * and dump event-channel pending/mask state so it can be read from a
 * domain core when kmdb is not loaded.  Always returns 0.
 * NOTE(review): the prom_printf argument lists, the guard around the
 * dump (presumably "kmdb not loaded"), and at least one condition are
 * missing; the braces do not balance as shown (truncated source).
 */
int
xen_debug_handler(void *arg)
{
debug_enter("External debug event received");
/*
* If we've not got KMDB loaded, output some stuff difficult to capture
* from a domain core.
*/
int i;
prom_printf("evtchn_pending [ ");
for (i = 0; i < 8; i++)
prom_printf("]\nevtchn_mask [ ");
for (i = 0; i < 8; i++)
prom_printf("]\n");
for (i = 0; i < ncpus; i++) {
continue;
prom_printf("CPU%d pending %d mask %d sel %lx\n",
}
}
return (0);
}
/*ARGSUSED*/
/*
 * xenbus watch handler for the control/sysrq node (name and first
 * parameters missing from this copy — truncated source): reads the
 * sysrq key inside a xenbus transaction, removes the node, and acts on
 * the key.  Only 'b' (debug/break) is honored; see comment below.
 * NOTE(review): the xenbus_read/xenbus_rm calls, their error checks,
 * the `retry:` label, and the transaction-end at `out:` are missing.
 */
static void
unsigned int len)
{
char key = '\0';
int ret;
if (xenbus_transaction_start(&xbt)) {
return;
}
/*
* ENOENT happens in response to our own xenbus_rm.
* XXPV - this happens spuriously on boot?
*/
goto out;
}
goto out;
}
goto retry;
/*
* Somewhat arbitrary - on Linux this means 'reboot'. We could just
* accept any key, but this might increase the risk of sending a
* harmless sysrq to the wrong domain...
*/
if (key == 'b')
(void) xen_debug_handler(NULL);
else
return;
out:
}
/*
 * Shutdown request codes, as delivered through the xenstore
 * "control/shutdown" node, plus the string forms the control tools
 * write there.  SHUTDOWN_INVALID marks "no/unrecognized request".
 */
#define SHUTDOWN_INVALID -1
#define SHUTDOWN_POWEROFF 0
#define SHUTDOWN_REBOOT 1
#define SHUTDOWN_SUSPEND 2
#define SHUTDOWN_HALT 3
#define SHUTDOWN_MAX 4

/* Indexed by SHUTDOWN_* code; designated so the mapping is explicit. */
static const char *cmd_strings[SHUTDOWN_MAX] = {
	[SHUTDOWN_POWEROFF] = "poweroff",
	[SHUTDOWN_REBOOT] = "reboot",
	[SHUTDOWN_SUSPEND] = "suspend",
	[SHUTDOWN_HALT] = "halt"
};
/*
 * Forced ("dirty") shutdown path, run when the graceful shutdown timed
 * out: act directly on the requested SHUTDOWN_* code.
 * NOTE(review): the extraction of `cmd` from `arg`, the cmn_err call
 * whose format string survives at the top, the actual halt/poweroff/
 * reboot calls, and the default case are all missing from this copy.
 */
static void
xen_dirty_shutdown(void *arg)
{
"timed out.\nShutting down.\n");
switch (cmd) {
case SHUTDOWN_HALT:
case SHUTDOWN_POWEROFF:
break;
case SHUTDOWN_REBOOT:
break;
}
}
/*
 * Graceful shutdown worker: for SHUTDOWN_SUSPEND, suspend the domain
 * and return; otherwise map the SHUTDOWN_* code onto an init(1)-driven
 * shutdown, falling back to an immediate halt if init is not yet up.
 * NOTE(review): the extraction of `cmd` from `arg`, the suspend call,
 * the per-case initcmd assignments, the boot-time check guarding the
 * `extern void halt(char *)` fallback, and the final psignal/init
 * hand-off are all missing from this copy (truncated source).
 */
static void
xen_shutdown(void *arg)
{
if (cmd == SHUTDOWN_SUSPEND) {
return;
}
switch (cmd) {
case SHUTDOWN_POWEROFF:
break;
case SHUTDOWN_HALT:
break;
case SHUTDOWN_REBOOT:
break;
}
/*
* If we're still booting and init(1) isn't set up yet, simply halt.
*/
extern void halt(char *);
}
/*
* else, graceful shutdown with inittab and all getting involved
*/
}
/*ARGSUSED*/
/*
 * xenbus watch handler for the control/shutdown node (name and leading
 * parameters missing from this copy): read the requested command string,
 * match it against cmd_strings[], acknowledge by rewriting the node, and
 * dispatch xen_shutdown() with the decoded SHUTDOWN_* code.
 * NOTE(review): the xenbus_read call filling `str`/`slen`, the
 * transaction-start (`again:` target), the string comparison loop that
 * sets shutdown_code, and the taskq/timeout dispatch call inside the
 * final if are all missing (truncated source).
 */
static void
unsigned int len)
{
char *str;
unsigned int slen;
if (err)
return;
return;
}
/*
* If this is a watch fired from our write below, check out early to
* avoid an infinite loop.
*/
(void) xenbus_transaction_end(xbt, 0);
return;
} else {
}
/*
* XXPV Should we check the value of xenbus_write() too, or are all
* errors automatically folded into xenbus_transaction_end() ??
*/
goto again;
}
if (shutdown_code != SHUTDOWN_INVALID) {
(void *)(intptr_t)shutdown_code, 0);
}
}
/* xenstore watches for the control/shutdown and control/sysrq nodes. */
static struct xenbus_watch shutdown_watch;
static struct xenbus_watch sysrq_watch;
/*
 * Late-boot hook: in a domU, register the xenstore watches declared
 * above so the control tools can request shutdown/sysrq.
 * NOTE(review): the watch-field initialization, the shutdown_watch
 * registration, and the statement/error report belonging to the inner
 * `if` are missing — as shown the inner `if` has no body and this does
 * not compile (truncated source).
 */
void
xen_late_startup(void)
{
if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
if (register_xenbus_watch(&sysrq_watch))
}
}
#ifdef DEBUG
#define XEN_PRINTF_BUFSIZE 1024
/*
* Printf function that calls hypervisor directly. For DomU it only
* works when running on a xen hypervisor built with debug on. Works
* always since no I/O ring interaction is needed.
*/
/*PRINTFLIKE1*/
/*
 * NOTE(review): the local buffer, va_list handling, vsnprintf call, and
 * the remaining HYPERVISOR_console_io arguments are missing from this
 * copy — the surviving call is incomplete as shown (truncated source).
 */
void
xen_printf(const char *fmt, ...)
{
(void) HYPERVISOR_console_io(CONSOLEIO_write,
}
#else
/* Non-DEBUG builds: xen_printf() is deliberately a no-op stub. */
void
xen_printf(const char *fmt, ...)
{
}
#endif /* DEBUG */
/*
 * Boot-time check that the running hypervisor version is supported;
 * warns (does not stop boot) if not.
 * NOTE(review): the cmn_err call and the leading part of its argument
 * list are missing from this copy — only a format-string fragment
 * survives (truncated source).
 */
void
startup_xen_version(void)
{
if (xen_hypervisor_supports_solaris(XEN_RUN_CHECK) == 0)
"but need at least version v3.0.4",
}
/* Test knob: nonzero simulates a physinfo hypercall failure below. */
int xen_mca_simulate_mc_physinfo_failure = 0;
/*
 * dom0-only MCA startup: query the hypervisor for physical-CPU info,
 * allocate xen_phys_cpus to hold it, and (optionally) dump it when
 * xen_physinfo_debug is set.  On any failure, machine-check support is
 * disabled (xen_phys_ncpus left/reset to 0).
 * NOTE(review): the physinfo hypercall invocations, the cmn_err calls
 * whose format strings survive, the kmem_alloc call feeding the
 * sizeof/KM_NOSLEEP fragment, and the per-CPU dump statements are all
 * missing from this copy (truncated source).
 */
void
startup_xen_mca(void)
{
if (!DOMAIN_IS_INITDOMAIN(xen_info))
return;
xen_phys_ncpus = 0;
"%sxen_get_mc_physinfo failure during xen MCA startup: "
"there will be no machine check support",
return;
}
sizeof (xen_mc_logical_cpu_t), KM_NOSLEEP);
if (xen_phys_cpus == NULL) {
"xen_get_mc_physinfo failure: can't allocate CPU array");
return;
}
"physical CPU info");
xen_phys_ncpus * sizeof (xen_mc_logical_cpu_t));
xen_phys_ncpus = 0;
}
if (xen_physinfo_debug) {
unsigned i;
for (i = 0; i < xen_phys_ncpus; i++) {
xcp = &xen_phys_cpus[i];
}
}
}
/*
* Miscellaneous hypercall wrappers with slightly more verbose diagnostics.
*/
/*
 * NOTE(review): every wrapper in this section has lost its name line,
 * parameter list, and hypercall invocation (truncated source); only the
 * panic format strings identify them — apparently xen_set_gdt(),
 * xen_set_ldt(), a selector-update wrapper, a selector-read wrapper
 * returning `err`, and (amd64 only) xen_set_segment_base().  Recover the
 * original file before making code changes here.
 */
void
{
int err;
/*
* X_EINVAL: reserved entry or bad frames
* X_EFAULT: bad address
*/
panic("xen_set_gdt(%p, %d): error %d",
}
}
void
{
long err;
panic("xen_set_ldt(%p, %d): error %d",
}
}
void
{
long err;
/*
* X_EPERM: bad selector
*/
-(int)err);
}
}
long
{
long err;
/*
* X_EFAULT: bad address
* X_EPERM: bad selector
*/
-(int)err);
}
return (err);
}
#if defined(__amd64)
void
{
long err;
/*
* X_EFAULT: bad address
* X_EINVAL: bad type
*/
panic("xen_set_segment_base(%d, %lx): error %d",
}
}
#endif /* __amd64 */
/*
* Translate a hypervisor errcode to a Solaris error code.
*/
/*
 * NOTE(review): all of the case labels mapping Xen X_E* codes to native
 * errno values, and the default-case translation, are missing from this
 * copy — a bare `default:` with no statement does not compile as shown
 * (truncated source).  As written the input would be returned unchanged.
 */
int
xen_xlate_errcode(int error)
{
switch (-error) {
/*
* Translate hypervisor errno's into native errno's
*/
default:
}
return (error);
}
/*
* Raise PS_IOPL on current vcpu to user level.
* Caller responsible for preventing kernel preemption.
*/
/*
 * NOTE(review): both bodies below are empty in this copy; the
 * physdev-op hypercalls that actually change IOPL appear to have been
 * lost (truncated source).  Do not rely on these as shown.
 */
void
xen_enable_user_iopl(void)
{
}
/*
* Drop PS_IOPL on current vcpu to kernel level
*/
void
xen_disable_user_iopl(void)
{
}
/*
 * NOTE(review): two grant-reference/page mapping helpers follow; both
 * have lost their names, parameter lists, the pt_bits initialization,
 * the mapping calls, and the error-report cmn_err bodies (truncated
 * source).  The surviving shape: build PTE bits from `prot` (adding
 * PT_WRITABLE on amd64 for writable mappings), attempt the mapping,
 * jump to `done:` on failure, and return the error code.
 */
int
{
int err;
#if defined(__amd64)
if (prot & PROT_WRITE)
pt_bits |= PT_WRITABLE;
#endif
MMU_PAGESIZE, prot)) != 0)
goto done;
#if defined(__amd64)
#endif
done:
if (err) {
err);
}
return (err);
}
int
{
int err;
#if defined(__amd64)
if (prot & PROT_WRITE)
pt_bits |= PT_WRITABLE;
#endif /* __amd64 */
goto done;
#if defined(__amd64)
/* Apply the PTE bits to each mapped page; stop at the first failure. */
while (npgs--) {
pt_bits)) != 0)
break;
}
#endif /* __amd64 */
done:
if (err) {
(void *)lva,
}
return (err);
}
/*
 * NOTE(review): a physical-CPU-info query helper whose name, parameters,
 * hypercall, and success/failure conditions are missing (truncated
 * source); returns -1 on failure, 0 on success.
 */
int
{
struct xen_mc_physcpuinfo cpi;
/*LINTED: constant in conditional context*/
return (-1);
return (0);
}
/*
 * Emit a panic message string.
 * NOTE(review): the body is empty in this copy — the output call
 * (presumably via the hypervisor console) has been lost.
 */
void
print_panic(const char *str)
{
}
/*
* Interfaces to iterate over real cpu information, but only that info
* which we choose to expose here. These are of interest to dom0
* only (and the backing hypercall should not work for domu).
*/
/*
 * NOTE(review): this whole section is extraction residue.  The iterator
 * functions (first/next cookie over xen_phys_cpus, guarded by
 * DOMAIN_IS_INITDOMAIN) and a series of small accessors returning
 * fields of the xen_mc_logical_cpu_t behind a cookie have all lost
 * their names, signatures, and most bodies; the empty `{ }` pairs below
 * are the remains of those accessors.  Recover the original file —
 * nothing here compiles as shown.
 */
{
if (!DOMAIN_IS_INITDOMAIN(xen_info))
return (NULL);
return ((xen_mc_lcpu_cookie_t)xen_phys_cpus);
return (NULL);
else
return ((xen_mc_lcpu_cookie_t)++xcp);
}
/* Convert an opaque iterator cookie back to the underlying record. */
#define COOKIE2XCP(c) ((xen_mc_logical_cpu_t *)(c))
const char *
{
return ((const char *)&xcp->mc_vendorid[0]);
}
int
{
}
int
{
}
int
{
}
{
}
{
}
{
}
{
}
{
}
{
/*
* Need to #define the indices, or search through the array.
*/
}
/*
 * NOTE(review): everything below is extraction residue (truncated
 * source): a multicall/batch helper using `rc`/`count`, a static
 * hypercall wrapper translating its result via xen_xlate_errcode(),
 * and the xpv_nr_phys_cpus()/xpv_nr_phys_pages()/xpv_cpu_khz() queries
 * have all lost their signatures, return types, and the hypercall
 * invocations.  Recover the original file before editing.
 */
int
{
long rc;
uint_t i;
#if !defined(_BOOT)
for (i = 0; i < count; ++i) {
}
}
#endif
return (rc);
}
static int
{
int ret;
/*LINTED: constant in conditional context*/
if (ret != 0)
return (xen_xlate_errcode(ret));
return (0);
}
/*
* On dom0, we can determine the number of physical cpus on the machine.
* This number is important when figuring out what workarounds are
* appropriate, so compute it now.
*/
/* Cached after the first query; return type line missing in this copy. */
xpv_nr_phys_cpus(void)
{
if (nphyscpus == 0) {
int ret;
}
return (nphyscpus);
}
xpv_nr_phys_pages(void)
{
int ret;
}
xpv_cpu_khz(void)
{
int ret;
}