xpv_panic.c revision 9844da31e6f9a1bffcbbb9ec7926f759ee04c460
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/compress.h>
#include <sys/x86_archext.h>
#include <sys/xpv_panic.h>
#include <sys/boot_console.h>
#include <sys/bootsvcs.h>
/* XXX: need to add a PAE version too, if we ever support both PAE and non */
#if defined(__i386)
#define XPV_FILENAME "/boot/xen-syms"
#else
#define XPV_FILENAME "/boot/amd64/xen-syms"
#endif
#define XPV_MODNAME "xpv"
/*
* Incremented on every entry to xpv_do_panic(); a non-zero value on entry
* means the routine was called more than once, which is treated as fatal.
*/
int xpv_panicking = 0;
/*
* The module and modctl structures describing the fake "Xen" module. Both
* are only assigned once the setup routine has finished successfully, and
* xpv_do_panic() checks xpv_module against NULL before using it.
*/
struct module *xpv_module;
struct modctl *xpv_modctl;
/* Pointer to the xpv_panic_info structure handed to us by Xen. */
/* Timer support */
#define NSEC_SHIFT 5
#define T_XPV_TIMER 0xd1
static uint_t nsec_scale;
/* IDT support */
/* Xen pagetables mapped into our HAT's ptable windows */
/* Number of MMU_PAGESIZE pages we're adding to the Solaris dump */
static int xpv_dump_pages;
/*
* There are up to two large swathes of RAM that we don't want to include
* in the dump: those that comprise the Xen version of segkpm. On 32-bit
* systems there is no such region of memory. On 64-bit systems, there
* should be just a single contiguous region that corresponds to all of
* physical memory. The tricky bit is that Xen's heap sometimes lives in
* the middle of their segkpm, and is mapped using only kpm-like addresses.
* In that case, we need to skip the swathes before and after Xen's heap.
*/
/*
* Some commonly used values that we don't want to recompute over and over.
*/
/*
* Per-level table size used as the upper bound for the index when a page
* table is scanned. NOTE(review): presumably the number of PTEs in a
* table at each level - confirm against the code that fills it in.
*/
static int xpv_panic_nptes[MAX_NUM_LEVEL];
/* NOTE(review): presumably a cached copy of %cr3; no use is visible here. */
static ulong_t xpv_panic_cr3;
static void xpv_panic_console_print(const char *fmt, ...);
/*
* All panic-time messages in this file go through this pointer. It
* defaults to xpv_panic_console_print().
*/
static void (*xpv_panic_printf)(const char *, ...) = xpv_panic_console_print;
#define CONSOLE_BUF_SIZE 256
/*
* NOTE(review): presumably the scratch buffer xpv_panic_console_print()
* formats into - confirm; no use is visible in this view.
*/
static char console_buffer[CONSOLE_BUF_SIZE];
/*
* When B_TRUE, xpv_panic_putc() emits characters through the cons_polledio
* putchar hook instead of bcons_putchar(). Set from xpv_do_panic().
*/
static boolean_t use_polledio;
/*
* Pointers to machine check panic info (if any).
*/
/*
 * Emit a single character on the panic console.
 *
 * Nothing is written while the console is still CONS_HYPERVISOR. Otherwise
 * the character goes to the polled I/O putchar hook when use_polledio is
 * set, and to bcons_putchar() when it is not.
 */
static void
xpv_panic_putc(int m)
{
	struct cons_polledio *pio;

	/* This really shouldn't happen; if it does, drop the character. */
	if (console == CONS_HYPERVISOR)
		return;

	if (use_polledio != B_TRUE) {
		bcons_putchar(m);
		return;
	}

	pio = cons_polledio;
	pio->cons_polledio_putchar(pio->cons_polledio_argument, m);
}
/*
 * Write a NUL-terminated string to the panic console, one character at a
 * time, via xpv_panic_putc(). The terminator itself is not written.
 */
static void
xpv_panic_puts(char *msg)
{
	char *cur = msg;

	while (*cur != '\0') {
		xpv_panic_putc((int)*cur);
		cur++;
	}
}
/*
* Default target of the xpv_panic_printf function pointer.
*
* NOTE(review): the body is empty in this view, so 'fmt' and the variadic
* arguments are ignored here. Presumably the complete version formats the
* message into console_buffer and emits it with xpv_panic_puts() - confirm
* against the full source.
*/
static void
xpv_panic_console_print(const char *fmt, ...)
{
}
static void
{
/*
* The provided pfn represents a level 'level' page table. Map it
* into the 'level' slot in the list of page table windows.
*/
else
}
/*
* Walk the page tables to find the pfn mapped by the given va.
*/
static pfn_t
{
int l, idx;
static pfn_t toplevel_pfn;
/*
* If we do anything other than a simple scan through memory, don't
* trust the mapped page tables.
*/
ptable_pfn[l] = PFN_INVALID;
/* Find the lowest table with any entry for va */
pfn = toplevel_pfn;
if (ptable_pfn[l] != pfn) {
xpv_panic_map(l, pfn);
ptable_pfn[l] = pfn;
}
/*
* Search this pagetable for any mapping to an
* address >= va.
*/
ptable_window = PWIN_VA(l);
break;
idx++;
}
/*
* If there are no valid mappings in this table, we
* can skip to the end of the VA range it covers.
*/
if (idx == xpv_panic_nptes[l]) {
break;
}
/*
* See if we've hit the end of the range.
*/
break;
/*
* If this mapping is for a pagetable, we drop down
* to the next level in the hierarchy and look for
* a mapping in it.
*/
if (!PTE_ISPAGE(pte, l))
continue;
/*
* The APIC page is magic. Nothing to see here;
* move along.
*/
(va & MMU_PAGEMASK)) {
va += MMU_PAGESIZE;
break;
}
/*
* See if the address is within one of the two
* kpm-like regions we want to skip.
*/
break;
}
break;
}
/*
* The Xen panic code only handles small pages. If
* this mapping is for a large page, we need to
* identify the constituent page that covers the
* specific VA we were looking for.
*/
if (l > 0) {
if (l > 1)
panic("Xen panic can't cope with "
"giant pages.");
(xpv_panic_nptes[0] - 1);
}
return (pfn | PFN_IS_FOREIGN_MFN);
}
}
return (PFN_INVALID);
}
/*
* Walk through the Xen VA space, finding pages that are mapped in.
*
* These pages all have MFNs rather than PFNs, meaning they may be outside
* the physical address space the kernel knows about, or they may collide
* with PFNs the kernel is using.
*
* The obvious trick of just adding the PFN_IS_FOREIGN_MFN bit to the MFNs
* to avoid collisions doesn't work. The pages need to be written to disk
* in PFN-order or savecore gets confused. We can't allocate memory to
* construct a sorted pfn->VA reverse mapping, so we have to write the pages
* to disk in VA order.
*
* To square this circle, we simply make up PFNs for each of Xen's pages.
* We assign each mapped page a fake PFN in ascending order. These fake
* PFNs each have the FOREIGN bit set, ensuring that they fall outside the
* range of Solaris PFNs written by the kernel.
*/
int
{
xpv_dump_pages = 0;
va = xen_virt_start;
va += MMU_PAGESIZE;
}
/*
* Add the shared_info page. This page actually ends up in the
* dump twice: once for the Xen va and once for the Solaris va.
* This isn't ideal, but we don't know the address Xen is using for
* the page, so we can't share it.
*/
return (xpv_dump_pages);
}
void
{
int cnt;
}
}
/*
* Write Xen's pages into the crash dump.
*
* The visible code starts at xen_virt_start and advances one MMU_PAGESIZE
* page at a time, bumping 'cnt' for each step, and returns early with the
* count so far once dump_ioerr is set. The return value is 'cnt'.
*
* NOTE(review): the loop header, the declaration of 'va' and the calls
* that actually write each page are not visible in this view, and the
* brace structure is unbalanced as a result. How 'dump_cbuf' is used
* cannot be determined from here - confirm against the complete source.
*/
int
dump_xpv_data(void *dump_cbuf)
{
int cnt = 0;
/*
* XXX: we should probably run this data through a UE check. The
* catch is that the UE code relies on on_trap() and getpfnum()
* working.
*/
va = xen_virt_start;
/* Give up as soon as an I/O error has been flagged. */
if (dump_ioerr) {
return (cnt);
}
cnt++;
va += MMU_PAGESIZE;
}
/*
* Finally, dump the shared_info page
*/
PAGESIZE);
if (dump_ioerr)
cnt++;
return (cnt);
}
static void *
{
char *sym;
return (fpreg);
}
do {
if ((xpv_only != 0) &&
break;
else
/*
* Xen marks an exception frame by inverting the frame
* pointer.
*/
}
return ((void *)fp);
}
/*
* Non-static traceback entry point taking a frame pointer value.
*
* NOTE(review): the body is empty in this view even though the function is
* declared to return void *. Presumably it forwards 'fpreg' to the static
* stack-walking helper defined just above - confirm against the complete
* source.
*/
void *
xpv_traceback(void *fpreg)
{
}
#if defined(__amd64)
static void
{
}
#endif
void
{
struct panic_trap_info ti;
panic("Fatal pagefault at 0x%lx. fault addr=0x%p rp=0x%p",
} else {
}
}
/*
* Build IDT to handle a Xen panic
*/
static void
{
int i;
for (i = 0; i < 32; i++)
0);
0);
TRP_XPL, 0);
0);
0);
0);
0);
0);
0);
/*
* We have no double fault handler. Any single fault represents a
* catastrophic failure for us, so there is no attempt to handle
* them cleanly: we just print a message and reboot. If we
* encounter a second fault while doing that, there is nothing
* else we can do.
*/
/*
* Be prepared to absorb any stray device interrupts received
* while writing the core to disk.
*/
for (i = 33; i < NIDT; i++)
TRP_XPL, 0);
/* The one interrupt we expect to get is from the APIC timer. */
TRP_XPL, 0);
#if defined(__amd64)
/* Catch any hypercalls. */
#endif
}
static void
{
uint_t apic_ticks = 0;
/*
* Measure how many APIC ticks there are within a fixed time
* period. We're going to be fairly coarse here. This timer is
* just being used to detect a stalled panic, so as long as we have
* the right order of magnitude, everything should be fine.
*/
xpv_apicadr[APIC_DIVIDE_REG] = 0;
/*
* apic_ticks now represents roughly how many apic ticks comprise
* one timeout interval. Program the timer to send us an interrupt
* every time that interval expires.
*/
xpv_apicadr[APIC_EOI_REG] = 0;
}
/*
* Handler for the panic-time timer tick.
*
* Visible behaviour: while dump_timeleft is non-zero it is decremented,
* and reaching zero triggers panic("Xen panic timeout\n"). The handler
* finishes by writing 0 to the APIC EOI register.
*
* NOTE(review): the braces below are unbalanced in this view, so the
* conditional that guards the 'ticks = 0' reset appears to be missing.
* Presumably 'ticks' throttles how often dump_timeleft is decremented -
* confirm against the complete source.
*/
void
xpv_timer_tick(void)
{
static int ticks = 0;
ticks = 0;
/* Count down the dump watchdog; expiry is fatal. */
if (dump_timeleft && (--dump_timeleft == 0))
panic("Xen panic timeout\n");
}
xpv_apicadr[APIC_EOI_REG] = 0;
}
/*
 * Handler for interrupts we did not expect to receive during the panic.
 *
 * On DEBUG builds the first ten occurrences are reported through
 * xpv_panic_printf(); every occurrence is counted. In all builds the
 * handler finishes by writing 0 to the APIC EOI register.
 */
void
xpv_interrupt(void)
{
#ifdef DEBUG
	static int seen = 0;
	int prior = seen++;

	if (prior < 10)
		xpv_panic_printf("Unexpected interrupt received.\n");
#endif
	xpv_apicadr[APIC_EOI_REG] = 0;
}
/*
* Managing time in panic context is trivial. We only have a single CPU,
* we never get rescheduled, we never get suspended. We just need to
* convert clock ticks into nanoseconds.
*
* The visible code samples the TSC with __rdtsc_insn() and returns 'hrt'.
*
* NOTE(review): the declarations of 'tsc' and 'hrt' and the arithmetic
* that derives one from the other are not visible in this view. 'l' views
* 'tsc' as an array of unsigned ints, presumably so its two 32-bit halves
* can be scaled separately using nsec_scale and NSEC_SHIFT - confirm
* against the complete source.
*/
static hrtime_t
xpv_panic_gethrtime(void)
{
unsigned int *l = (unsigned int *)&(tsc);
tsc = __rdtsc_insn();
return (hrt);
}
static void
{
}
static void
{
}
/*
* Entry point for handling a Xen panic from within Solaris.
*
* Steps visible in this view, in order:
*   - refuse re-entry: a second call panics with "multiple calls to
*     xpv_do_panic()";
*   - pull the panic_quiesce trigger so the panic framework knows a panic
*     is in progress;
*   - decide how console output will be produced (use_polledio is set on
*     one branch of that decision);
*   - warn if Xen handed us an unsupported panic_info version;
*   - if xpv_module is non-NULL, add the fake Xen module to the module
*     list;
*   - print "Failed to reboot following panic." and spin forever, so this
*     routine never returns.
*
* The remaining steps (kernelbase adjustment, %gs, IDT, APIC timer, cached
* values) are described by the inline comments, but their statements are
* not visible here.
*
* NOTE(review): 'arg' is not referenced by any visible statement;
* presumably it carries the panic_info structure supplied by Xen -
* confirm against the complete source.
*/
void
xpv_do_panic(void *arg)
{
int l;
#if defined(__amd64)
extern uintptr_t postbootkernelbase;
#endif
/* The post-increment means only the very first caller gets past here. */
if (xpv_panicking++ > 0)
panic("multiple calls to xpv_do_panic()");
/*
* Indicate to the underlying panic framework that a panic has been
* initiated. This is ordinarily done as part of vpanic(). Since
* we already have all the register state saved by the hypervisor,
* we skip that and jump straight into the panic processing code.
*
* XXX If another thread grabs and wins the panic_quiesce trigger
* then we'll have two threads in panicsys believing they are in
* charge of the panic attempt!
*/
(void) panic_trigger(&panic_quiesce);
#if defined(__amd64)
/*
* bzero() and bcopy() get unhappy when asked to operate on
* addresses outside of the kernel. At this point Xen is really a
* part of the kernel, so we update the routines' notion of where
* the kernel starts.
*/
#endif
#if defined(HYPERVISOR_VIRT_END)
#else
#endif
/*
* If we were redirecting console output to the hypervisor, we have
* to stop.
*/
if (console == CONS_HYPERVISOR) {
} else if (cons_polledio != NULL &&
use_polledio = 1;
}
/* Make sure we handle all console output from here on. */
/*
* If we find an unsupported panic_info structure, there's not much
* we can do other than complain, plow on, and hope for the best.
*/
xpv_panic_printf("Warning: Xen is using an unsupported "
"version of the panic_info structure.\n");
#if defined(__amd64)
} else {
}
#endif
/*
* Make sure we are running on the Solaris %gs. The Xen panic code
* should already have set up the GDT properly.
*/
#if defined(__amd64)
#endif
/*
* Switch to our own IDT, avoiding any accidental returns to Xen
* world.
*/
/*
* Initialize the APIC timer, which is used to detect a hung dump
* attempt.
*/
/*
* Set up a few values that we'll need repeatedly.
*/
#ifdef __i386
#endif
/* Add the fake Xen module to the module list */
if (xpv_module != NULL) {
extern int last_module_id;
}
/* Reaching this point means the reboot did not happen; hang here. */
xpv_panic_printf("Failed to reboot following panic.\n");
for (;;)
;
}
/*
* Set up the necessary data structures to pretend that the Xen hypervisor
* is a loadable module, allowing mdb to find the Xen symbols in a crash
* dump. Since these symbols all map to VA space Solaris doesn't normally
* have access to, we don't link these structures into the kernel's lists
* here; xpv_do_panic() adds the fake module to the module list.
*
* The observant reader will note a striking amount of overlap between this
* code and that found in krtld. While it would be handy if we could just
* ask krtld to do this work for us, it's not that simple. Among the
* complications: we're not actually loading the text here (grub did it at
* boot), the .text section is writable, and there are no relocations to do.
* Teaching krtld to deal with this weird module is as complicated, and more
* risky, than reimplementing the necessary subset of it here.
*/
static void
{
int i, shn;
#if defined(__amd64)
#else
#endif
/* Allocate and init the module structure */
/* Allocate and init the modctl structure */
/*
* Try to open a Xen image that hasn't had its symbol and CTF
* information stripped off.
*/
goto err;
}
/*
* Read the header and ensure that this is an ELF file for the
* proper ISA. If it's not, somebody has done something very
* stupid. Why bother? See Mencken.
*/
goto err;
for (i = 0; i < SELFMAG; i++)
goto err;
goto err;
/* Read in the section headers */
goto err;
/* Read the section names */
goto err;
/*
* Fill in the text and data size fields.
*/
text_align = data_align = 0;
/* Sanity check the offset of the section name */
continue;
/* If we find the symtab section, remember it for later. */
continue;
}
/* If we find the CTF section, remember it for later. */
continue;
}
continue;
/*
* Xen marks its text section as writable, so we need to
* look for the name - not just the flag.
*/
} else {
}
}
/*
* If we have symbol table and string table sections, read them in
* now. If we don't, we just plow on. We'll still get a valid
* core dump, but finding anything useful will be just a bit
* harder.
*
* Note: we don't bother with a hash table. We'll never do a
* symbol lookup unless we crash, and then mdb creates its own. We
* also don't try to perform any relocations. Xen should be loaded
* exactly where the ELF file indicates, and the symbol information
* in the file should be complete and correct already. Static
* linking ain't all bad.
*/
/* Allocate space for the symbol table and strings. */
goto err;
}
/*
* Read in the CTF section
*/
goto err;
}
xpv_module = mp;
xpv_modctl = mcp;
return;
err:
}
void
{
int i;
ptable_pfn[i] = PFN_INVALID;
(void) HYPERVISOR_platform_op(&op);
}