fastboot.c revision 753a6d457b330b1b29b2d3eefcd0831116ce950d
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file contains the functions for performing Fast Reboot -- a
* reboot which bypasses the firmware and bootloader, considerably
* reducing downtime.
*
* fastboot_load_kernel(): This function is invoked by mdpreboot() in the
* reboot path. It loads the new kernel and boot archive into memory, builds
* the data structure containing sufficient information about the new
* kernel and boot archive to be passed to the fast reboot switcher
* (see fb_swtch_src.s for details). When invoked the switcher relocates
* the new kernel and boot archive to physically contiguous low memory,
* similar to where the boot loader would have loaded them, and jumps to
* the new kernel.
*
* If fastreboot_onpanic is enabled, fastboot_load_kernel() is called
* by fastboot_post_startup() to load the backup kernel in case of
* panic.
*
* The physical addresses of the memory allocated for the new kernel, boot
* archive and their page tables must be above where the boot archive ends
* after it has been relocated by the switcher, otherwise the new files
* and their page tables could be overwritten during relocation.
*
* fast_reboot(): This function is invoked by mdboot() once it's determined
* that the system is capable of fast reboot. It jumps to the fast reboot
* switcher with the data structure built by fastboot_load_kernel() as the
* argument.
*/
#include <sys/types.h>
#include <sys/param.h>
#include <sys/segments.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/kmem.h>
#include <sys/reboot.h>
#include <sys/uadmin.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/cmn_err.h>
#include <sys/dumphdr.h>
#include <sys/bootconf.h>
#include <sys/ddidmareq.h>
#include <sys/varargs.h>
#include <sys/promif.h>
#include <sys/modctl.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/hat_i86.h>
#include <sys/vm_machparam.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/mman.h>
#include <sys/x86_archext.h>
#include <sys/smp_impldefs.h>
#include <sys/spl.h>
#include <sys/fastboot.h>
#include <sys/machelf.h>
#include <sys/kobj.h>
#include <sys/multiboot.h>
#include <sys/kobj_lex.h>
/*
* Macro to determine how many pages are needed for PTEs to map a particular
* file. Allocate one extra page table entry for terminating the list.
*/
#define FASTBOOT_PTE_LIST_SIZE(fsize) \
P2ROUNDUP((((fsize) >> PAGESHIFT) + 1) * sizeof (x86pte_t), PAGESIZE)
/*
* Data structure containing necessary information for the fast reboot
* switcher to jump to the new kernel.
*/
fastboot_info_t newkernel = { 0 };
/*
 * Boot arguments for the new kernel. fastboot_load_kernel() clears this
 * buffer, has fastboot_parse_mdep() fill it in, and then passes it to
 * fastboot_build_mbi().
 */
char fastboot_args[OBP_MAXPATHLEN];
/*
 * Paths of the files to load, indexed by FASTBOOT_NAME_UNIX and
 * FASTBOOT_NAME_BOOTARCHIVE.
 */
static char fastboot_filename[2][OBP_MAXPATHLEN] = { { 0 }, { 0 }};
/*
 * Bits OR-ed into page table entries that point at a lower-level table
 * (the 32-bit top level uses PT_VALID alone instead).
 */
static x86pte_t ptp_bits = PT_VALID | PT_REF | PT_USER | PT_WRITABLE;
/*
 * Bits OR-ed into leaf mappings and into every entry of the per-file
 * PTE lists.
 */
static x86pte_t pte_bits =
PT_VALID | PT_REF | PT_MOD | PT_NOCONSIST | PT_WRITABLE;
/*
 * Per-level virtual address shift amounts; installed as fi_shift_amt by
 * fastboot_init_fields() when the CPU has PAE.
 */
static uint_t fastboot_shift_amt_pae[] = {12, 21, 30, 39};
/* Non-zero enables the output of dprintf() in this file. */
int fastboot_debug = 0;
/*
 * When zero, fastboot_load_kernel() raises dma_attr_sgllen to one segment
 * per page for the file buffers; when non-zero the sgllen of 1 taken from
 * fastboot_dma_attr is left in place.
 */
int fastboot_contig = 0;
/*
* Fake starting va for new kernel and boot archive.
*/
static uintptr_t fake_va = FASTBOOT_FAKE_VA;
/*
* Reserve memory below PA 1G in preparation of fast reboot.
*
* This variable is only checked when fastreboot_capable is set, but
* fastreboot_onpanic is not set. The amount of memory reserved
* is negligible, but just in case we are really short of low memory,
* this variable will give us a backdoor to not consume memory at all.
*/
int reserve_mem_enabled = 1;
/*
* Amount of memory below PA 1G to reserve for constructing the multiboot
* data structure and the page tables as we tend to run out of those
* when more drivers are loaded.
*/
static size_t fastboot_mbi_size = 0x2000; /* 8K */
static size_t fastboot_pagetable_size = 0x5000; /* 20K */
/*
* Use below 1G for page tables as
* 1. we are only doing 1:1 mapping of the bottom 1G of physical memory.
* 2. we are using 2G as the fake virtual address for the new kernel and
* boot archive.
*/
static ddi_dma_attr_t fastboot_below_1G_dma_attr = {
DMA_ATTR_V0,
0x0000000008000000ULL, /* dma_attr_addr_lo: 128MB */
0x000000003FFFFFFFULL, /* dma_attr_addr_hi: 1G */
0x00000000FFFFFFFFULL, /* dma_attr_count_max */
0x0000000000001000ULL, /* dma_attr_align: 4KB */
1, /* dma_attr_burstsize */
1, /* dma_attr_minxfer */
0x00000000FFFFFFFFULL, /* dma_attr_maxxfer */
0x00000000FFFFFFFFULL, /* dma_attr_seg */
1, /* dma_attr_sgllen */
0x1000ULL, /* dma_attr_granular */
0, /* dma_attr_flags */
};
static ddi_dma_attr_t fastboot_dma_attr = {
DMA_ATTR_V0,
0x0000000008000000ULL, /* dma_attr_addr_lo: 128MB */
#ifdef __amd64
0xFFFFFFFFFFFFFFFFULL, /* dma_attr_addr_hi: 2^64B */
#else
0x0000000FFFFFFFFFULL, /* dma_attr_addr_hi: 64GB */
#endif /* __amd64 */
0x00000000FFFFFFFFULL, /* dma_attr_count_max */
0x0000000000001000ULL, /* dma_attr_align: 4KB */
1, /* dma_attr_burstsize */
1, /* dma_attr_minxfer */
0x00000000FFFFFFFFULL, /* dma_attr_maxxfer */
0x00000000FFFFFFFFULL, /* dma_attr_seg */
1, /* dma_attr_sgllen */
0x1000ULL, /* dma_attr_granular */
0, /* dma_attr_flags */
};
/*
* Various information saved from the previous boot to reconstruct
* multiboot_info.
*/
extern multiboot_info_t saved_mbi;
extern mb_memory_map_t saved_mmap[FASTBOOT_SAVED_MMAP_COUNT];
extern struct sol_netinfo saved_drives[FASTBOOT_SAVED_DRIVES_COUNT];
extern char saved_cmdline[FASTBOOT_SAVED_CMDLINE_LEN];
extern int saved_cmdline_len;
extern size_t saved_file_size[];
extern void* contig_alloc(size_t size, ddi_dma_attr_t *attr,
uintptr_t align, int cansleep);
extern void contig_free(void *addr, size_t size);
/* PRINTLIKE */
extern void vprintf(const char *, va_list);
/*
* Need to be able to get boot_archives from other places
*/
/*
 * Boot archive paths. fastboot_load_kernel() appends one of these to the
 * boot path prefix, choosing by the ELF class of the kernel and by whether
 * a failsafe kernel was requested.
 */
#define BOOTARCHIVE64 "/platform/i86pc/amd64/boot_archive"
#define BOOTARCHIVE32 "/platform/i86pc/boot_archive"
#define BOOTARCHIVE32_FAILSAFE "/boot/x86.miniroot-safe"
#define BOOTARCHIVE64_FAILSAFE "/boot/amd64/x86.miniroot-safe"
/*
 * Kernel paths that identify a failsafe boot; kern_bootfile is compared
 * against these prefixes in fastboot_load_kernel().
 */
#define FAILSAFE_BOOTFILE32 "/boot/platform/i86pc/kernel/unix"
#define FAILSAFE_BOOTFILE64 "/boot/platform/i86pc/kernel/amd64/unix"
/* Forward declarations for helpers defined later in this file. */
static uint_t fastboot_vatoindex(fastboot_info_t *, uintptr_t, int);
static void fastboot_map_with_size(fastboot_info_t *, uintptr_t,
paddr_t, size_t, int);
static void fastboot_build_pagetables(fastboot_info_t *);
static int fastboot_build_mbi(char *, fastboot_info_t *);
static void fastboot_free_file(fastboot_file_t *);
/*
 * Shared out-of-memory warning. Arguments: the byte count as a uint64_t,
 * then a string naming the physical address limit (e.g. "1G").
 */
static const char fastboot_enomem_msg[] = "Fastboot: Couldn't allocate 0x%"
PRIx64" bytes below %s to do fast reboot";
/*
 * Debug printf. The message is forwarded to vprintf() only while
 * fastboot_debug is non-zero; otherwise the call is a no-op.
 */
static void
dprintf(char *fmt, ...)
{
	if (fastboot_debug) {
		va_list ap;

		va_start(ap, fmt);
		vprintf(fmt, ap);
		va_end(ap);
	}
}
/*
 * Return the page table index that a virtual address selects at the
 * given page table level: the address is shifted down by that level's
 * shift amount and masked with (fi_ptes_per_table - 1).
 */
static uint_t
fastboot_vatoindex(fastboot_info_t *nk, uintptr_t va, int level)
{
	uintptr_t shifted = va >> nk->fi_shift_amt[level];

	return (shifted & (nk->fi_ptes_per_table - 1));
}
/*
 * Add mappings from vstart to pstart for the specified size.
 * vstart, pstart and size should all have been aligned at 2M boundaries.
 *
 * The walk starts at the table at nk->fi_pagetable_va and descends from
 * nk->fi_top_level to `level'. Entries written at `level' are the final
 * mappings; for level > 0 they carry PT_PAGESIZE. When an intermediate
 * entry is not yet valid, the table at nk->fi_next_table_{va,pa} is linked
 * in and that cursor is advanced by one MMU page.
 */
static void
fastboot_map_with_size(fastboot_info_t *nk, uintptr_t vstart, paddr_t pstart,
    size_t size, int level)
{
	x86pte_t	*tbl = (x86pte_t *)(nk->fi_pagetable_va);
	int		lvl;

	for (lvl = nk->fi_top_level; lvl >= level; lvl--) {
		int slot = fastboot_vatoindex(nk, vstart, lvl);

		if (lvl != level) {
			if (tbl[slot] & PT_VALID) {
				/*
				 * A lower table is already linked here.
				 * Convert its physical address back into a
				 * virtual one within the page table area.
				 */
				paddr_t child_pa =
				    (paddr_t)tbl[slot] & MMU_PAGEMASK;
				uintptr_t delta =
				    (uintptr_t)(child_pa - nk->fi_pagetable_pa);

				tbl = (x86pte_t *)(delta + nk->fi_pagetable_va);
				continue;
			}

			/*
			 * Intermediate levels.
			 * Program with either valid bit or PTP bits.
			 */
			if (lvl == nk->fi_top_level) {
#ifdef __amd64
				ASSERT(nk->fi_top_level == 3);
				tbl[slot] = nk->fi_next_table_pa | ptp_bits;
#else
				tbl[slot] = nk->fi_next_table_pa | PT_VALID;
#endif	/* __amd64 */
			} else {
				tbl[slot] = nk->fi_next_table_pa | ptp_bits;
			}

			/* Descend into the table that was just linked in. */
			tbl = (x86pte_t *)(nk->fi_next_table_va);
			nk->fi_next_table_va += MMU_PAGESIZE;
			nk->fi_next_table_pa += MMU_PAGESIZE;
			continue;
		}

		/*
		 * Last level. Program the page table entries, one per
		 * page of this level's size.
		 */
		{
			const unsigned long long step =
			    1ULL << nk->fi_shift_amt[lvl];
			x86pte_t	leaf_bits = pte_bits;
			uintptr_t	va;
			paddr_t		pa;

			if (lvl > 0)
				leaf_bits |= PT_PAGESIZE;

			for (va = vstart, pa = pstart; va < vstart + size;
			    va += step, pa += step) {
				uint_t leaf = fastboot_vatoindex(nk, va, lvl);

				tbl[leaf] = pa | leaf_bits;
			}
		}
	}
}
/*
 * Build page tables for the lower 1G of physical memory using 2M
 * pages, and prepare page tables for mapping new kernel and boot
 * archive pages using 4K pages.
 *
 * Side effect: the file-scope fake_va is aligned down to
 * nk->fi_lpagesize before it is used.
 */
static void
fastboot_build_pagetables(fastboot_info_t *nk)
{
	paddr_t first_page_pa;

	/* Map va 0 onto pa 0 for 1G, using level 1 (large page) entries. */
	fastboot_map_with_size(nk, 0, 0, ONE_GIG, 1);

	/*
	 * Map a single 4K page at fake_va so that the intermediate tables
	 * for that address range get set up. The page used is the one named
	 * by the first PTE list entry of file 0.
	 */
	fake_va = P2ALIGN_TYPED(fake_va, nk->fi_lpagesize, uintptr_t);
	first_page_pa = nk->fi_files[0].fb_pte_list_va[0] & MMU_PAGEMASK;
	fastboot_map_with_size(nk, fake_va, first_page_pa, PAGESIZE, 0);
}
/*
 * Sanity check. Look for dboot offset.
 *
 * Scans the ELF64 program headers of img for a PT_LOAD segment whose
 * virtual and physical addresses both equal DBOOT_ENTRY_ADDRESS. On
 * success the segment's file offset is stored in *offp and 0 is returned.
 * Returns -1 if the program header table does not fit within imgsz or no
 * such segment exists.
 */
static int
fastboot_elf64_find_dboot_load_offset(void *img, off_t imgsz, uint32_t *offp)
{
	Elf64_Ehdr	*eh = (Elf64_Ehdr *)img;
	uint8_t		*ph_table;
	int		n;

	if ((eh->e_phoff + eh->e_phnum * eh->e_phentsize) >= imgsz)
		return (-1);

	ph_table = (uint8_t *)img + eh->e_phoff;

	for (n = 0; n < eh->e_phnum; n++) {
		Elf64_Phdr *ph =
		    (Elf64_Phdr *)(ph_table + eh->e_phentsize * n);

		if (ph->p_type != PT_LOAD)
			continue;
		if (ph->p_vaddr != ph->p_paddr ||
		    ph->p_vaddr != DBOOT_ENTRY_ADDRESS)
			continue;

		ASSERT(ph->p_offset <= UINT32_MAX);
		*offp = (uint32_t)ph->p_offset;
		return (0);
	}

	return (-1);
}
/*
 * Initialize text and data section information for 32-bit kernel.
 *
 * img/imgsz	- the in-memory ELF32 image and its size.
 * sectp	- array that receives one entry per loadable segment other
 *		  than the dboot segment.
 * sectcntp	- is both input/output parameter.
 *		  On entry, *sectcntp contains maximum allowable number of
 *		  sections; on return, it contains the actual number of
 *		  sections filled.
 * offp		- receives the file offset of the dboot segment, i.e. the
 *		  PT_LOAD segment whose virtual and physical addresses both
 *		  equal DBOOT_ENTRY_ADDRESS.
 *
 * Returns 0 on success and -1 when the image is rejected: the program
 * header table does not fit within imgsz, a PT_INTERP header is present,
 * there are more loadable segments than sectp can hold, a segment reaches
 * DBOOT_ENTRY_ADDRESS, or no dboot segment exists. The last case matters
 * because *offp would otherwise be left unwritten while the caller goes
 * on to use it.
 */
static int
fastboot_elf32_find_loadables(void *img, off_t imgsz, fastboot_section_t *sectp,
    int *sectcntp, uint32_t *offp)
{
	Elf32_Ehdr	*ehdr = (Elf32_Ehdr *)img;
	Elf32_Phdr	*phdr;
	uint8_t		*phdrbase;
	int		i;
	int		used_sections = 0;
	int		found_dboot = 0;
	const int	max_sectcnt = *sectcntp;

	if ((ehdr->e_phoff + ehdr->e_phnum * ehdr->e_phentsize) >= imgsz)
		return (-1);

	phdrbase = (uint8_t *)img + ehdr->e_phoff;

	for (i = 0; i < ehdr->e_phnum; i++) {
		phdr = (Elf32_Phdr *)(phdrbase + ehdr->e_phentsize * i);

		/* An image that names an interpreter is rejected outright. */
		if (phdr->p_type == PT_INTERP)
			return (-1);

		if (phdr->p_type != PT_LOAD)
			continue;

		if (phdr->p_vaddr == phdr->p_paddr &&
		    phdr->p_paddr == DBOOT_ENTRY_ADDRESS) {
			/* The dboot segment: only its file offset is kept. */
			*offp = (uint32_t)phdr->p_offset;
			found_dboot = 1;
		} else {
			if (max_sectcnt <= used_sections)
				return (-1);

			sectp[used_sections].fb_sec_offset = phdr->p_offset;
			sectp[used_sections].fb_sec_paddr = phdr->p_paddr;
			sectp[used_sections].fb_sec_size = phdr->p_filesz;
			sectp[used_sections].fb_sec_bss_size =
			    (phdr->p_filesz < phdr->p_memsz) ?
			    (phdr->p_memsz - phdr->p_filesz) : 0;

			/* Extra sanity check for the input object file */
			if (sectp[used_sections].fb_sec_paddr +
			    sectp[used_sections].fb_sec_size +
			    sectp[used_sections].fb_sec_bss_size >=
			    DBOOT_ENTRY_ADDRESS)
				return (-1);

			used_sections++;
		}
	}

	/*
	 * Without a dboot segment *offp has never been written; fail rather
	 * than let the caller compute addresses from an undefined offset.
	 */
	if (!found_dboot)
		return (-1);

	*sectcntp = used_sections;
	return (0);
}
/*
 * Create multiboot info structure
 *
 * mdep	- command line for the new kernel. When NULL or empty, the command
 *	  line saved from the previous boot (saved_cmdline) is used.
 * nk	- kernel descriptor; fi_new_mbi_va, fi_new_mbi_pa and fi_mbi_size
 *	  are updated here.
 *
 * The buffer is laid out as: multiboot_info_t, one mb_module_t describing
 * the boot archive, the boot archive name, the saved memory map (aligned
 * to 16 bytes), the saved drives info, and finally the command line.
 * A previously reserved buffer is reused when it is large enough.
 *
 * Returns 0 on success, -1 if the buffer cannot be allocated.
 *
 * NOTE(review): the size computed below allows one page for everything
 * that precedes the command line; nothing here verifies that the name,
 * memory map and drives data actually fit in that page - confirm against
 * the limits used when those were saved.
 */
static int
fastboot_build_mbi(char *mdep, fastboot_info_t *nk)
{
	multiboot_info_t *mbi;
	mb_module_t	*mbp;
	const char	*archive_name =
	    fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE];
	size_t		archive_name_len;
	uintptr_t	next_addr;
	uintptr_t	new_mbi_pa;
	size_t		arglen;
	size_t		size;
	int		have_mdep = (mdep != NULL && strlen(mdep) != 0);

	if (have_mdep) {
		arglen = strlen(mdep) + 1;
	} else {
		arglen = saved_cmdline_len;
	}

	/*
	 * The command line is copied as (arglen - 1) bytes followed by a
	 * terminator at index (arglen - 1). A length of zero would make
	 * that subtraction wrap, so treat it as an empty string.
	 */
	if (arglen == 0)
		arglen = 1;

	size = PAGESIZE + P2ROUNDUP(arglen, PAGESIZE);

	/* A buffer reserved earlier is only kept if it is big enough. */
	if (nk->fi_mbi_size && nk->fi_mbi_size < size) {
		contig_free((void *)nk->fi_new_mbi_va, nk->fi_mbi_size);
		nk->fi_mbi_size = 0;
	}

	if (nk->fi_mbi_size == 0) {
		nk->fi_new_mbi_va = (uintptr_t)contig_alloc(size,
		    &fastboot_below_1G_dma_attr, PAGESIZE, 0);
		if (nk->fi_new_mbi_va == 0) {
			cmn_err(CE_WARN, fastboot_enomem_msg,
			    (uint64_t)size, "1G");
			return (-1);
		}
		/*
		 * fi_mbi_size must be set after the allocation succeeds
		 * as it's used to determine how much memory to free.
		 */
		nk->fi_mbi_size = size;
	}

	bzero((void *)nk->fi_new_mbi_va, nk->fi_mbi_size);

	new_mbi_pa = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
	    (caddr_t)nk->fi_new_mbi_va));

	/*
	 * Map the address into both the current proc's address
	 * space and the kernel's address space in case the panic
	 * is forced by kmdb.
	 *
	 * The mapping is made at a virtual address equal to the physical
	 * address, so every pointer stored in the structure below is valid
	 * both now and as a physical address for the new kernel.
	 */
	AS_LOCK_ENTER(&kas, &kas.a_lock, RW_WRITER);
	hat_devload(kas.a_hat, (caddr_t)new_mbi_pa, nk->fi_mbi_size,
	    mmu_btop(new_mbi_pa), PROT_READ | PROT_WRITE,
	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
	AS_LOCK_EXIT(&kas, &kas.a_lock);

	if (&kas != curproc->p_as) {
		struct as *asp = curproc->p_as;

		AS_LOCK_ENTER(asp, &asp->a_lock, RW_WRITER);
		hat_devload(asp->a_hat, (caddr_t)new_mbi_pa, nk->fi_mbi_size,
		    mmu_btop(new_mbi_pa), PROT_READ | PROT_WRITE,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		AS_LOCK_EXIT(asp, &asp->a_lock);
	}

	nk->fi_new_mbi_pa = (paddr_t)new_mbi_pa;

	/* Start from the multiboot info saved at the previous boot. */
	mbi = (multiboot_info_t *)new_mbi_pa;
	bcopy(&saved_mbi, (void *)new_mbi_pa, sizeof (multiboot_info_t));

	/* The single module entry describes the relocated boot archive. */
	next_addr = new_mbi_pa + sizeof (multiboot_info_t);
	mbi->mods_addr = next_addr;
	mbp = (mb_module_t *)(uintptr_t)next_addr;
	mbp->mod_start = nk->fi_files[FASTBOOT_BOOTARCHIVE].fb_dest_pa;
	mbp->mod_end = nk->fi_files[FASTBOOT_BOOTARCHIVE].fb_next_pa;

	/* Module name: the boot archive path, NUL terminated. */
	next_addr += sizeof (mb_module_t);
	archive_name_len = strlen(archive_name);
	bcopy(archive_name, (void *)next_addr, archive_name_len);
	mbp->mod_name = next_addr;
	mbp->reserved = 0;
	next_addr += archive_name_len;
	*(char *)next_addr = '\0';
	next_addr++;

	/* Saved memory map, placed on a 16 byte boundary. */
	next_addr = P2ROUNDUP_TYPED(next_addr, 16, uintptr_t);
	mbi->mmap_addr = next_addr;
	bcopy((void *)(uintptr_t)saved_mmap, (void *)next_addr,
	    saved_mbi.mmap_length);

	/* Saved drives information. */
	next_addr += saved_mbi.mmap_length;
	mbi->drives_addr = next_addr;
	bcopy((void *)(uintptr_t)saved_drives, (void *)next_addr,
	    saved_mbi.drives_length);

	/* Command line: the caller's if given, else the saved one. */
	next_addr += saved_mbi.drives_length;
	mbi->cmdline = next_addr;
	if (have_mdep) {
		bcopy(mdep, (void *)next_addr, (arglen - 1));
	} else {
		bcopy((void *)saved_cmdline, (void *)next_addr, (arglen - 1));
	}

	/* Terminate the string */
	((char *)(intptr_t)next_addr)[arglen - 1] = '\0';

	return (0);
}
/*
 * Initialize HAT related fields
 *
 * The fields are only written when the CPU reports PAE; without PAE the
 * structure is left untouched.
 */
static void
fastboot_init_fields(fastboot_info_t *nk)
{
	if ((x86_feature & X86_PAE) == 0)
		return;

	nk->fi_has_pae = 1;
	nk->fi_shift_amt = fastboot_shift_amt_pae;
	nk->fi_ptes_per_table = 512;
	nk->fi_lpagesize = (2 << 20);	/* 2M */

	/* Top page table level: 3 for amd64, 2 for 32-bit. */
#ifdef __amd64
	nk->fi_top_level = 3;
#else
	nk->fi_top_level = 2;
#endif	/* __amd64 */
}
/*
 * Process boot argument
 *
 * mdep		- boot argument string; nothing is done when it is NULL or
 *		  empty.
 * kern_bootpath - receives the mount point prefix, if one is given.
 * bootpath_len	- receives the length of that prefix.
 * bootargs	- receives the arguments for the new kernel. In the
 *		  non-option case no terminator is written here, so the
 *		  caller is expected to have zeroed the buffer.
 *
 * The global kern_bootfile is overwritten when mdep names a kernel.
 * No bounds are checked on any of the destination buffers.
 */
static void
fastboot_parse_mdep(char *mdep, char *kern_bootpath, int *bootpath_len,
    char *bootargs)
{
	int toklen;

	/*
	 * If mdep is not NULL, it comes in the format of
	 *	mountpoint unix args
	 */
	if (mdep == NULL || strlen(mdep) == 0)
		return;

	if (mdep[0] == '-') {
		/*
		 * Only options were given: the arguments become the current
		 * kern_bootfile, a space, and then the options.
		 */
		int pos = strlen(kern_bootfile);

		bcopy(kern_bootfile, bootargs, pos);
		bcopy(" ", &bootargs[pos++], 1);
		bcopy(mdep, &bootargs[pos], strlen(mdep));
		pos += strlen(mdep);
		bootargs[pos] = '\0';
		return;
	}

	/* First get the root argument */
	for (toklen = 0; mdep[toklen] != '\0' && mdep[toklen] != ' ';
	    toklen++)
		continue;

	if (toklen < 4 || strncmp(&mdep[toklen - 4], "unix", 4) != 0) {
		/* mount point */
		bcopy(mdep, kern_bootpath, toklen);
		kern_bootpath[toklen] = '\0';
		*bootpath_len = toklen;

		/*
		 * Get the next argument. It should be unix as
		 * we have validated it in halt.c.
		 */
		if (strlen(mdep) > toklen) {
			mdep += (toklen + 1);
			for (toklen = 0;
			    mdep[toklen] != '\0' && mdep[toklen] != ' ';
			    toklen++)
				continue;
		}
	}

	/* The token now at mdep is the kernel; mdep onwards are the args. */
	bcopy(mdep, kern_bootfile, toklen);
	kern_bootfile[toklen] = '\0';
	bcopy(mdep, bootargs, strlen(mdep));
}
/*
 * Reserve memory under PA 1G for mapping the new kernel and boot archive.
 * This function is only called if fastreboot_onpanic is *not* set.
 *
 * Three kinds of buffers are reserved, in order: one PTE list per file
 * (sized from saved_file_size[]), the page tables, and the multiboot
 * structure. The function stops silently at the first failed allocation;
 * what has been reserved up to that point stays recorded in nk.
 */
static void
fastboot_reserve_mem(fastboot_info_t *nk)
{
	int f;

	/*
	 * A valid kernel is in place. No need to reserve any memory.
	 */
	if (nk->fi_valid)
		return;

	/*
	 * Reserve memory under PA 1G for PTE lists.
	 */
	for (f = 0; f < FASTBOOT_MAX_FILES_MAP; f++) {
		fastboot_file_t	*fb = &nk->fi_files[f];
		size_t		rounded, list_bytes;

		rounded = P2ROUNDUP_TYPED(saved_file_size[f],
		    PAGESIZE, size_t);
		list_bytes = FASTBOOT_PTE_LIST_SIZE(rounded);

		fb->fb_pte_list_va = contig_alloc(list_bytes,
		    &fastboot_below_1G_dma_attr, PAGESIZE, 0);
		if (fb->fb_pte_list_va == NULL)
			return;

		/* The size is recorded only once the buffer exists. */
		fb->fb_pte_list_size = list_bytes;
	}

	/*
	 * Reserve memory under PA 1G for page tables.
	 */
	nk->fi_pagetable_va = (uintptr_t)contig_alloc(fastboot_pagetable_size,
	    &fastboot_below_1G_dma_attr, PAGESIZE, 0);
	if (nk->fi_pagetable_va == NULL)
		return;
	nk->fi_pagetable_size = fastboot_pagetable_size;

	/*
	 * Reserve memory under PA 1G for multiboot structure.
	 */
	nk->fi_new_mbi_va = (uintptr_t)contig_alloc(fastboot_mbi_size,
	    &fastboot_below_1G_dma_attr, PAGESIZE, 0);
	if (nk->fi_new_mbi_va == NULL)
		return;
	nk->fi_mbi_size = fastboot_mbi_size;
}
/*
 * Calculate MD5 digest for the given fastboot_file.
 * Assumes that the file is already loaded properly.
 *
 * The digest covers fb_size bytes starting at fb_va and is written to
 * md5_hash.
 */
static void
fastboot_cksum_file(fastboot_file_t *fb, uchar_t *md5_hash)
{
	MD5_CTX ctx;

	MD5Init(&ctx);
	MD5Update(&ctx, (void *)fb->fb_va, fb->fb_size);
	MD5Final(md5_hash, &ctx);
}
/*
 * Free up the memory we have allocated for a file
 *
 * The amount freed is fb_size rounded up to a whole number of pages.
 * Nothing happens when that is zero; otherwise fb_va and fb_size are
 * cleared afterwards.
 */
static void
fastboot_free_file(fastboot_file_t *fb)
{
	size_t alloc_bytes = P2ROUNDUP_TYPED(fb->fb_size, PAGESIZE, size_t);

	if (alloc_bytes == 0)
		return;

	contig_free((void *)fb->fb_va, alloc_bytes);
	fb->fb_va = NULL;
	fb->fb_size = 0;
}
/*
 * Free up memory used by the PTEs for a file.
 *
 * The list is released only if one is recorded (fb_pte_list_size is
 * non-zero) and its physical address lies below endaddr.
 */
static void
fastboot_free_file_pte(fastboot_file_t *fb, uint64_t endaddr)
{
	if (fb->fb_pte_list_size == 0 || fb->fb_pte_list_pa >= endaddr)
		return;

	contig_free((void *)fb->fb_pte_list_va, fb->fb_pte_list_size);
	fb->fb_pte_list_va = 0;
	fb->fb_pte_list_pa = 0;
	fb->fb_pte_list_size = 0;
}
/*
 * Free up all the memory used for representing a kernel with
 * fastboot_info_t.
 *
 * File contents are always freed. PTE lists, the page tables and the
 * multiboot buffer are freed only when their recorded physical address
 * is below endaddr.
 */
static void
fastboot_free_mem(fastboot_info_t *nk, uint64_t endaddr)
{
	fastboot_file_t *fb;
	fastboot_file_t *fb_end = nk->fi_files + FASTBOOT_MAX_FILES_MAP;

	for (fb = nk->fi_files; fb < fb_end; fb++) {
		fastboot_free_file(fb);
		fastboot_free_file_pte(fb, endaddr);
	}

	/* Page tables */
	if (nk->fi_pagetable_size != 0 && nk->fi_pagetable_pa < endaddr) {
		contig_free((void *)nk->fi_pagetable_va, nk->fi_pagetable_size);
		nk->fi_pagetable_va = 0;
		nk->fi_pagetable_pa = 0;
		nk->fi_pagetable_size = 0;
	}

	/* Multiboot info buffer */
	if (nk->fi_mbi_size != 0 && nk->fi_new_mbi_pa < endaddr) {
		contig_free((void *)nk->fi_new_mbi_va, nk->fi_mbi_size);
		nk->fi_new_mbi_va = 0;
		nk->fi_new_mbi_pa = 0;
		nk->fi_mbi_size = 0;
	}
}
/*
 * Only free up the memory allocated for the kernel and boot archive,
 * but not for the page tables.
 *
 * The descriptor is marked invalid before anything is released.
 */
void
fastboot_free_newkernel(fastboot_info_t *nk)
{
	int f = 0;

	nk->fi_valid = 0;

	/*
	 * Free the memory we have allocated
	 */
	while (f < FASTBOOT_MAX_FILES_MAP) {
		fastboot_free_file(nk->fi_files + f);
		f++;
	}
}
/*
 * Calculate one MD5 digest over the control data of the given kernel:
 * the PTE list of every file, followed by the page tables, followed by
 * the multiboot info buffer. The digest is written to md5_hash.
 */
static void
fastboot_cksum_cdata(fastboot_info_t *nk, uchar_t *md5_hash)
{
	MD5_CTX	ctx;
	int	f;

	MD5Init(&ctx);

	for (f = 0; f < FASTBOOT_MAX_FILES_MAP; f++) {
		fastboot_file_t *fb = &nk->fi_files[f];

		MD5Update(&ctx, fb->fb_pte_list_va, fb->fb_pte_list_size);
	}

	MD5Update(&ctx, (void *)nk->fi_pagetable_va, nk->fi_pagetable_size);
	MD5Update(&ctx, (void *)nk->fi_new_mbi_va, nk->fi_mbi_size);

	MD5Final(md5_hash, &ctx);
}
/*
 * Generate MD5 checksum of the given kernel.
 *
 * fi_md5_hash[] receives one digest per file, and then one more, in the
 * slot after the last file, for the control data.
 */
static void
fastboot_cksum_generate(fastboot_info_t *nk)
{
	int f = 0;

	while (f < FASTBOOT_MAX_FILES_MAP) {
		fastboot_cksum_file(nk->fi_files + f, nk->fi_md5_hash[f]);
		f++;
	}

	/* f now indexes the slot just past the per-file digests. */
	fastboot_cksum_cdata(nk, nk->fi_md5_hash[f]);
}
/*
 * Calculate MD5 checksum of the given kernel and verify that
 * it matches with what was calculated before.
 *
 * Returns 0 when every digest matches. Otherwise returns the one-based
 * position of the first mismatch: 1 .. FASTBOOT_MAX_FILES_MAP for a file,
 * FASTBOOT_MAX_FILES_MAP + 1 for the control data.
 */
int
fastboot_cksum_verify(fastboot_info_t *nk)
{
	uchar_t	digest[MD5_DIGEST_LENGTH];
	int	slot;

	for (slot = 0; slot < FASTBOOT_MAX_FILES_MAP; slot++) {
		fastboot_cksum_file(nk->fi_files + slot, digest);
		if (bcmp(nk->fi_md5_hash[slot], digest,
		    sizeof (nk->fi_md5_hash[slot])) != 0)
			return (slot + 1);
	}

	/* slot now refers to the control data digest. */
	fastboot_cksum_cdata(nk, digest);
	if (bcmp(nk->fi_md5_hash[slot], digest,
	    sizeof (nk->fi_md5_hash[slot])) == 0)
		return (0);

	return (slot + 1);
}
/*
* This function performs the following tasks:
* - Read the sizes of the new kernel and boot archive.
* - Allocate memory for the new kernel and boot archive.
* - Allocate memory for page tables necessary for mapping the memory
* allocated for the files.
* - Read the new kernel and boot archive into memory.
* - Map in the fast reboot switcher.
* - Load the fast reboot switcher to FASTBOOT_SWTCH_PA.
* - Build the new multiboot_info structure
* - Build page tables for the low 1G of physical memory.
* - Mark the data structure as valid if all steps have succeeded.
*/
void
fastboot_load_kernel(char *mdep)
{
void *buf = NULL;
int i;
fastboot_file_t *fb;
uint32_t dboot_start_offset;
char kern_bootpath[OBP_MAXPATHLEN];
extern uintptr_t postbootkernelbase;
uintptr_t saved_kernelbase;
int bootpath_len = 0;
int is_failsafe = 0;
int is_retry = 0;
uint64_t end_addr;
ASSERT(fastreboot_capable);
if (newkernel.fi_valid)
fastboot_free_newkernel(&newkernel);
saved_kernelbase = postbootkernelbase;
postbootkernelbase = 0;
/*
* Initialize various HAT related fields in the data structure
*/
fastboot_init_fields(&newkernel);
bzero(kern_bootpath, OBP_MAXPATHLEN);
/*
* Process the boot argument
*/
bzero(fastboot_args, OBP_MAXPATHLEN);
fastboot_parse_mdep(mdep, kern_bootpath, &bootpath_len, fastboot_args);
/*
* Make sure we get the null character
*/
bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_UNIX],
bootpath_len);
bcopy(kern_bootfile,
&fastboot_filename[FASTBOOT_NAME_UNIX][bootpath_len],
strlen(kern_bootfile) + 1);
bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE],
bootpath_len);
if (bcmp(kern_bootfile, FAILSAFE_BOOTFILE32,
(sizeof (FAILSAFE_BOOTFILE32) - 1)) == 0 ||
bcmp(kern_bootfile, FAILSAFE_BOOTFILE64,
(sizeof (FAILSAFE_BOOTFILE64) - 1)) == 0) {
is_failsafe = 1;
}
load_kernel_retry:
/*
* Read in unix and boot_archive
*/
end_addr = DBOOT_ENTRY_ADDRESS;
for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
struct _buf *file;
uintptr_t va;
uint64_t fsize;
size_t fsize_roundup, pt_size;
int page_index;
uintptr_t offset;
ddi_dma_attr_t dma_attr = fastboot_dma_attr;
dprintf("fastboot_filename[%d] = %s\n",
i, fastboot_filename[i]);
if ((file = kobj_open_file(fastboot_filename[i])) ==
(struct _buf *)-1) {
cmn_err(CE_WARN, "Fastboot: Couldn't open %s",
fastboot_filename[i]);
goto err_out;
}
if (kobj_get_filesize(file, &fsize) != 0) {
cmn_err(CE_WARN,
"Fastboot: Couldn't get filesize for %s",
fastboot_filename[i]);
goto err_out;
}
fsize_roundup = P2ROUNDUP_TYPED(fsize, PAGESIZE, size_t);
/*
* Where the files end in physical memory after being
* relocated by the fast boot switcher.
*/
end_addr += fsize_roundup;
if (end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_hi) {
cmn_err(CE_WARN, "Fastboot: boot archive is too big");
goto err_out;
}
/*
* Adjust dma_attr_addr_lo so that the new kernel and boot
* archive will not be overridden during relocation.
*/
if (end_addr > fastboot_dma_attr.dma_attr_addr_lo ||
end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_lo) {
if (is_retry) {
/*
* If we have already tried and didn't succeed,
* just give up.
*/
cmn_err(CE_WARN,
"Fastboot: boot archive is too big");
goto err_out;
} else {
/* Set the flag so we don't keep retrying */
is_retry++;
/* Adjust dma_attr_addr_lo */
fastboot_dma_attr.dma_attr_addr_lo = end_addr;
fastboot_below_1G_dma_attr.dma_attr_addr_lo =
end_addr;
/*
* Free the memory we have already allocated
* whose physical addresses might not fit
* the new lo and hi constraints.
*/
fastboot_free_mem(&newkernel, end_addr);
goto load_kernel_retry;
}
}
if (!fastboot_contig)
dma_attr.dma_attr_sgllen = (fsize / PAGESIZE) +
(((fsize % PAGESIZE) == 0) ? 0 : 1);
if ((buf = contig_alloc(fsize, &dma_attr, PAGESIZE, 0))
== NULL) {
cmn_err(CE_WARN, fastboot_enomem_msg, fsize, "64G");
goto err_out;
}
va = P2ROUNDUP_TYPED((uintptr_t)buf, PAGESIZE, uintptr_t);
if (kobj_read_file(file, (char *)va, fsize, 0) < 0) {
cmn_err(CE_WARN, "Fastboot: Couldn't read %s",
fastboot_filename[i]);
goto err_out;
}
fb = &newkernel.fi_files[i];
fb->fb_va = va;
fb->fb_size = fsize;
fb->fb_sectcnt = 0;
pt_size = FASTBOOT_PTE_LIST_SIZE(fsize_roundup);
/*
* If we have reserved memory but it not enough, free it.
*/
if (fb->fb_pte_list_size && fb->fb_pte_list_size < pt_size) {
contig_free((void *)fb->fb_pte_list_va,
fb->fb_pte_list_size);
fb->fb_pte_list_size = 0;
}
if (fb->fb_pte_list_size == 0) {
if ((fb->fb_pte_list_va =
(x86pte_t *)contig_alloc(pt_size,
&fastboot_below_1G_dma_attr, PAGESIZE, 0))
== NULL) {
cmn_err(CE_WARN, fastboot_enomem_msg,
(uint64_t)pt_size, "1G");
goto err_out;
}
/*
* fb_pte_list_size must be set after the allocation
* succeeds as it's used to determine how much memory to
* free.
*/
fb->fb_pte_list_size = pt_size;
}
bzero((void *)(fb->fb_pte_list_va), fb->fb_pte_list_size);
fb->fb_pte_list_pa = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
(caddr_t)fb->fb_pte_list_va));
for (page_index = 0, offset = 0; offset < fb->fb_size;
offset += PAGESIZE) {
uint64_t paddr;
paddr = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
(caddr_t)fb->fb_va + offset));
ASSERT(paddr >= fastboot_dma_attr.dma_attr_addr_lo);
/*
* Include the pte_bits so we don't have to make
* it in assembly.
*/
fb->fb_pte_list_va[page_index++] = (x86pte_t)
(paddr | pte_bits);
}
fb->fb_pte_list_va[page_index] = FASTBOOT_TERMINATE;
if (i == FASTBOOT_UNIX) {
Ehdr *ehdr = (Ehdr *)va;
int j;
/*
* Sanity checks:
*/
for (j = 0; j < SELFMAG; j++) {
if (ehdr->e_ident[j] != ELFMAG[j]) {
cmn_err(CE_WARN, "Fastboot: Bad ELF "
"signature");
goto err_out;
}
}
if (ehdr->e_ident[EI_CLASS] == ELFCLASS32 &&
ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
ehdr->e_machine == EM_386) {
fb->fb_sectcnt = sizeof (fb->fb_sections) /
sizeof (fb->fb_sections[0]);
if (fastboot_elf32_find_loadables((void *)va,
fsize, &fb->fb_sections[0],
&fb->fb_sectcnt, &dboot_start_offset) < 0) {
cmn_err(CE_WARN, "Fastboot: ELF32 "
"program section failure");
goto err_out;
}
if (fb->fb_sectcnt == 0) {
cmn_err(CE_WARN, "Fastboot: No ELF32 "
"program sections found");
goto err_out;
}
if (is_failsafe) {
/* Failsafe boot_archive */
bcopy(BOOTARCHIVE32_FAILSAFE,
&fastboot_filename
[FASTBOOT_NAME_BOOTARCHIVE]
[bootpath_len],
sizeof (BOOTARCHIVE32_FAILSAFE));
} else {
bcopy(BOOTARCHIVE32,
&fastboot_filename
[FASTBOOT_NAME_BOOTARCHIVE]
[bootpath_len],
sizeof (BOOTARCHIVE32));
}
} else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64 &&
ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
ehdr->e_machine == EM_AMD64) {
if (fastboot_elf64_find_dboot_load_offset(
(void *)va, fsize, &dboot_start_offset)
!= 0) {
cmn_err(CE_WARN, "Fastboot: Couldn't "
"find ELF64 dboot entry offset");
goto err_out;
}
if ((x86_feature & X86_64) == 0 ||
(x86_feature & X86_PAE) == 0) {
cmn_err(CE_WARN, "Fastboot: Cannot "
"reboot to %s: "
"not a 64-bit capable system",
kern_bootfile);
goto err_out;
}
if (is_failsafe) {
/* Failsafe boot_archive */
bcopy(BOOTARCHIVE64_FAILSAFE,
&fastboot_filename
[FASTBOOT_NAME_BOOTARCHIVE]
[bootpath_len],
sizeof (BOOTARCHIVE64_FAILSAFE));
} else {
bcopy(BOOTARCHIVE64,
&fastboot_filename
[FASTBOOT_NAME_BOOTARCHIVE]
[bootpath_len],
sizeof (BOOTARCHIVE64));
}
} else {
cmn_err(CE_WARN, "Fastboot: Unknown ELF type");
goto err_out;
}
fb->fb_dest_pa = DBOOT_ENTRY_ADDRESS -
dboot_start_offset;
fb->fb_next_pa = DBOOT_ENTRY_ADDRESS + fsize_roundup;
} else {
fb->fb_dest_pa = newkernel.fi_files[i - 1].fb_next_pa;
fb->fb_next_pa = fb->fb_dest_pa + fsize_roundup;
}
kobj_close_file(file);
}
/*
* Add the function that will switch us to 32-bit protected mode
*/
fb = &newkernel.fi_files[FASTBOOT_SWTCH];
fb->fb_va = fb->fb_dest_pa = FASTBOOT_SWTCH_PA;
fb->fb_size = MMU_PAGESIZE;
hat_devload(kas.a_hat, (caddr_t)fb->fb_va,
MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa),
PROT_READ | PROT_WRITE | PROT_EXEC,
HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
/*
* Build the new multiboot_info structure
*/
if (fastboot_build_mbi(fastboot_args, &newkernel) != 0) {
goto err_out;
}
/*
* Build page table for low 1G physical memory. Use big pages.
* Allocate 4 (5 for amd64) pages for the page tables.
* 1 page for PML4 (amd64)
* 1 page for Page-Directory-Pointer Table
* 2 pages for Page Directory
* 1 page for Page Table.
* The page table entry will be rewritten to map the physical
* address as we do the copying.
*/
if (newkernel.fi_has_pae) {
#ifdef __amd64
size_t size = MMU_PAGESIZE * 5;
#else
size_t size = MMU_PAGESIZE * 4;
#endif /* __amd64 */
if (newkernel.fi_pagetable_size && newkernel.fi_pagetable_size
< size) {
contig_free((void *)newkernel.fi_pagetable_va,
newkernel.fi_pagetable_size);
newkernel.fi_pagetable_size = 0;
}
if (newkernel.fi_pagetable_size == 0) {
if ((newkernel.fi_pagetable_va = (uintptr_t)
contig_alloc(size, &fastboot_below_1G_dma_attr,
MMU_PAGESIZE, 0)) == NULL) {
cmn_err(CE_WARN, fastboot_enomem_msg,
(uint64_t)size, "1G");
goto err_out;
}
/*
* fi_pagetable_size must be set after the allocation
* succeeds as it's used to determine how much memory to
* free.
*/
newkernel.fi_pagetable_size = size;
}
bzero((void *)(newkernel.fi_pagetable_va), size);
newkernel.fi_pagetable_pa =
mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
(caddr_t)newkernel.fi_pagetable_va));
newkernel.fi_last_table_pa = newkernel.fi_pagetable_pa +
size - MMU_PAGESIZE;
newkernel.fi_next_table_va = newkernel.fi_pagetable_va +
MMU_PAGESIZE;
newkernel.fi_next_table_pa = newkernel.fi_pagetable_pa +
MMU_PAGESIZE;
fastboot_build_pagetables(&newkernel);
}
/* Generate MD5 checksums */
fastboot_cksum_generate(&newkernel);
/* Mark it as valid */
newkernel.fi_valid = 1;
newkernel.fi_magic = FASTBOOT_MAGIC;
postbootkernelbase = saved_kernelbase;
return;
err_out:
postbootkernelbase = saved_kernelbase;
newkernel.fi_valid = 0;
fastboot_free_newkernel(&newkernel);
}
/*
 * Final step of a fast reboot; also usable as a cross-call handler.
 *
 * Makes sure the switcher page is mapped in the current process' address
 * space and in that of a pinned thread (when they are not the kernel's),
 * shuts the platform down through psm_shutdownf, and then calls the
 * switcher at the address held in the FASTBOOT_SWTCH file entry with nk
 * as its argument. The switcher is not expected to return.
 */
/* ARGSUSED */
static int
fastboot_xc_func(fastboot_info_t *nk, xc_arg_t unused2, xc_arg_t unused3)
{
	fastboot_file_t	*swtch = &nk->fi_files[FASTBOOT_SWTCH];
	void		(*swtch_entry)(fastboot_info_t *) =
	    (void (*)())(swtch->fb_va);
	kthread_t	*pinned = curthread->t_intr;

	if (&kas != curproc->p_as) {
		hat_devload(curproc->p_as->a_hat, (caddr_t)swtch->fb_va,
		    MMU_PAGESIZE, mmu_btop(swtch->fb_dest_pa),
		    PROT_READ | PROT_WRITE | PROT_EXEC,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
	}

	/*
	 * If we have pinned a thread, make sure the address is mapped
	 * in the address space of the pinned thread.
	 */
	if (pinned != NULL) {
		struct as *pinned_as = pinned->t_procp->p_as;

		if (pinned_as->a_hat != curproc->p_as->a_hat &&
		    pinned_as != &kas) {
			hat_devload(pinned_as->a_hat, (caddr_t)swtch->fb_va,
			    MMU_PAGESIZE, mmu_btop(swtch->fb_dest_pa),
			    PROT_READ | PROT_WRITE | PROT_EXEC,
			    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		}
	}

	(*psm_shutdownf)(A_SHUTDOWN, AD_FASTREBOOT);
	(*swtch_entry)(nk);

	/*NOTREACHED*/
	return (0);
}
/*
* Jump to the fast reboot switcher. This function never returns.
*/
void
fast_reboot()
{
processorid_t bootcpuid = 0;
extern uintptr_t postbootkernelbase;
extern char fb_swtch_image[];
fastboot_file_t *fb;
int i;
postbootkernelbase = 0;
fb = &newkernel.fi_files[FASTBOOT_SWTCH];
/*
* Map the address into both the current proc's address
* space and the kernel's address space in case the panic
* is forced by kmdb.
*/
if (&kas != curproc->p_as) {
hat_devload(curproc->p_as->a_hat, (caddr_t)fb->fb_va,
MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa),
PROT_READ | PROT_WRITE | PROT_EXEC,
HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
}
bcopy((void *)fb_swtch_image, (void *)fb->fb_va, fb->fb_size);
/*
* Set fb_va to fake_va
*/
for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
newkernel.fi_files[i].fb_va = fake_va;
}
if (panicstr && CPU->cpu_id != bootcpuid &&
CPU_ACTIVE(cpu_get(bootcpuid))) {
cpuset_t cpuset;
CPUSET_ZERO(cpuset);
CPUSET_ADD(cpuset, bootcpuid);
xc_trycall((xc_arg_t)&newkernel, 0, 0, cpuset,
(xc_func_t)fastboot_xc_func);
/* Do what panic_idle() does */
splx(ipltospl(CLOCK_LEVEL));
(void) setjmp(&curthread->t_pcb);
for (;;)
;
} else
(void) fastboot_xc_func(&newkernel, 0, 0);
}
/*
 * Get boot property value for fastreboot_onpanic.
 *
 * NOTE: If fastreboot_onpanic is set to non-zero in /etc/system,
 * new setting passed in via "-B fastreboot_onpanic" is ignored.
 * This order of precedence is to enable developers debugging panics
 * that occur early in boot to utilize Fast Reboot on panic.
 *
 * The FASTREBOOT_ONPANIC_CMDLINE property is also read, into
 * fastreboot_onpanic_cmdline; a warning is logged if it does not fit.
 */
static void
fastboot_get_bootprop(void)
{
	const int	not_given = 0xaa;	/* "no usable value" marker */
	int		val = not_given;
	int		len, ret;
	dev_info_t	*root = ddi_root_node();
	char		*propstr = NULL;

	ret = ddi_prop_lookup_string(DDI_DEV_T_ANY, root, DDI_PROP_DONTPASS,
	    FASTREBOOT_ONPANIC, &propstr);

	switch (ret) {
	case DDI_PROP_SUCCESS:
		if (FASTREBOOT_ONPANIC_NOTSET(propstr))
			val = 0;
		else if (FASTREBOOT_ONPANIC_ISSET(propstr))
			val = UA_FASTREBOOT_ONPANIC;

		/*
		 * Only set fastreboot_onpanic to the value passed in
		 * if it's not already set to non-zero, and the value
		 * has indeed been passed in via command line.
		 */
		if (!fastreboot_onpanic && val != not_given)
			fastreboot_onpanic = val;

		ddi_prop_free(propstr);
		break;

	case DDI_PROP_NOT_FOUND:
	case DDI_PROP_UNDEFINED:
		/* The property simply is not there; nothing to report. */
		break;

	default:
		cmn_err(CE_WARN, "%s value is invalid, will be ignored",
		    FASTREBOOT_ONPANIC);
		break;
	}

	len = sizeof (fastreboot_onpanic_cmdline);
	ret = ddi_getlongprop_buf(DDI_DEV_T_ANY, root, DDI_PROP_DONTPASS,
	    FASTREBOOT_ONPANIC_CMDLINE, fastreboot_onpanic_cmdline, &len);

	if (ret == DDI_PROP_BUF_TOO_SMALL) {
		cmn_err(CE_WARN, "%s value is too long, will be ignored",
		    FASTREBOOT_ONPANIC_CMDLINE);
	}
}
/*
 * This function is called by main() to either load the backup kernel for panic
 * fast reboot, or to reserve low physical memory for fast reboot.
 *
 * Nothing is done on systems that are not fast reboot capable. Memory is
 * reserved only when fastreboot_onpanic is off and reserve_mem_enabled
 * is set.
 */
void
fastboot_post_startup()
{
	if (!fastreboot_capable)
		return;

	fastboot_get_bootprop();

	if (fastreboot_onpanic) {
		fastboot_load_kernel(fastreboot_onpanic_cmdline);
		return;
	}

	if (reserve_mem_enabled)
		fastboot_reserve_mem(&newkernel);
}
/*
 * Update boot configuration settings.
 * If the new fastreboot_onpanic setting is false, and a kernel has
 * been preloaded, free the memory;
 * if the new fastreboot_onpanic setting is true and newkernel is
 * not valid, load the new kernel.
 *
 * mdep points at the boot configuration; only its first byte is read and
 * only the UA_FASTREBOOT_ONPANIC bit of that byte is used. A NULL mdep,
 * or a system that is not fast reboot capable, leaves everything as is.
 */
void
fastboot_update_config(const char *mdep)
{
	uint8_t	boot_config;
	int	cur_fastreboot_onpanic;

	/*
	 * Check capability and the argument before touching *mdep, so
	 * that a missing argument cannot fault.
	 */
	if (!fastreboot_capable || mdep == NULL)
		return;

	boot_config = (uint8_t)*mdep;
	cur_fastreboot_onpanic = fastreboot_onpanic;

	fastreboot_onpanic = boot_config & UA_FASTREBOOT_ONPANIC;

	/* Newly enabled, or enabled without a usable kernel: load one. */
	if (fastreboot_onpanic && (!cur_fastreboot_onpanic ||
	    !newkernel.fi_valid))
		fastboot_load_kernel(fastreboot_onpanic_cmdline);

	/* Newly disabled: drop the preloaded kernel. */
	if (cur_fastreboot_onpanic && !fastreboot_onpanic)
		fastboot_free_newkernel(&newkernel);
}