/* dumpsubr.c revision 9dd77bc84fd62eb844d67cc7311833ea3ea6c889 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/compress.h>
#include <sys/systeminfo.h>
#include <vm/seg_kmem.h>
#include <sys/clock_impl.h>
/*
* Crash dump time is dominated by disk write time. To reduce this,
* the stronger compression method bzip2 is applied to reduce the dump
* size and hence reduce I/O time. However, bzip2 is much more
* computationally expensive than the existing lzjb algorithm, so to
* avoid increasing compression time, CPUs that are otherwise idle
* during panic are employed to parallelize the compression task.
* Many helper CPUs are needed to prevent bzip2 from being a
* bottleneck, and on systems with too few CPUs, the lzjb algorithm is
* parallelized instead. Lastly, I/O and compression are performed by
* different CPUs, and are hence overlapped in time, unlike the older
* serial code.
*
* Another important consideration is the speed of the dump
* device. Faster disks need fewer CPUs in order to benefit from
* parallel lzjb versus parallel bzip2. Therefore, the CPU count
* threshold for switching from parallel lzjb to parallel bzip2 is
* elevated for faster disks. The dump device speed is adduced from
* the setting for dumpbuf.iosize, see dump_update_clevel.
*/
/*
* exported vars
*/
char *dumppath; /* pathname of dump device */
int dump_timeleft; /* portion of dump_timeout remaining */
int dump_ioerr; /* dump i/o error */
int dump_check_used; /* enable check for used pages */
/*
* Tunables for dump compression and parallelism. These can be set via
*
* dump_ncpu_low number of helpers for parallel lzjb
* This is also the minimum configuration.
*
* dump_bzip2_level bzip2 compression level: 1-9
* Higher numbers give greater compression, but take more memory
* and time. Memory used per helper is ~(dump_bzip2_level * 1MB).
*
* dump_plat_mincpu the cross-over limit for using bzip2 (per platform):
* if dump_plat_mincpu == 0, then always do single threaded dump
* if ncpu >= dump_plat_mincpu then try to use bzip2
*
* dump_metrics_on if set, metrics are collected in the kernel, passed
* to savecore via the dump file, and recorded by savecore in
* METRICS.txt.
*/
/* tunables for pre-reserved heap */
/* Define multiple buffers per helper to avoid stalling */
#define NCBUF_PER_HELPER 2 /* output buffers per helper (nhelper*2 total) */
#define NCMAP_PER_HELPER 4 /* input mapping buffers per helper (nhelper*4 total) */
/* minimum number of helpers configured */
#define MINHELPERS (dump_ncpu_low)
/*
* Define constant parameters.
*
* CBUF_SIZE size of an output buffer
*
* CBUF_MAPSIZE size of virtual range for mapping pages
*
* CBUF_MAPNP size of virtual range in pages
*
*/
/*
 * NOTE(review): presumably log2 of CBUF_MAPSIZE (1 << 22 == 4MB per
 * mapping range); the CBUF_MAPSIZE definition is not visible here —
 * confirm against the full file.
 */
#define CBUF_MAPSHIFT (22)
/*
* Compression metrics are accumulated nano-second subtotals. The
* results are normalized by the number of pages dumped. A report is
* generated when dumpsys() completes and is saved in the dump image
* after the trailing dump header.
*
* Metrics are always collected. Set the variable dump_metrics_on to
* cause metrics to be saved in the crash file, where savecore will
* save it in the file METRICS.txt.
*/
#define PERPAGES \
typedef struct perpage {
} perpage_t;
/*
* This macro controls the code generation for collecting dump
* performance information. By default, the code is generated, but
* automatic saving of the information is disabled. If dump_metrics_on
* is set to 1, the timing information is passed to savecore via the
* crash file, where it is appended to the file dump-dir/METRICS.txt.
*/
/* Compile in the dump-metrics code; see the block comment above. */
#define COLLECT_METRICS
#ifdef COLLECT_METRICS
/* Normalize an accumulated metric v.m by n (e.g. per-page averages). */
#define HRNORM(v, m, n) v.m /= (n)
/*
 * NOTE(review): the enabled-case definitions of HRSTART/HRSTOP/
 * HRBEGIN/HREND appear to be elided in this view of the file.
 */
#else
/* Metrics compiled out: all timing macros become no-ops. */
#define HRSTART(v, m)
#define HRSTOP(v, m)
#define HRBEGIN(v, m, s)
#define HREND(v, m)
#define HRNORM(v, m, n)
#endif /* COLLECT_METRICS */
/*
* Buffers for copying and compressing memory pages.
*
* cbuf_t buffer controllers: used for both input and output.
*
* The buffer state indicates how it is being used:
*
* CBUF_FREEMAP: CBUF_MAPSIZE virtual address range is available for
* mapping input pages.
*
* CBUF_INREADY: input pages are mapped and ready for compression by a
* helper.
*
* CBUF_USEDMAP: mapping has been consumed by a helper. Needs unmap.
*
* CBUF_FREEBUF: CBUF_SIZE output buffer, which is available.
*
* CBUF_WRITE: CBUF_SIZE block of compressed pages from a helper,
* ready to write out.
*
* CBUF_ERRMSG: CBUF_SIZE block of error messages from a helper
* (reports UE errors.)
*/
typedef enum cbufstate {
} cbufstate_t;
/*
 * Buffer controller, used for both input mappings and output buffers;
 * its current role is described by a cbufstate_t (see comment above).
 * NOTE(review): additional fields appear to be elided in this view.
 */
struct cbuf {
char *buf; /* kmem or vmem */
int off; /* byte offset to first pfn */
};
/*
* cqueue_t queues: a uni-directional channel for communication
* from the master to helper tasks or vice-versa using put and
* get primitives. Both mappings and data buffers are passed via
* queues. Producers close a queue when done. The number of
* active producers is reference counted so the consumer can
* detect end of data. Concurrent access is mediated by atomic
* operations.
*
* There are four queues, used as follows:
*
* Queue Dataflow NewState
* --------------------------------------------------
* mainq master -> master FREEMAP
* master has initialized or unmapped an input buffer
* --------------------------------------------------
* helperq master -> helper INREADY
* master has mapped input for use by helper
* --------------------------------------------------
* mainq master <- helper USEDMAP
* helper is done with input
* --------------------------------------------------
* freebufq master -> helper FREEBUF
* master has initialized or written an output buffer
* --------------------------------------------------
* mainq master <- helper WRITE
* block of compressed pages from a helper
* --------------------------------------------------
* mainq master <- helper ERRMSG
* error messages from a helper (memory error case)
* --------------------------------------------------
* writerq master <- master WRITE
* non-blocking queue of blocks to write
* --------------------------------------------------
*/
typedef struct cqueue {
} cqueue_t;
/*
* Convenience macros for using the cqueue functions
* Note that the caller must have defined "dumpsync_t *ds"
*/
#define CQ_IS_EMPTY(q) \
#define CQ_OPEN(q) \
#define CQ_CLOSE(q) \
#define CQ_GET(q) \
/*
* Dynamic state when dumpsys() is running.
*/
/* Shared state for an in-progress dumpsys() run (see comment above). */
typedef struct dumpsync {
int dumpcpu; /* master cpu: CPU id running the main dump task */
} dumpsync_t;
/*
* helper_t helpers: contains the context for a stream. CPUs run in
* parallel at dump time; each CPU creates a single stream of
* compression data. Stream data is divided into CBUF_SIZE blocks.
* The blocks are written in order within a stream. But, blocks from
* multiple streams can be interleaved. Each stream is identified by a
* unique tag.
*/
/* Per-stream compression context; one per helper CPU (see above). */
typedef struct helper {
int helper; /* bound helper id */
int tag; /* compression stream tag; uniquely identifies the stream */
char *page; /* buffer for page copy */
char *lzbuf; /* lzjb output */
} helper_t;
/*
* configuration vars for dumpsys
*/
/*
 * Dump configuration computed when dumpadm configures the dump device
 * (see dump_update_clevel below).
 */
typedef struct dumpcfg {
int threshold; /* ncpu threshold for bzip2 */
int nhelper; /* number of helpers */
int nhelper_used; /* actual number of helpers used */
int ncmap; /* number VA pages for compression */
int ncbuf; /* number of bufs for compression */
int ncbuf_used; /* number of bufs in use */
char *maxvm; /* reserved VM for spare pages */
char helpers_wanted; /* flag to enable parallelism */
} dumpcfg_t;
/*
* The dump I/O buffer.
*
* There is one I/O buffer used by dumpvp_write and dumpvp_flush. It is
* sized according to the optimum device transfer speed.
*/
/* The single dump I/O buffer; cur is the write position in [start, end). */
typedef struct dumpbuf {
char *cur; /* dump write pointer */
char *start; /* dump buffer address */
char *end; /* dump buffer end */
} dumpbuf_t;
/*
* The dump I/O buffer must be at least one page, at most xfer_size
* bytes, and should scale with physmem in between. The transfer size
* passed in will either represent a global default (maxphys) or the
* best size for the device. The size of the dumpbuf I/O buffer is
* limited by dumpbuf_limit (8MB by default) because the dump
* performance saturates beyond a certain size. The default is to
* select 1/4096 of the memory.
*/
static size_t
{
if (iosize > dumpbuf_limit)
}
/*
* resize the I/O buffer
*/
static void
dumpbuf_resize(void)
{
char *new_buf;
return; /* no need to reallocate buffer */
}
/*
* dump_update_clevel is called when dumpadm configures the dump device.
* Calculate number of helpers and buffers.
* Allocate the minimum configuration for now.
*
* When the dump file is configured we reserve a minimum amount of
* memory for use at crash time. But we reserve VA for all the memory
* we really want in order to do the fastest dump possible. The VA is
* backed by pages not being dumped, according to the bitmap. If
* there is insufficient spare memory, however, we fall back to the
* minimum.
*
* Live dump (savecore -L) always uses the minimum config.
*
* clevel 0 is single threaded lzjb
* clevel 1 is parallel lzjb
* clevel 2 is parallel bzip2
*
* The ncpu threshold is selected with dump_plat_mincpu.
* On OPL, set_platform_defaults() overrides the sun4u setting.
* The actual values are defined via DUMP_PLAT_*_MINCPU macros.
*
* Architecture Threshold Algorithm
* sun4u < 51 parallel lzjb
* sun4u >= 51 parallel bzip2(*)
* sun4u OPL < 8 parallel lzjb
* sun4u OPL >= 8 parallel bzip2(*)
* sun4v < 128 parallel lzjb
* sun4v >= 128 parallel bzip2(*)
* x86 < 11 parallel lzjb
* x86 >= 11 parallel bzip2(*)
* 32-bit N/A single-threaded lzjb
*
* (*) bzip2 is only chosen if there is sufficient available
* memory for buffers at dump time. See dumpsys_get_maxmem().
*
* Faster dump devices have larger I/O buffers. The threshold value is
* increased according to the size of the dump I/O buffer, because
* parallel lzjb performs better with faster disks. For buffers >= 1MB
* the threshold is 3X; for buffers >= 256K threshold is 2X.
*
* For parallel dumps, the number of helpers is ncpu-1. The CPU
* running panic runs the main task. For single-threaded dumps, the
* panic CPU does lzjb compression (it is tagged as MAINHELPER.)
*
* Need multiple buffers per helper so that they do not block waiting
* for the main task.
* parallel single-threaded
* Number of output buffers: nhelper*2 1
* Number of mapping buffers: nhelper*4 1
*
*/
static void
{
int tag;
/*
* Free the previously allocated bufs and VM.
*/
/* helpers */
}
/* VM space for mapping pages */
/* output bufs */
/* reserved VM for dumpsys_get_maxmem */
}
/*
* Allocate memory and VM.
* One CPU runs dumpsys, the rest are helpers.
*/
/* increase threshold for faster disks */
/* figure compression level based upon the computed threshold. */
} else {
}
} else {
}
/*
* Allocate new data structures and buffers for MINHELPERS,
* and also figure the max desired size.
*/
tag = 1;
} else {
}
}
else
}
}
/* reserve VA to be backed with spare pages at crash time */
}
/*
* Reserve memory for kmem allocation calls made during crash
* dump. The hat layer allocates memory for each mapping
* created, and the I/O path allocates buffers and data structs.
* Add a few pages for safety.
*/
(dump_kmem_pages * PAGESIZE));
/* set new config pointers */
}
/*
* Define a struct memlist walker to optimize bitnum to pfn
* lookup. The walker maintains the state of the list traversal.
*/
typedef struct dumpmlw {
} dumpmlw_t;
/* initialize the walker */
static inline void
{
}
/*
* Lookup pfn given bitnum. The memlist can be quite long on some
* systems (e.g.: one per board). To optimize sequential lookups, the
* caller initializes and presents a memlist walker.
*/
static pfn_t
{
}
}
}
return (PFN_INVALID);
}
static pgcnt_t
{
}
return ((pgcnt_t)-1);
}
/*
* mapping of pfn to range index is imperfect because pfn and bitnum
* do not have the same phase. To make sure a CBUF_MAPSIZE range is
* covered, call this for both ends:
* dump_set_used(base)
* dump_set_used(base+CBUF_MAPNP-1)
*
* This is used during a panic dump to mark pages allocated by
* dumpsys_get_maxmem(). The macro IS_DUMP_PAGE(pp) is used by
* page_get_mnode_freelist() to make sure pages used by dump are never
* allocated.
*/
static void
{
}
int
{
}
/*
* dumpbzalloc and dumpbzfree are callbacks from the bzip2 library.
* dumpsys_get_maxmem() uses them for BZ2_bzCompressInit().
*/
static void *
{
char *ret;
return (ret);
}
/*ARGSUSED*/
static void
{
}
/*
* Perform additional checks on the page to see if we can really use
* it. The kernel (kas) pages are always set in the bitmap. However,
* boot memory pages (prom_ppages or P_BOOTPAGES) are not in the
* bitmap. So we check for them.
*/
static inline int
{
#if defined(__sparc)
extern struct vnode prom_ppages;
#endif
#if defined(__sparc)
#else
PP_ISBOOTPAGES(pp) ||
#endif
return (0);
return (1);
}
/*
* Check a range to see if all contained pages are available and
* return non-zero if the range can be used.
*/
static inline int
{
return (0);
if (!dump_pfn_check(pfn))
return (0);
}
return (1);
}
/*
* dumpsys_get_maxmem() is called during panic. Find unused ranges
* and use them for buffers. If we find enough memory switch to
* parallel bzip2, otherwise use parallel lzjb.
*
* It searches the dump bitmap in 2 passes. The first time it looks
* for CBUF_MAPSIZE ranges. On the second pass it uses small pages.
*/
static void
{
int k;
(dump_conflags & DUMP_ALL) != 0)
return;
sz = 0;
/* bitmap of ranges used to estimate which pfns are being used */
/* find ranges that are not being dumped to use for buffers */
/* skip partial range at end of mem segment */
continue;
}
/* skip non aligned pages */
if (off != 0) {
continue;
}
continue;
sz += CBUF_MAPSIZE;
/* set the bitmap for both ends to be sure to cover the range */
goto foundmax;
}
/* Add small pages if we can't find enough large pages. */
/* Find any non-aligned pages at start and end of segment. */
} else if (off != 0) {
continue;
}
continue;
if (!dump_pfn_check(pfn))
continue;
goto foundmax;
}
}
/* Fall back to lzjb if we did not get enough memory for bzip2. */
}
/* Allocate memory for as many helpers as we can. */
/* Byte offsets into memory found and mapped above */
sz = 0;
/* Set the size for bzip2 state. Only bzip2 needs it. */
/* Skip the preallocate output buffers. */
/* Use this to move memory up from the preallocated helpers. */
/* Loop over all helpers and allocate memory. */
/* Skip preallocated helpers by checking hp->page. */
/* lzjb needs 2 1-page buffers */
break;
/* re-use the preallocated lzjb page for bzip2 */
++ohp;
} else {
/* bzip2 needs a 1-page buffer */
break;
}
}
/*
* Add output buffers per helper. The number of
* buffers per helper is determined by the ratio of
* ncbuf to nhelper.
*/
k < NCBUF_PER_HELPER; k++) {
++cp;
}
/*
* bzip2 needs compression state. Use the dumpbzalloc
* and dumpbzfree callbacks to allocate the memory.
* bzip2 does allocation only at init time.
*/
break;
} else {
dump_bzip2_level, 0, 0);
}
}
}
/* Finish allocating output buffers */
}
/* Enable IS_DUMP_PAGE macro, which checks for pages we took. */
dump_check_used = 1;
}
static void
dumphdr_init(void)
{
}
npages = num_phys_pages();
bitmapsize));
rbitmapsize));
}
}
/*
* Establish a new dump device.
*/
int
{
int error = 0;
dumphdr_init();
return (0);
/*
* Determine whether this is a plausible dump device. We want either:
* (1) a real device that's not mounted and has a cb_dump routine, or
* (2) a swapfile on some filesystem that has a vop_dump routine.
*/
return (error);
ZFS_DRIVER) == 0 &&
} else {
}
}
if (error || justchecking) {
return (error);
}
dumpfini(); /* unconfigure the old dump device */
/*
* If the dump device is a block device, attempt to open up the
* corresponding character device and determine its maximum transfer
* size. We use this information to potentially resize dumpbuf to a
* larger and more optimal size for performing i/o to the dump device.
*/
== 0 && minf.dki_lbsize != 0)
else
}
/*
* If we are working with a zvol then dumpify it
* if it's not being used as swap.
*/
dumpfini();
}
}
}
return (error);
}
void
dumpfini(void)
{
/*
* Determine if we are using zvols for our dump device
*/
}
/*
* If we have a zvol dump device then we call into zfs so
* that it may have a chance to cleanup.
*/
if (is_zfs &&
}
}
dumpvp_size = 0;
}
static offset_t
dumpvp_flush(void)
{
int err;
dump_ioerr = ENOSPC;
} else if (size != 0) {
if (panicstr)
else
kcred, 0);
if (err && dump_ioerr == 0)
dump_ioerr = err;
}
}
/* maximize write speed by keeping seek offset aligned with size */
void
{
while (size != 0) {
if (len == 0) {
(void) dumpvp_flush();
} else {
(void) dumpvp_flush();
}
} else {
}
}
}
/*ARGSUSED*/
static void
{
}
/*
* Mark 'pfn' in the bitmap and dump its translation table entry.
*/
void
{
dumphdr->dump_npages++;
}
dumphdr->dump_nvtop++;
}
}
/*
* Mark 'pfn' in the bitmap
*/
void
{
dumphdr->dump_npages++;
}
}
}
/*
* Dump the <as, va, pfn> information for a given address space.
* SEGOP_DUMP() will call dump_addpage() for each page in the segment.
*/
static void
{
break;
continue;
}
}
static int
{
if (p == NULL)
return (-1);
mutex_exit(&p->p_lock);
mutex_enter(&p->p_lock);
}
sprunlock(p);
return (0);
}
/*
 * Flush queued error reports to the dump device.
 * NOTE(review): the unconditional return below makes the remainder of
 * this function unreachable — interior lines (likely the early-exit
 * condition and buffer handling) appear to be elided in this view of
 * the file; confirm against the full source before changing anything.
 */
void
dump_ereports(void)
{
return;
if (panicstr)
errorq_dump();
(void) dumpvp_flush();
if (!panicstr) {
}
}
void
dump_messages(void)
{
return;
do {
continue;
}
(void) dumpvp_flush();
if (!panicstr) {
}
}
/*
* The following functions are called on multiple CPUs during dump.
* They must not use most kernel services, because all cross-calls are
* disabled during panic. Therefore, blocking locks and cache flushes
* will not work.
*/
/*
* Copy pages, trapping ECC errors. Also, for robustness, trap data
* access in case something goes wrong in the hat layer and the
* mapping is broken.
*/
static int
{
volatile int w = 0;
volatile int ueoff = -1;
if (ueoff == -1)
ueoff = w * sizeof (long);
/* report "bad ECC" or "bad address" */
#ifdef _LP64
wdst[w++] = 0x00badecc00badecc;
else
wdst[w++] = 0x00badadd00badadd;
#else
wdst[w++] = 0x00badecc;
else
wdst[w++] = 0x00badadd;
#endif
}
while (w < ncopies) {
w++;
}
no_trap();
return (ueoff);
}
static void
{
if (live) {
} else {
}
}
static inline void
{
int loop_count = 0;
if (++loop_count >= ncpus) {
backoff = mutex_lock_backoff(0);
loop_count = 0;
} else {
}
}
}
static inline void
{
lock_clear(lp);
}
static inline void
{
if (live)
else
}
static inline void
{
if (live) {
if (signal)
} else {
}
}
static void
{
if (live) {
} else {
break;
}
}
static void
{
return;
}
else
}
static cbuf_t *
{
/* CONSTCOND */
while (1) {
break;
continue;
}
}
break;
}
return (cp);
}
/*
* Send an error message to the console. If the main task is running
* just write the message via uprintf. If a helper is running the
* message has to be put on a queue for the main task. Setting fmt to
* NULL means flush the error message buffer. If fmt is not NULL, just
* add the text to the existing buffer.
*/
static void
{
uprintf("\n");
}
}
}
} else {
}
}
}
}
/*
* Write an output buffer to the dump file. If the main task is
* running just write the data. If a helper is running the output is
* placed on a queue for the main task.
*/
static void
{
} else {
}
}
/*
* Copy one page within the mapped range. The offset starts at 0 and
* is relative to the first pfn. cp->buf + cp->off is the address of
* the first pfn. If dump_pagecopy returns a UE offset, create an
* error message. Returns the offset to the next pfn in the range
* selected by the bitmap.
*/
static int
{
int ueoff;
/* ueoff is the offset in the page to a UE error */
if (ueoff != -1) {
}
/*
* Advance bitnum and offset to the next input page for the
* next call to this function.
*/
break;
}
return (offset);
}
/*
* Read the helper queue, and copy one mapped page. Return 0 when
* done. Return 1 when a page has been copied into hp->page.
*/
static int
{
/* CONSTCOND */
while (1) {
/* Find the next input buffer. */
/* CONSTCOND */
while (1) {
/*
* NULL return means the helper queue
* is closed and empty.
*/
break;
/* Have input, check for dump I/O error. */
if (!dump_ioerr)
break;
/*
* If an I/O error occurs, stay in the
* loop in order to empty the helper
* queue. Return the buffers to the
* main task to unmap and free it.
*/
}
/* Stop here when the helper queue is closed. */
break;
/* Set the offset=0 to get the first pfn. */
/* Set the total processed to 0 */
}
/* Process the next page. */
/*
* Get the next page from the input buffer and
* return a copy.
*/
break;
} else {
/*
* Done with the input. Flush the VM and
* return the buffer to the main task.
*/
}
}
}
/*
* Compress size bytes starting at buf with bzip2
* mode:
* BZ_RUN add one more compressed page
* BZ_FINISH no more input, flush the state
*/
static void
{
const int CSIZE = sizeof (dumpcsize_t);
int rc = 0;
/* Set input pointers to new input page */
if (size > 0) {
}
/* CONSTCOND */
while (1) {
/* Quit when all input has been consumed */
break;
/* Get a new output buffer */
}
/* Compress input, or finalize */
/* Check for error */
break;
}
/* Write the buffer if it is full, or we are flushing */
if (csize > 0) {
}
}
/* Check for final complete */
if (rc == BZ_STREAM_END)
break;
if (rc != BZ_FINISH_OK) {
break;
}
}
}
/* Cleanup state and buffers */
/* Reset state so that it is re-usable. */
/* Give any unused output buffer to the main task */
}
}
}
static void
{
sh.stream_npages = 0;
/* Bump reference to mainq while we are running */
/* Get one page at a time */
while (dumpsys_sread(hp)) {
}
}
/* Done with input, flush any partial buffer */
}
/* Decrement main queue count, we are done */
}
/*
* Compress with lzjb
* write stream block if full or size==0
* if csize==0 write stream header, else write <csize, data>
* size==0 is a call to flush a buffer
* hp->cpout is the buffer we are flushing or filling
* hp->out is the next index to fill data
* osize is either csize+data, or the size of a stream header
*/
static void
{
const int CSIZE = sizeof (dumpcsize_t);
/* If flush, and there is no buffer, just return */
return;
/* If flush, or cpout is full, write it out */
if (size == 0 ||
/* Set tag+size word at the front of the stream block. */
/* Write block to dump file. */
/* Clear pointer to indicate we need a new buffer */
/* flushing, we are done */
if (size == 0)
return;
}
/* Get an output buffer if we dont have one. */
}
/* Store csize word. This is the size of compressed data. */
if (csize > 0) {
}
/* Store the data. */
}
static void
{
sh.stream_npages = 0;
/* Bump reference to mainq while we are running */
/* Get one page at a time */
while (dumpsys_sread(hp)) {
/* Create a stream header for each new input map */
}
/* Compress one page */
/* Add csize+data to output block */
}
/* Done with input, flush any partial buffer */
}
/* Decrement main queue count, we are done */
}
/*
* Dump helper called from panic_idle() to compress pages. CPUs in
* this path must not call most kernel services.
*
* During panic, all but one of the CPUs is idle. These CPUs are used
* as helpers working in parallel to copy and compress memory
* pages. During a panic, however, these processors cannot call any
* kernel services. This is because mutexes become no-ops during
* panic, and, cross-call interrupts are inhibited. Therefore, during
* panic dump the helper CPUs communicate with the panic CPU using
* memory variables. All memory mapping and I/O is performed by the
* panic CPU.
*/
void
{
if (dumpcfg.helpers_wanted) {
else
return;
}
}
}
}
/*
* Dump helper for live dumps.
* These run as a system task.
*/
static void
dumpsys_live_helper(void *arg)
{
else
}
/*
* Compress one page with lzjb (single threaded case)
*/
static void
{
}
}
/*
* Main task to dump pages. This is called on the dump CPU.
*/
static void
dumpsys_main_task(void *arg)
{
int sec;
/* CONSTCOND */
while (1) {
uprintf("^\r%2d:%02d %3d%% done",
}
/* the writerq never blocks */
break;
}
/*
* Wait here for some buffers to process. Returns NULL
* when all helpers have terminated and all buffers
* have been processed.
*/
/* Drain the write queue. */
if (!CQ_IS_EMPTY(writerq))
continue;
/* Main task exits here. */
break;
}
case CBUF_FREEMAP:
/*
* Note that we drop CBUF_FREEMAP buffers on
* the floor (they will not be on any cqueue)
* when we no longer need them.
*/
break;
if (dump_ioerr) {
break;
}
break;
break;
}
/*
* Try to map CBUF_MAPSIZE ranges. Can't
* assume that memory segment size is a
* multiple of CBUF_MAPSIZE. Can't assume that
* the segment starts on a CBUF_MAPSIZE
* boundary.
*/
} else {
baseoff = 0;
}
} else {
}
pagenum++;
/*
* If there are no helpers the main task does
* non-streams lzjb compress.
*/
break;
}
/* pass mapped pages to a helper */
/* the last page was done */
break;
case CBUF_USEDMAP:
break;
case CBUF_WRITE:
break;
case CBUF_ERRMSG:
} else {
}
/* wait for console output */
drv_usecwait(200000);
}
break;
default:
uprintf("dump: unexpected buffer state %d, "
break;
} /* end switch */
} /* end while(1) */
}
#ifdef COLLECT_METRICS
{
int i, compress_ratio;
char *p = buf;
if (sec < 1)
sec = 1;
#define P(...) (p += p < e ? snprintf(p, e - p, __VA_ARGS__) : 0)
P("dump_ioerr,%d\n", dump_ioerr);
P("Helpers:\n");
for (i = 0; i < ncpus; i++) {
if ((i & 15) == 0)
P(",,%03d,", i);
if (i == myid)
P(" M");
else
P(" *");
if ((i & 15) == 15)
P("\n");
}
100);
P("Dump time,%d\n", sec);
if (ds->pages_mapped > 0)
/ ds->pages_mapped));
P("\nPer-page metrics:\n");
}
#define PERPAGE(x) \
}
#undef P
if (p < e)
bzero(p, e - p);
return (p - buf);
}
#endif /* COLLECT_METRICS */
/*
* Dump the system.
*/
void
dumpsys(void)
{
proc_t *p;
char *content;
char *buf;
int save_dump_clevel;
uprintf("skipping system dump - no dump device configured\n");
if (panicstr) {
dumpcfg.helpers_wanted = 0;
}
return;
}
/* clear the sync variables */
/*
* Calculate the starting block for dump. If we're dumping on a
* swap device, start 1/5 of the way in; otherwise, start at the
* beginning. And never use the first page -- it may be a disk label.
*/
else
dumphdr->dump_npages = 0;
dumphdr->dump_nvtop = 0;
if (panicstr) {
}
if (dump_conflags & DUMP_ALL)
content = "all";
else if (dump_conflags & DUMP_CURPROC)
content = "kernel + curproc";
else
content = "kernel";
/* Make sure nodename is current */
/*
* If this is a live dump, try to open a VCHR vnode for better
* performance. We must take care to flush the buffer cache
* first.
*/
if (!panicstr) {
== 0) {
if (vn_has_cached_data(dumpvp))
} else {
}
}
}
/*
* Store a hires timestamp so we can look it up during debugging.
*/
/*
* Leave room for the message and ereport save areas and terminal dump
* header.
*/
/*
* Write out the symbol table. It's no longer compressed,
* so its 'size' and 'csize' are equal.
*/
/*
* Write out the translation map.
*/
/*
* call into hat, which may have unmapped pages that also need to
* be in the dump
*/
hat_dump();
if (dump_conflags & DUMP_ALL) {
}
} else if (dump_conflags & DUMP_CURPROC) {
/*
* Determine which pid is to be dumped. If we're panicking, we
* dump the process associated with panic_thread (if any). If
* this is a live dump, we dump the process associated with
* curthread.
*/
npids = 0;
if (panicstr) {
if (panic_thread != NULL &&
}
} else {
}
else
} else {
}
/*
* Write out the pfn table.
*/
continue;
}
/*
* Write out all the pages.
* Map pages, copy them handling UEs, compress, and write them out.
* Cooperate with any helpers running on CPUs in panic_idle().
*/
if (panicstr)
dumpcfg.nhelper_used = 0;
continue;
}
}
dumpcfg.ncbuf_used = 0;
}
}
/* start helpers */
int n = dumpcfg.nhelper_used;
continue;
}
} else {
if (panicstr)
}
/* run main task */
uprintf("\n");
}
/* record actual pages dumped */
/* platform-specific data */
/* note any errors by clearing DF_COMPLETE */
/* end of stream blocks */
datatag = 0;
/* buffer for metrics */
sizeof (dumpdatahdr_t));
/* finish the kmem intercepts, collect kmem verbose info */
if (panicstr) {
}
/* compression info in data header */
#ifdef COLLECT_METRICS
if (dump_metrics_on)
#endif
/*
* Write out the initial and terminal dump headers.
*/
(void) dumpvp_flush();
(void) dumpvp_flush();
uprintf("\r%3d%% done: %llu pages dumped, ",
if (dump_ioerr == 0) {
uprintf("dump succeeded\n");
} else {
#ifdef DEBUG
if (panicstr)
debug_enter("dump failed");
#endif
}
/*
* Write out all undelivered messages. This has to be the *last*
* thing we do because the dump process itself emits messages.
*/
if (panicstr) {
}
dump_timeleft = 0;
dump_ioerr = 0;
/* restore settings after live dump completes */
if (!panicstr) {
/* release any VCHR open of the dump device */
}
}
}
/*
* This function is called whenever the memory size, as represented
* by the phys_install list, changes.
*/
void
{
dumphdr_init();
}
/*
* This function allows for dynamic resizing of a dump area. It assumes that
* the underlying device has updated its appropriate size(9P).
*/
int
{
int error;
return (error);
}
return (ENOSPC);
}
return (0);
}