savecore.c revision 6d89ca534e2138511ecb76c02bcec1bcb83f685b
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/*
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <deflt.h>
#include <time.h>
#include <syslog.h>
#include <stropts.h>
#include <pthread.h>
#include <limits.h>
#include <atomic.h>
#include <libnvpair.h>
#include <libintl.h>
#include <sys/compress.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <fm/libfmevent.h>
#include <sys/int_fmtio.h>
/* minimum size for output buffering */
/* create this file if metrics collection is enabled in the kernel */
#define METRICSFILE "METRICS.csv"
static char *savedir; /* savecore directory */
static char *dumpfile; /* source of raw crash dump */
static long pagesize; /* dump pagesize */
static int verbose; /* chatty mode */
static int disregard_valid_flag; /* disregard valid flag */
static int livedump; /* dump the current running system */
static int interactive; /* user invoked; no syslog */
static int csave; /* save dump compressed */
static int filemode; /* processing file, not dump device */
static int percent_done; /* progress indicator */
static long coreblksize; /* preferred write size (st_blksize) */
static int cflag; /* run as savecore -c */
static int mflag; /* run as savecore -m */
/*
* Payload information for the events we raise. These are used
* in raise_event to determine what payload to include.
*/
enum sc_event_type {
};
/*
* Common payload
*/
#define _SC_PAYLOAD_CMN \
static const struct {
const char *sce_subclass;
} sc_event[] = {
/*
* SC_EVENT_DUMP_PENDING
*/
{
"dump_pending_on_device",
},
/*
* SC_EVENT_SAVECORE_FAILURE
*/
{
"savecore_failure",
},
/*
* SC_EVENT_DUMP_AVAILABLE
*/
{
"dump_available",
},
};
static void raise_event(enum sc_event_type, char *);
/*
* Command-line synopsis handler: the synopsis format string below is
* filled in with progname, after which the process terminates with
* exit status 1. This function does not return to its caller.
*/
static void
usage(void)
{
"usage: %s [-Lvd] [-f dumpfile] [dirname]\n", progname);
exit(1);
}
static void
{
char buf[1024];
int code;
static int logprint_raised = 0;
/*LINTED: E_SEC_PRINTF_VAR_FMT*/
if (!interactive) {
case SC_SL_ERR:
/*LINTED: E_SEC_PRINTF_VAR_FMT*/
break;
case SC_SL_WARN:
/*LINTED: E_SEC_PRINTF_VAR_FMT*/
break;
default:
break;
}
}
}
switch (flags & _SC_ALLEXIT) {
case 0:
return;
case SC_EXIT_OK:
code = 0;
break;
case SC_EXIT_PEND:
/*
* Raise an ireport saying why we are exiting. Do not
* raise if run as savecore -m. If something in the
* raise_event codepath calls logprint avoid recursion.
*/
if (!mflag && logprint_raised++ == 0)
code = 2;
break;
case SC_EXIT_FM:
code = 3;
break;
case SC_EXIT_ERR:
default:
if (!mflag && logprint_raised++ == 0)
code = 1;
break;
}
}
/*
* System call / libc wrappers that exit on error.
*/
static int
{
int fd;
return (fd);
}
static void
{
}
static void
{
}
static void
{
}
static void
{
}
static void
{
}
static void
{
if (sz < 0)
}
static void
{
}
static void *
{
void *buf;
return (buf);
}
static long
{
long file_value = -1;
}
}
/*
* Read and sanity-check the dump headers.
*
* Takes no arguments and returns nothing; behaviour differs depending on
* the file-scope 'filemode' flag (processing a file rather than a dump
* device). The diagnostics below show the conditions that are checked:
* a dump that was already processed, a dump version mismatch, a kernel
* word-size mismatch, a dump data version mismatch, and a corrupt
* initial header.
*/
static void
read_dumphdr(void)
{
if (filemode)
else
"dump already processed");
"dump version (%d) != %s version (%d)",
"dump is from %u-bit kernel - cannot save on %u-bit kernel",
"dump data version (%d) != %s data version (%d)",
} else {
}
/*
* Read the initial header, clear the valid bits, and compare headers.
* The main header may have been overwritten by swapping if we're
* using a swap partition as the dump device, in which case we bail.
*/
/*
* Clear valid bit so we don't complain on every invocation.
*/
/* Only done for a real dump device, never in file mode. */
if (!filemode)
"initial dump header corrupt");
}
}
/*
* Check that the save directory has room for the dump and complain with
* the "not enough space" diagnostic (available vs. needed, in MB) when
* it does not.
*
* csave: non-zero selects the 'else' branch, zero the 'if' branch.
* Note that this parameter shadows the file-scope 'csave' flag, so only
* the value passed by the caller is consulted here.
*/
static void
check_space(int csave)
{
if (!csave)
else
"not enough space in %s (%lld MB avail, %lld MB needed)",
}
}
static void
{
long i;
static long misses = 0;
for (i = 0; i < corehdr.dump_nvtop; i++) {
long first = 0;
long middle = 0;
uintptr_t h;
break;
else
}
if (++misses <= 10)
"pfn %ld not found for as=%p, va=%p\n",
continue;
}
}
}
/*
* Copy whole sections of the dump device to the file.
*/
static void
{
while (nb > 0) {
}
}
/*
* Copy pages when the dump data header is missing.
* This supports older kernels with latest savecore.
*/
static void
{
while (np > 0) {
"CopyPages: page %lu csize %d (0x%x) pagesize %d",
pagesize);
break;
}
np--;
}
}
/*
* Concatenate dump contents into a new file.
* Update corehdr with new offsets.
*/
/*
* corefile: name of the output file (read-only string).
*
* The steps run in the order given by the comments below: symbol table,
* pfn table, dump map, data pages, then the rewritten headers.
*/
static void
copy_crashfile(const char *corefile)
{
/*
* This dump file is still compressed
*/
/*
* Leave room for corehdr, it is updated and written last
*/
corehdr.dump_start = 0;
/*
* Read in the compressed symbol table, copy it to corefile.
*/
/*
* Save the pfn table.
*/
/*
* Save the dump map.
*/
/*
* Save the data pages.
*/
/*
* NOTE(review): a non-zero dump_data_csize presumably means the data
* header is present, with the 'else' arm covering older kernels that
* lack it -- confirm.
*/
if (datahdr.dump_data_csize != 0)
else
/*
* Now write the modified dump header to front and end of the copy.
* Make it look like a valid dump device.
*
* From dumphdr.h: Two headers are written out: one at the
* beginning of the dump, and the other at the very end of the
* dump device. The terminal header is at a known location
* (end of device) so we can always find it.
*
* Pad with zeros to each DUMP_OFFSET boundary.
*/
/* nb > 0 is tested twice, once per padded header. */
if (nb > 0) {
}
if (nb > 0) {
}
/*
* Write out the modified dump header to the dump device.
* The dump device has been processed, so DF_VALID is clear.
*/
/* Skipped in file mode: there is no dump device to update. */
if (!filemode)
}
/*
* compressed streams
*/
typedef struct blockhdr blockhdr_t;
struct blockhdr {
};
/*
* One unit of compressed-stream input. Blocks are recycled through the
* 'freeblocks' list via enqt().
*/
struct block {
char *block; /* presumably the block's data buffer -- confirm */
int size; /* size value handed to the stream processing code */
};
typedef enum streamstate {
typedef struct stream {
int init;
int tag;
int bound;
int nout;
char *blkbuf;
} stream_t;
static stream_t *endstreams;
const int cs = sizeof (dumpcsize_t);
typedef struct tinfo {
int corefd;
} tinfo_t;
static int threads_stop;
static int threads_active;
static blockhdr_t freeblocks;
static void
{
h->head = b;
else
h->tail = b;
}
/*
* Take a block from the list described by h and return it.
*
* h: list header to take from.
* Returns the block, which may be NULL (the code below explicitly
* handles the non-NULL case before returning).
* NOTE(review): by its name this dequeues from the head of the list,
* the counterpart of enqt() -- confirm.
*/
static block_t *
deqh(blockhdr_t *h)
{
if (b != NULL) {
}
return (b);
}
static void *runstreams(void *arg);
static void
{
int nthreads;
int nblocks;
int i;
block_t *b;
tinfo_t *t;
if (nthreads < 1)
nthreads = 1;
/* init streams */
/* init stream block buffers */
for (i = 0; i < nblocks; i++) {
enqt(&freeblocks, b);
}
/* init worker threads */
(void) pthread_mutex_lock(&lock);
threads_active = 1;
threads_stop = 0;
if (t->corefd < 0) {
endtinfo = t;
break;
}
}
(void) pthread_mutex_unlock(&lock);
}
/*
* Stream barrier: with 'lock' held, visit every stream in the range
* [streams, endstreams). Callers use this to make sure all stream
* blocks have been processed before relying on the saved page count.
*/
static void
sbarrier()
{
stream_t *s;
(void) pthread_mutex_lock(&lock);
/* walk every configured stream while holding the lock */
for (s = streams; s != endstreams; s++) {
}
(void) pthread_mutex_unlock(&lock);
}
static void
{
tinfo_t *t;
if (threads_active) {
sbarrier();
(void) pthread_mutex_lock(&lock);
threads_stop = 1;
(void) pthread_cond_signal(&cvwork);
(void) pthread_mutex_unlock(&lock);
threads_active = 0;
}
}
static block_t *
{
block_t *b;
(void) pthread_mutex_lock(&lock);
(void) pthread_mutex_unlock(&lock);
return (b);
}
/* data page offset from page number */
/* check for coreblksize boundary */
static int
{
}
static int
{
/*LINTED:E_BAD_PTR_CAST_ALIGN*/
if (*pl++ != 0)
return (0);
return (1);
}
/* write pages to the core file */
static void
{
if (np > 0)
}
/*
* Process one lzjb block.
* No object (stream header or page) will be split over a block boundary.
*/
static void
{
int in = 0;
int csize;
int doflush;
char *out;
if (!s->init) {
s->init = 1;
s->state = STREAMSTART;
}
switch (s->state) {
case STREAMSTART:
"LZJB STREAMSTART: bad stream header");
"LZJB STREAMSTART: bad range: %d > %d",
s->nout = 0;
s->done = 0;
s->state = STREAMPAGES;
break;
case STREAMPAGES:
"LZJB STREAMPAGES: bad csize=%d", csize);
"LZJB STREAMPAGES: dsize %d != pagesize %d",
doflush = 0;
doflush = 1;
doflush = 1;
}
s->state = STREAMSTART;
doflush = 1;
}
if (doflush) {
s->nout = 0;
}
break;
}
}
}
/*
* bzlib library reports errors with this callback.
*
* errcode: integer error code supplied by the library.
*
* Unlike the other functions in this file, this one is not static.
* NOTE(review): presumably it needs external linkage so that bzlib can
* resolve it by name -- confirm against the bzlib build options.
*/
void
bz_internal_error(int errcode)
{
}
/*
* Return one object in the stream.
*
* An object (stream header or page) will likely span an input block
* of compression data. Return non-zero when an entire object has been
* retrieved from the stream.
*/
static int
{
int rc;
}
if (rc == BZ_STREAM_END) {
"BZ2_bzDecompressReset: %s",
continue;
}
break;
}
}
/*
* Process one bzip2 block.
* The library interface is documented in the bzip2 (libbzip2) manual.
*/
static void
{
int rc = 0;
int doflush;
char *out;
if (!s->init) {
s->init = 1;
s->state = STREAMSTART;
}
switch (s->state) {
case STREAMSTART:
return;
"BZ2 STREAMSTART: bad stream header");
"BZ2 STREAMSTART: bad range: %d > %d",
s->nout = 0;
s->done = 0;
s->state = STREAMPAGES;
break;
case STREAMPAGES:
return;
doflush = 0;
doflush = 1;
doflush = 1;
}
s->state = STREAMSTART;
doflush = 1;
}
if (doflush) {
s->nout = 0;
}
break;
}
}
}
/* report progress */
static void
{
if (!interactive)
return;
if (percent > percent_done) {
percent);
}
}
/*
* Thread body for the stream worker threads.
*
* arg: opaque per-thread argument; it is handed back unchanged as the
* thread's return value.
*
* The thread holds 'lock' except while it is processing a block. It
* loops until 'threads_stop' is set, scanning the streams for one it
* can bind to. 'bound' records whether this pass found any work.
*/
static void *
runstreams(void *arg)
{
stream_t *s;
block_t *b;
int bound;
(void) pthread_mutex_lock(&lock);
while (!threads_stop) {
bound = 0;
for (s = streams; s != endstreams; s++) {
continue;
/* claim the stream for this thread */
s->bound = 1;
bound = 1;
/* wake another worker, then drop the lock while processing */
(void) pthread_cond_signal(&cvwork);
(void) pthread_mutex_unlock(&lock);
b->size);
else
b->size);
/* retake the lock and recycle the finished block */
(void) pthread_mutex_lock(&lock);
enqt(&freeblocks, b);
(void) pthread_cond_signal(&cvfree);
}
/* release the stream and notify anyone waiting on the barrier */
s->bound = 0;
(void) pthread_cond_signal(&cvbarrier);
}
/* nothing was bound on this pass and no stop was requested */
if (!bound && !threads_stop)
}
/* on exit, pass the wakeup on before releasing the lock */
(void) pthread_cond_signal(&cvwork);
(void) pthread_mutex_unlock(&lock);
return (arg);
}
/*
* Process compressed pages.
*
* The old format, now called single-threaded lzjb, is a 32-bit size
* word followed by 'size' bytes of lzjb compression data for one
* page. The new format extends this by storing a 12-bit "tag" in the
* upper bits of the size word. When the size word is pagesize or
* less, it is assumed to be one lzjb page. When the size word is
* greater than pagesize, it is assumed to be a "stream block",
* belonging to up to 4095 streams. In practice, the number of streams
* is set to one less than the number of CPUs running at crash
* time. One CPU processes the crash dump, the remaining CPUs
* separately process groups of data pages.
*
* savecore creates a thread per stream, but never more threads than
* the number of CPUs running savecore. This is because savecore can
* be processing a crash file from a remote machine, which may have
* more CPUs.
*
* When the kernel uses parallel lzjb or parallel bzip2, we expect a
* series of 128KB blocks of compression data. In this case, each
* block has a "tag", in the range 1-4095. Each block is handed off to
* the threads running "runstreams". The dump format is either lzjb
* or bzip2, never a mixture. These threads, in turn, process the
* compression data for groups of pages. Groups of pages are delimited
* by a "stream header", which indicates a starting pfn and number of
* pages. When a stream block has been read, the condition variable
* "cvwork" is signalled, which causes one of the available threads to
* wake up and process the stream.
*
* In the parallel case there will be streams blocks encoding all data
* pages. The stream of blocks is terminated by a zero size
* word. There can be a few lzjb pages tacked on the end, depending on
* the architecture. The sbarrier function ensures that all stream
* blocks have been processed so that the page number for the few
* single pages at the end can be known.
*/
/*
* corefd: file descriptor of the core file being produced.
*
* Each pass of the loop handles one size word and takes one of three
* paths: a tagged stream block, a single lzjb page (csize > 0, no tag),
* or end of data.
*/
static void
decompress_pages(int corefd)
{
char *out;
block_t *b;
stream_t *s;
/*LINTED: E_CONSTANT_CONDITION*/
while (1) {
/*
* The csize word delimits stream blocks.
* See dumphdr.h for a description.
*/
if (tag != 0) { /* a stream block */
if (nstreams == 0)
"starting data header is missing");
"stream tag %d not in range 1..%d",
"block size 0x%x > max csize 0x%x",
/* obtain an unused block buffer for this stream block */
b = getfreeblock();
(void) pthread_mutex_lock(&lock);
/* wake a worker only if no thread is bound to the stream */
if (!s->bound)
(void) pthread_cond_signal(&cvwork);
(void) pthread_mutex_unlock(&lock);
} else if (csize > 0) { /* one lzjb page */
"csize 0x%x > pagesize 0x%x",
nout = 0;
}
"dsize 0x%x != pagesize 0x%x",
/*
* wait for streams to flush so that 'saved' is correct
*/
if (threads_active)
sbarrier();
doflush = 0;
if (nout == 0)
doflush = 1;
doflush = 1;
}
/* after a flush the output count starts over */
if (doflush) {
nout = 0;
}
/*
* Non-streams lzjb does not use blocks. Stop
* here if all the pages have been decompressed.
*/
break;
} else {
break; /* end of data */
}
}
/* the input loop is finished; shut the worker threads down */
stopstreams();
/*
* NOTE(review): these guards presumably release the buffers and the
* stream table when they were allocated -- confirm.
*/
if (inbuf)
if (cpage)
if (dpage)
if (streams)
}
static void
{
/*
* Determine the optimum write size for the core file
*/
if (verbose > 1)
(long)st.st_blksize);
/*
* This dump file is now uncompressed
*/
/*
* Read in the compressed symbol table, copy it to corefile,
* decompress it, and write the result to namelist.
*/
if (ksyms_dsize != ksyms_size)
"bad data in symbol table, %lu of %lu bytes saved",
/*
* Read in and write out the pfn table.
*/
/*
* Convert the raw translation data into a hashed dump map.
*/
/*
* Decompress the pages
*/
if (verbose)
(void) printf("%ld (%ld%%) zero pages were not written\n",
/*
* Write out the modified dump headers.
*/
if (!filemode)
}
/*
* When the system panics, the kernel saves all undelivered messages (messages
* that never made it out to syslogd(1M)) in the dump. At a minimum, the
* panic message itself will always fall into this category. Upon reboot,
* the syslog startup script runs savecore -m to recover these messages.
*
* To do this, we read the unsent messages from the dump and send them to
* /dev/conslog on priority band 1. This has the effect of prepending them
* to any already-accumulated messages in the console backlog, thus preserving
* temporal ordering across the reboot.
*
* Note: since savecore -m is used *only* for this purpose, it does *not*
* attempt to save the crash dump. The dump will be saved later, after
* syslogd(1M) starts, by the savecore startup script.
*/
/*
* Entry point for 'savecore -m'.
*
* Takes no arguments. The normal return value is 0, which the caller
* uses directly as the process exit status.
*/
static int
message_save(void)
{
int logfd;
for (;;) {
/* step the dump offset past one log_dump_t record header */
dumpoff += sizeof (log_dump_t);
break;
/* checksum failures are reported with these diagnostics */
"bad log_ctl checksum");
"bad message checksum");
}
return (0);
}
/*
* Extract the numeric suffix N from a dump file name of the form
* "vmdump.N".
*
* f: path or bare file name to examine; must not be NULL.
*
* Returns N on success, or -1 when no "vmdump.N" pattern can be parsed.
*
* The final path component is examined first so that a directory whose
* name happens to contain "vmdump" (for example "/var/vmdump.d/vmdump.3")
* cannot hide the real file name. If the final component yields nothing,
* fall back to the first "vmdump" anywhere in the path, which is what
* earlier versions of this function always did.
*/
static long
getbounds(const char *f)
{
	long b = -1;
	const char *base = strrchr(f, '/');
	const char *p;

	/* Step past the last '/', or use the whole string if there is none. */
	base = (base != NULL) ? base + 1 : f;

	p = strstr(base, "vmdump");
	if (p != NULL && sscanf(p, "vmdump.%ld", &b) == 1)
		return (b);

	/* Historical behaviour: first "vmdump" anywhere in the path. */
	b = -1;
	p = strstr(f, "vmdump");
	if (p != NULL)
		(void) sscanf(p, "vmdump.%ld", &b);

	return (b);
}
/*
* Retrieve the panic stack text into the caller's buffer.
*
* stack: output buffer, written through this pointer. On the early
* return paths below it is set to the empty string; callers detect the
* no-stack case by testing stack[0].
* NOTE(review): the required buffer size is presumably STACK_BUF_SIZE,
* as used by the caller's panic_stack array -- confirm.
*/
static void
stack_retrieve(char *stack)
{
/* step the dump offset past one summary_dump_t record */
dumpoff += sizeof (summary_dump_t);
*stack = '\0';
return;
}
*stack = '\0';
return;
}
}
static void
{
char panic_stack[STACK_BUF_SIZE];
int err = 0;
goto publish; /* try to send payload-free event */
if (pl & SC_PAYLOAD_ISCOMPRESSED) {
}
if (pl & SC_PAYLOAD_DUMPADM_EN) {
}
if (pl & SC_PAYLOAD_IMAGEUUID) {
}
if (pl & SC_PAYLOAD_CRASHTIME) {
}
}
if (pl & SC_PAYLOAD_PANICSTACK) {
if (panic_stack[0] != '\0') {
/*
* The summary page may not be present if the dump
* was previously recorded compressed.
*/
}
}
/* add warning string if this is an ireport for dump failure */
if (pl & SC_PAYLOAD_DUMPCOMPLETE)
if (pl & SC_PAYLOAD_FM_PANIC) {
}
if (pl & SC_PAYLOAD_JUSTCHECKING) {
}
if (err)
"event payload; will try to publish anyway.");
attr) != FMEV_SUCCESS) {
}
}
int
{
int i, c, bfd;
long filebounds = -1;
(void) defopen("/etc/dumpadm.conf");
switch (c) {
case 'L':
livedump++;
break;
case 'v':
verbose++;
break;
case 'c':
cflag++;
break;
case 'd':
break;
case 'm':
mflag++;
break;
case 'f':
break;
case '?':
usage();
}
}
/*
* If doing something other than extracting an existing dump (i.e.
* dumpfile has been provided as an option), the user must be root.
*/
exit(1);
}
usage();
"no dump device configured");
}
if (mflag)
return (message_save());
usage();
"dedicated dump device required");
dumpfd = -1;
csave = 1;
read_dumphdr();
/*
* We want this message to go to the log file, but not the console.
* There's no good way to do that with the existing syslog facility.
* We could extend it to handle this, but there doesn't seem to be
* a general need for it, so we isolate the complexity here instead.
*/
char fmt[] = "reboot after panic: %s";
/* LINTED: E_SEC_SPRINTF_UNBOUNDED_COPY */
/* LINTED: E_SEC_PRINTF_VAR_FMT */
}
}
if (dumphdr.dump_fm_panic)
/*
* We have a valid dump on a dump device and know as much about
* it as we're going to at this stage. Raise an event for
* logging and so that FMA can open a case for this panic.
* Avoid this step for FMA-initiated panics - FMA will replay
* ereports off the dump device independently of savecore and
* will make a diagnosis, so we don't want to open two cases
* for the same event. Also avoid raising an event for a
* livedump, or when we are inflating a compressed dump.
*/
/*
* Option -c is designed for use from svc-dumpadm where we know
* that dumpadm -n is in effect but run savecore -c just to
* get the above dump_pending_on_device event raised. If it is run
* interactively then just print further panic details.
*/
if (cflag) {
"Panic crashdump pending on dump device%s "
"run savecore(1M) manually to extract. "
"Image UUID %s%s.",
/*NOTREACHED*/
}
if (filebounds < 0)
else
bounds = filebounds;
if (csave) {
datahdr.dump_metrics = 0;
"Saving compressed system crash dump in %s/%s",
/*
* Raise a fault management event that indicates the system
* has panicked. We know a reasonable amount about the
* condition at this time, but the dump is still compressed.
*/
if (metrics_size > 0) {
if (sec < 1)
sec = 1;
"Can't create %s:\n%s",
} else {
for (i = 0; i < argc; i++)
corefile);
metrics);
}
}
"Decompress the crash dump with "
"\n'savecore -vf %s/%s'",
} else {
== 0)
"%s already exists: remove with "
"'rm -f %s/{unix,vmcore}.%ld'",
"saving system crash dump in %s/{unix,vmcore}.%ld",
if (sec < 1)
sec = 1;
for (i = 0; i < argc; i++)
saved);
}
}
if (filebounds < 0) {
}
if (verbose) {
(void) printf("%d:%02d dump %s is done\n",
}
int i, nw;
(void) printf("pages count %%\n");
for (i = 0; i <= BTOP(coreblksize); ++i) {
if (hist[i] == 0)
continue;
(void) printf("%3d %5u %6.2f\n",
}
}
dumpfd = -1;
return (0);
}