fmd_ckpt.c revision c7d6cfd6e2ae5d7536ef67f65110733890f370a4
* The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * See the License for the specific language governing permissions * and limitations under the License. * When distributing Covered Code, include this CDDL HEADER in each * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * The fmd_ckpt_t structure is used to manage all of the state needed by the * various subroutines that save and restore checkpoints. The structure is * initialized using fmd_ckpt_create() or fmd_ckpt_open() and is destroyed * by fmd_ckpt_destroy(). Refer to the subroutines below for more details. char *
ckp_strs;
/* string table base pointer */ char *
ckp_strp;
/* string table pointer */ int ckp_fd;
/* output descriptor */ void *
ckp_arg;
/* private arg for callbacks */ * Table of FCF section descriptions. Here we record the minimum size for each * section (for use during restore) and the expected entry size and alignment * for each section (for use during both checkpoint and restore). { 0, 0,
sizeof (
uint8_t) },
/* NONE */{
1, 0,
sizeof (
char) },
/* STRTAB */ return (-
1);
/* failed to open checkpoint file */ * Once we've read in a consistent copy of the FCF file and we're sure * the header can be accessed, go through it and make sure everything * is valid. We also check that unused bits are zero so we can expand * to use them safely in the future and support old files if needed. "bad checkpoint padding at id[%d]", i));
"bad header and/or section size\n"));
* Once the header is validated, iterate over the section headers * ensuring that each one is valid w.r.t. offset, alignment, and size. * We also pick up the string table pointer during this pass. "size or entsize\n", i));
"tables are present in checkpoint file\n"));
"is missing terminating nul byte\n", i));
"sects are present in checkpoint file\n"));
* Ensure that the first section is an empty one of type FCF_SECT_NONE. * This is done to ensure that links can use index 0 as a null section. "appropriate size and/or attributes (SECT_NONE)\n"));
"no module section found in file\n"));
* fmd_ckpt_error() is used as a wrapper around fmd_error() for ckpt routines. * It calls fmd_module_unlock() on behalf of its caller, logs the error, and * then aborts the API call and the surrounding module entry point by doing an * fmd_module_abort(), which longjmps to the place where we entered the module. * Depending on the type of error and conf settings, we will reset or fail. * If the data pointer is non-NULL, copy the data to our buffer; else * the caller is responsible for doing so and updating ckp->ckp_ptr. * We've added up all the sections by now: add two more for SECT_NONE * and SECT_STRTAB, and add the size of the section header table and * string table to the total size. We know that the fcf_hdr_t is * aligned so that fcf_sec_t's can follow it, and that fcf_sec_t * is aligned so that any section can follow it, so no extra padding * bytes need to be allocated between any of these items. ckp->
ckp_secs +=
2;
/* for FCF_SECT_NONE and FCF_SECT_STRTAB */ return (-
1);
/* errno is set for us */ ckp->
ckp_secs = 0;
/* reset section counter for second pass */ * Before committing the checkpoint, we assert that fmd_ckpt_t's sizes * and current pointer locations all add up appropriately. Any ASSERTs * which trip here likely indicate an inconsistency in the code for the * reservation pass and the buffer update pass of the FCF subroutines. return (-
1);
/* errno is set for us */ return;
/* do not checkpoint cases from remote transports */ cip->
ci_nvsz = 0;
/* compute size of packed suspect nvlist array */ return;
/* do not checkpoint cases from remote transports */ fmd_panic(
"case %p (%s) has invalid state %u",
* If checkpointing is disabled for the module, just return. We must * commit the module state anyway to transition pending log events. return;
/* no checkpoint is necessary for this module */ * If the per-module checkpoint directory isn't found or isn't of type * directory, move aside whatever is there (if anything) and attempt * to mkdir(2) a new module checkpoint directory. If this fails, we * have no choice but to abort the checkpoint and try again later. return;
/* return without clearing dirty bits */ * Create a temporary file to write out the checkpoint into, and create * a fmd_ckpt_t structure to manage construction of the checkpoint. We * then figure out how much space will be required, and allocate it. * Fill in the checkpoint content, write it to disk, sync it, and then * atomically rename it to the destination path. If this fails, we * have no choice but to leave all our dirty bits set and return. return;
/* return without clearing dirty bits */ * Utility function to retrieve a pointer to a section's header and verify that * it is of the expected type or it is a FCF_SECT_NONE reference. * Utility function to retrieve the data pointer for a particular section. The * validity of the header values has already been checked by fmd_ckpt_open(). * Utility function to retrieve the end of the data region for a particular * section. The validity of this value has been confirmed by fmd_ckpt_open(). * Utility function to retrieve a string pointer (fcf_stridx_t). If the string * index is valid, the string data is returned; otherwise 'defstr' is returned. "invalid link to section %u: expected events\n",
sid);
return;
/* empty events section or type none */ * Hold the reader lock on log pointers to block log rotation during * the section restore so that we can safely insert refs to d_errlog. for (i = 0; i < n; i++) {
"invalid link to section %u: expected nvlists\n",
sid);
"size %u exceeds buffer\n",
sid, i,
size);
"unpack nvlist %u [%d]: %s\n",
sid, i,
"invalid link to section %u: expected bufs\n",
sid);
return;
/* empty events section or type none */ for (i = 0; i < n; i++) {
"duplicate case uuid: %s\n",
uuid);
* Once solved, treat suspects from resource cache as master copy. for (i = 0; i < n; i++) {
"checkpoint is not for module %s\n",
mp->
mod_name);
* Restore a checkpoint for the specified module. Any errors which occur * during restore will call fmd_ckpt_error() or trigger an fmd_api_error(), * either of which will automatically unlock the module and trigger an abort. return;
/* never restore checkpoints for this module */ * Delete the module's checkpoint file. This is used by the ckpt.zero property * code or by the fmadm reset RPC service path to force a checkpoint delete. * Move aside the module's checkpoint file if checkpoint restore has failed. * We rename the file rather than deleting it in the hopes that someone might * send it to us for post-mortem analysis of whether we have a checkpoint bug.