log_get.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)log_get.c 10.38 (Sleepycat) 10/3/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "shqueue.h"
#include "db_page.h"
#include "log.h"
#include "hash.h"
#include "common_ext.h"
/*
 * log_get --
 *	Get a log record.
 *
 *	Public entry point: checks the flag argument, takes the log region
 *	lock, and hands the request to __log_get().  If the record that comes
 *	back sits at offset 0 of a log file (a log header record), the request
 *	is issued a second time so that the caller sees the adjacent record.
 *
 *	dblp	log handle.
 *	alsn	LSN to look up (DB_SET); set by __log_get() on success.
 *	dbt	receives the record data.
 *	flags	exactly one of DB_CHECKPOINT, DB_CURRENT, DB_FIRST, DB_LAST,
 *		DB_NEXT, DB_PREV or DB_SET.
 *
 *	Returns the value of __db_ferr() for an unacceptable flag/DBT
 *	combination, otherwise whatever __log_get() returned.
 */
int
log_get(dblp, alsn, dbt, flags)
	DB_LOG *dblp;
	DB_LSN *alsn;
	DBT *dbt;
	u_int32_t flags;
{
	int ret;

	LOG_PANIC_CHECK(dblp);

	/* Only the seven single-valued operations are accepted. */
	switch (flags) {
	case DB_CHECKPOINT:
	case DB_CURRENT:
	case DB_FIRST:
	case DB_LAST:
	case DB_NEXT:
	case DB_PREV:
	case DB_SET:
		break;
	default:
		return (__db_ferr(dblp->dbenv, "log_get", 1));
	}

	/*
	 * Threaded handles: operations relative to the cursor are refused,
	 * and the caller has to say how the returned memory is managed.
	 * (Presumably because __log_get() keeps the cursor position and the
	 * default return buffer in the handle itself.)
	 */
	if (F_ISSET(dblp, DB_AM_THREAD)) {
		if (flags == DB_CURRENT ||
		    flags == DB_NEXT || flags == DB_PREV)
			return (__db_ferr(dblp->dbenv, "log_get", 1));
		if (!F_ISSET(dbt, DB_DBT_USERMEM | DB_DBT_MALLOC))
			return (__db_ferr(dblp->dbenv, "threaded data", 1));
	}

	LOCK_LOGREGION(dblp);

	ret = __log_get(dblp, alsn, dbt, flags, 0);

	/*
	 * A successful return with an offset of 0 means we were handed one
	 * of the log's header records; step over it by repeating the call.
	 * DB_FIRST and DB_LAST are turned into the matching directional
	 * request, every other flag is re-issued unchanged.  This assumes
	 * applications never ask for a header record by LSN.
	 */
	if (ret == 0 && alsn->offset == 0) {
		if (flags == DB_FIRST)
			flags = DB_NEXT;
		else if (flags == DB_LAST)
			flags = DB_PREV;
		ret = __log_get(dblp, alsn, dbt, flags, 0);
	}

	UNLOCK_LOGREGION(dblp);

	return (ret);
}
/*
* __log_get --
* Get a log record; internal version.
*
* PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int));
*/
int
__log_get(dblp, alsn, dbt, flags, silent)
DB_LOG *dblp;
DB_LSN *alsn;
DBT *dbt;
u_int32_t flags;
int silent;
{
DB_LSN nlsn;
HDR hdr;
LOG *lp;
size_t len;
ssize_t nr;
int cnt, ret;
char *np, *tbuf;
const char *fail;
void *p, *shortp;
lp = dblp->lp;
fail = np = tbuf = NULL;
nlsn = dblp->c_lsn;
switch (flags) {
case DB_CHECKPOINT:
nlsn = lp->chkpt_lsn;
if (IS_ZERO_LSN(nlsn)) {
__db_err(dblp->dbenv,
"log_get: unable to find checkpoint record: no checkpoint set.");
ret = ENOENT;
goto err2;
}
break;
case DB_NEXT: /* Next log record. */
if (!IS_ZERO_LSN(nlsn)) {
/* Increment the cursor by the cursor record size. */
nlsn.offset += dblp->c_len;
break;
}
/* FALLTHROUGH */
case DB_FIRST: /* Find the first log record. */
/* Find the first log file. */
if ((ret = __log_find(dblp, 1, &cnt)) != 0)
goto err2;
/*
* We may have only entered records in the buffer, and not
* yet written a log file. If no log files were found and
* there's anything in the buffer, it belongs to file 1.
*/
if (cnt == 0)
cnt = 1;
nlsn.file = cnt;
nlsn.offset = 0;
break;
case DB_CURRENT: /* Current log record. */
break;
case DB_PREV: /* Previous log record. */
if (!IS_ZERO_LSN(nlsn)) {
/* If at start-of-file, move to the previous file. */
if (nlsn.offset == 0) {
if (nlsn.file == 1 ||
__log_valid(dblp, nlsn.file - 1, 0) != 0)
return (DB_NOTFOUND);
--nlsn.file;
nlsn.offset = dblp->c_off;
} else
nlsn.offset = dblp->c_off;
break;
}
/* FALLTHROUGH */
case DB_LAST: /* Last log record. */
nlsn.file = lp->lsn.file;
nlsn.offset = lp->lsn.offset - lp->len;
break;
case DB_SET: /* Set log record. */
nlsn = *alsn;
break;
}
retry:
/* Return 1 if the request is past end-of-file. */
if (nlsn.file > lp->lsn.file ||
(nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset))
return (DB_NOTFOUND);
/* If we've switched files, discard the current fd. */
if (dblp->c_lsn.file != nlsn.file && dblp->c_fd != -1) {
(void)__os_close(dblp->c_fd);
dblp->c_fd = -1;
}
/* If the entire record is in the in-memory buffer, copy it out. */
if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) {
/* Copy the header. */
p = lp->buf + (nlsn.offset - lp->w_off);
memcpy(&hdr, p, sizeof(HDR));
/* Copy the record. */
len = hdr.len - sizeof(HDR);
if ((ret = __db_retcopy(dbt, (u_int8_t *)p + sizeof(HDR),
len, &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
goto err1;
goto cksum;
}
/* Acquire a file descriptor. */
if (dblp->c_fd == -1) {
if ((ret = __log_name(dblp, nlsn.file,
&np, &dblp->c_fd, DB_RDONLY | DB_SEQUENTIAL)) != 0) {
fail = np;
goto err1;
}
__os_freestr(np);
np = NULL;
}
/* Seek to the header offset and read the header. */
if ((ret =
__os_seek(dblp->c_fd, 0, 0, nlsn.offset, 0, SEEK_SET)) != 0) {
fail = "seek";
goto err1;
}
if ((ret = __os_read(dblp->c_fd, &hdr, sizeof(HDR), &nr)) != 0) {
fail = "read";
goto err1;
}
if (nr == sizeof(HDR))
shortp = NULL;
else {
/* If read returns EOF, try the next file. */
if (nr == 0) {
if (flags != DB_NEXT || nlsn.file == lp->lsn.file)
goto corrupt;
/* Move to the next file. */
++nlsn.file;
nlsn.offset = 0;
goto retry;
}
/*
* If read returns a short count the rest of the record has
* to be in the in-memory buffer.
*/
if (lp->b_off < sizeof(HDR) - nr)
goto corrupt;
/* Get the rest of the header from the in-memory buffer. */
memcpy((u_int8_t *)&hdr + nr, lp->buf, sizeof(HDR) - nr);
shortp = lp->buf + (sizeof(HDR) - nr);
}
/*
* Check for buffers of 0's, that's what we usually see during
* recovery, although it's certainly not something on which we
* can depend.
*/
if (hdr.len <= sizeof(HDR))
goto corrupt;
len = hdr.len - sizeof(HDR);
/* If we've already moved to the in-memory buffer, fill from there. */
if (shortp != NULL) {
if (lp->b_off < ((u_int8_t *)shortp - lp->buf) + len)
goto corrupt;
if ((ret = __db_retcopy(dbt, shortp, len,
&dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
goto err1;
goto cksum;
}
/*
* Allocate temporary memory to hold the record.
*
* XXX
* We're calling malloc(3) with a region locked. This isn't
* a good idea.
*/
if ((ret = __os_malloc(len, NULL, &tbuf)) != 0)
goto err1;
/*
* Read the record into the buffer. If read returns a short count,
* there was an error or the rest of the record is in the in-memory
* buffer. Note, the information may be garbage if we're in recovery,
* so don't read past the end of the buffer's memory.
*/
if ((ret = __os_read(dblp->c_fd, tbuf, len, &nr)) != 0) {
fail = "read";
goto err1;
}
if (len - nr > sizeof(lp->buf))
goto corrupt;
if (nr != (ssize_t)len) {
if (lp->b_off < len - nr)
goto corrupt;
/* Get the rest of the record from the in-memory buffer. */
memcpy((u_int8_t *)tbuf + nr, lp->buf, len - nr);
}
/* Copy the record into the user's DBT. */
if ((ret = __db_retcopy(dbt, tbuf, len,
&dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
goto err1;
__os_free(tbuf, 0);
tbuf = NULL;
cksum: if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) {
if (!silent)
__db_err(dblp->dbenv, "log_get: checksum mismatch");
goto corrupt;
}
/* Update the cursor and the return lsn. */
dblp->c_off = hdr.prev;
dblp->c_len = hdr.len;
dblp->c_lsn = *alsn = nlsn;
return (0);
corrupt:/*
* This is the catchall -- for some reason we didn't find enough
* information or it wasn't reasonable information, and it wasn't
* because a system call failed.
*/
ret = EIO;
fail = "read";
err1: if (!silent)
if (fail == NULL)
__db_err(dblp->dbenv, "log_get: %s", strerror(ret));
else
__db_err(dblp->dbenv,
"log_get: %s: %s", fail, strerror(ret));
err2: if (np != NULL)
__os_freestr(np);
if (tbuf != NULL)
__os_free(tbuf, 0);
return (ret);
}