db/log/log_get.c

/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *  Sleepycat Software.  All rights reserved.
 */
1N/A#include "config.h"
1N/A
1N/A#ifndef lint
1N/Astatic const char sccsid[] = "@(#)log_get.c 10.38 (Sleepycat) 10/3/98";
1N/A#endif /* not lint */
1N/A
1N/A#ifndef NO_SYSTEM_INCLUDES
1N/A#include <sys/types.h>
1N/A
1N/A#include <errno.h>
1N/A#include <string.h>
1N/A#include <unistd.h>
1N/A#endif
1N/A
1N/A#include "db_int.h"
1N/A#include "shqueue.h"
1N/A#include "db_page.h"
1N/A#include "log.h"
1N/A#include "hash.h"
1N/A#include "common_ext.h"
1N/A
1N/A/*
1N/A * log_get --
1N/A *  Get a log record.
1N/A */
1N/Aint
1N/Alog_get(dblp, alsn, dbt, flags)
1N/A    DB_LOG *dblp;
1N/A    DB_LSN *alsn;
1N/A    DBT *dbt;
1N/A    u_int32_t flags;
1N/A{
1N/A    int ret;
1N/A
1N/A    LOG_PANIC_CHECK(dblp);
1N/A
1N/A    /* Validate arguments. */
1N/A    if (flags != DB_CHECKPOINT && flags != DB_CURRENT &&
1N/A        flags != DB_FIRST && flags != DB_LAST &&
1N/A        flags != DB_NEXT && flags != DB_PREV && flags != DB_SET)
1N/A        return (__db_ferr(dblp->dbenv, "log_get", 1));
1N/A
1N/A    if (F_ISSET(dblp, DB_AM_THREAD)) {
1N/A        if (flags == DB_NEXT || flags == DB_PREV || flags == DB_CURRENT)
1N/A            return (__db_ferr(dblp->dbenv, "log_get", 1));
1N/A        if (!F_ISSET(dbt, DB_DBT_USERMEM | DB_DBT_MALLOC))
1N/A            return (__db_ferr(dblp->dbenv, "threaded data", 1));
1N/A    }
1N/A
1N/A    LOCK_LOGREGION(dblp);
1N/A
1N/A    /*
1N/A     * If we get one of the log's header records, repeat the operation.
1N/A     * This assumes that applications don't ever request the log header
1N/A     * records by LSN, but that seems reasonable to me.
1N/A     */
1N/A    ret = __log_get(dblp, alsn, dbt, flags, 0);
1N/A    if (ret == 0 && alsn->offset == 0) {
1N/A        switch (flags) {
1N/A        case DB_FIRST:
1N/A            flags = DB_NEXT;
1N/A            break;
1N/A        case DB_LAST:
1N/A            flags = DB_PREV;
1N/A            break;
1N/A        }
1N/A        ret = __log_get(dblp, alsn, dbt, flags, 0);
1N/A    }
1N/A
1N/A    UNLOCK_LOGREGION(dblp);
1N/A
1N/A    return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __log_get --
1N/A *  Get a log record; internal version.
1N/A *
1N/A * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int));
1N/A */
1N/Aint
1N/A__log_get(dblp, alsn, dbt, flags, silent)
1N/A    DB_LOG *dblp;
1N/A    DB_LSN *alsn;
1N/A    DBT *dbt;
1N/A    u_int32_t flags;
1N/A    int silent;
1N/A{
1N/A    DB_LSN nlsn;
1N/A    HDR hdr;
1N/A    LOG *lp;
1N/A    size_t len;
1N/A    ssize_t nr;
1N/A    int cnt, ret;
1N/A    char *np, *tbuf;
1N/A    const char *fail;
1N/A    void *p, *shortp;
1N/A
1N/A    lp = dblp->lp;
1N/A    fail = np = tbuf = NULL;
1N/A
1N/A    nlsn = dblp->c_lsn;
1N/A    switch (flags) {
1N/A    case DB_CHECKPOINT:
1N/A        nlsn = lp->chkpt_lsn;
1N/A        if (IS_ZERO_LSN(nlsn)) {
1N/A            __db_err(dblp->dbenv,
1N/A    "log_get: unable to find checkpoint record: no checkpoint set.");
1N/A            ret = ENOENT;
1N/A            goto err2;
1N/A        }
1N/A        break;
1N/A    case DB_NEXT:               /* Next log record. */
1N/A        if (!IS_ZERO_LSN(nlsn)) {
1N/A            /* Increment the cursor by the cursor record size. */
1N/A            nlsn.offset += dblp->c_len;
1N/A            break;
1N/A        }
1N/A        /* FALLTHROUGH */
1N/A    case DB_FIRST:              /* Find the first log record. */
1N/A        /* Find the first log file. */
1N/A        if ((ret = __log_find(dblp, 1, &cnt)) != 0)
1N/A            goto err2;
1N/A
1N/A        /*
1N/A         * We may have only entered records in the buffer, and not
1N/A         * yet written a log file.  If no log files were found and
1N/A         * there's anything in the buffer, it belongs to file 1.
1N/A         */
1N/A        if (cnt == 0)
1N/A            cnt = 1;
1N/A
1N/A        nlsn.file = cnt;
1N/A        nlsn.offset = 0;
1N/A        break;
1N/A    case DB_CURRENT:            /* Current log record. */
1N/A        break;
1N/A    case DB_PREV:               /* Previous log record. */
1N/A        if (!IS_ZERO_LSN(nlsn)) {
1N/A            /* If at start-of-file, move to the previous file. */
1N/A            if (nlsn.offset == 0) {
1N/A                if (nlsn.file == 1 ||
1N/A                    __log_valid(dblp, nlsn.file - 1, 0) != 0)
1N/A                    return (DB_NOTFOUND);
1N/A
1N/A                --nlsn.file;
1N/A                nlsn.offset = dblp->c_off;
1N/A            } else
1N/A                nlsn.offset = dblp->c_off;
1N/A            break;
1N/A        }
1N/A        /* FALLTHROUGH */
1N/A    case DB_LAST:               /* Last log record. */
1N/A        nlsn.file = lp->lsn.file;
1N/A        nlsn.offset = lp->lsn.offset - lp->len;
1N/A        break;
1N/A    case DB_SET:                /* Set log record. */
1N/A        nlsn = *alsn;
1N/A        break;
1N/A    }
1N/A
1N/Aretry:
1N/A    /* Return 1 if the request is past end-of-file. */
1N/A    if (nlsn.file > lp->lsn.file ||
1N/A        (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset))
1N/A        return (DB_NOTFOUND);
1N/A
1N/A    /* If we've switched files, discard the current fd. */
1N/A    if (dblp->c_lsn.file != nlsn.file && dblp->c_fd != -1) {
1N/A        (void)__os_close(dblp->c_fd);
1N/A        dblp->c_fd = -1;
1N/A    }
1N/A
1N/A    /* If the entire record is in the in-memory buffer, copy it out. */
1N/A    if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) {
1N/A        /* Copy the header. */
1N/A        p = lp->buf + (nlsn.offset - lp->w_off);
1N/A        memcpy(&hdr, p, sizeof(HDR));
1N/A
1N/A        /* Copy the record. */
1N/A        len = hdr.len - sizeof(HDR);
1N/A        if ((ret = __db_retcopy(dbt, (u_int8_t *)p + sizeof(HDR),
1N/A            len, &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
1N/A            goto err1;
1N/A        goto cksum;
1N/A    }
1N/A
1N/A    /* Acquire a file descriptor. */
1N/A    if (dblp->c_fd == -1) {
1N/A        if ((ret = __log_name(dblp, nlsn.file,
1N/A            &np, &dblp->c_fd, DB_RDONLY | DB_SEQUENTIAL)) != 0) {
1N/A            fail = np;
1N/A            goto err1;
1N/A        }
1N/A        __os_freestr(np);
1N/A        np = NULL;
1N/A    }
1N/A
1N/A    /* Seek to the header offset and read the header. */
1N/A    if ((ret =
1N/A        __os_seek(dblp->c_fd, 0, 0, nlsn.offset, 0, SEEK_SET)) != 0) {
1N/A        fail = "seek";
1N/A        goto err1;
1N/A    }
1N/A    if ((ret = __os_read(dblp->c_fd, &hdr, sizeof(HDR), &nr)) != 0) {
1N/A        fail = "read";
1N/A        goto err1;
1N/A    }
1N/A    if (nr == sizeof(HDR))
1N/A        shortp = NULL;
1N/A    else {
1N/A        /* If read returns EOF, try the next file. */
1N/A        if (nr == 0) {
1N/A            if (flags != DB_NEXT || nlsn.file == lp->lsn.file)
1N/A                goto corrupt;
1N/A
1N/A            /* Move to the next file. */
1N/A            ++nlsn.file;
1N/A            nlsn.offset = 0;
1N/A            goto retry;
1N/A        }
1N/A
1N/A        /*
1N/A         * If read returns a short count the rest of the record has
1N/A         * to be in the in-memory buffer.
1N/A         */
1N/A        if (lp->b_off < sizeof(HDR) - nr)
1N/A            goto corrupt;
1N/A
1N/A        /* Get the rest of the header from the in-memory buffer. */
1N/A        memcpy((u_int8_t *)&hdr + nr, lp->buf, sizeof(HDR) - nr);
1N/A        shortp = lp->buf + (sizeof(HDR) - nr);
1N/A    }
1N/A
1N/A    /*
1N/A     * Check for buffers of 0's, that's what we usually see during
1N/A     * recovery, although it's certainly not something on which we
1N/A     * can depend.
1N/A     */
1N/A    if (hdr.len <= sizeof(HDR))
1N/A        goto corrupt;
1N/A    len = hdr.len - sizeof(HDR);
1N/A
1N/A    /* If we've already moved to the in-memory buffer, fill from there. */
1N/A    if (shortp != NULL) {
1N/A        if (lp->b_off < ((u_int8_t *)shortp - lp->buf) + len)
1N/A            goto corrupt;
1N/A        if ((ret = __db_retcopy(dbt, shortp, len,
1N/A            &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
1N/A            goto err1;
1N/A        goto cksum;
1N/A    }
1N/A
1N/A    /*
1N/A     * Allocate temporary memory to hold the record.
1N/A     *
1N/A     * XXX
1N/A     * We're calling malloc(3) with a region locked.  This isn't
1N/A     * a good idea.
1N/A     */
1N/A    if ((ret = __os_malloc(len, NULL, &tbuf)) != 0)
1N/A        goto err1;
1N/A
1N/A    /*
1N/A     * Read the record into the buffer.  If read returns a short count,
1N/A     * there was an error or the rest of the record is in the in-memory
1N/A     * buffer.  Note, the information may be garbage if we're in recovery,
1N/A     * so don't read past the end of the buffer's memory.
1N/A     */
1N/A    if ((ret = __os_read(dblp->c_fd, tbuf, len, &nr)) != 0) {
1N/A        fail = "read";
1N/A        goto err1;
1N/A    }
1N/A    if (len - nr > sizeof(lp->buf))
1N/A        goto corrupt;
1N/A    if (nr != (ssize_t)len) {
1N/A        if (lp->b_off < len - nr)
1N/A            goto corrupt;
1N/A
1N/A        /* Get the rest of the record from the in-memory buffer. */
1N/A        memcpy((u_int8_t *)tbuf + nr, lp->buf, len - nr);
1N/A    }
1N/A
1N/A    /* Copy the record into the user's DBT. */
1N/A    if ((ret = __db_retcopy(dbt, tbuf, len,
1N/A        &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
1N/A        goto err1;
1N/A    __os_free(tbuf, 0);
1N/A    tbuf = NULL;
1N/A
1N/Acksum:  if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) {
1N/A        if (!silent)
1N/A            __db_err(dblp->dbenv, "log_get: checksum mismatch");
1N/A        goto corrupt;
1N/A    }
1N/A
1N/A    /* Update the cursor and the return lsn. */
1N/A    dblp->c_off = hdr.prev;
1N/A    dblp->c_len = hdr.len;
1N/A    dblp->c_lsn = *alsn = nlsn;
1N/A
1N/A    return (0);
1N/A
1N/Acorrupt:/*
1N/A     * This is the catchall -- for some reason we didn't find enough
1N/A     * information or it wasn't reasonable information, and it wasn't
1N/A     * because a system call failed.
1N/A     */
1N/A    ret = EIO;
1N/A    fail = "read";
1N/A
1N/Aerr1:   if (!silent)
1N/A        if (fail == NULL)
1N/A            __db_err(dblp->dbenv, "log_get: %s", strerror(ret));
1N/A        else
1N/A            __db_err(dblp->dbenv,
1N/A                "log_get: %s: %s", fail, strerror(ret));
1N/Aerr2:   if (np != NULL)
1N/A        __os_freestr(np);
1N/A    if (tbuf != NULL)
1N/A        __os_free(tbuf, 0);
1N/A    return (ret);
1N/A}