db/btree/bt_recno.c

1N/A/*-
1N/A * See the file LICENSE for redistribution information.
1N/A *
1N/A * Copyright (c) 1997, 1998
1N/A *  Sleepycat Software.  All rights reserved.
1N/A */
1N/A
1N/A#include "config.h"
1N/A
1N/A#ifndef lint
1N/Astatic const char sccsid[] = "@(#)bt_recno.c    10.53 (Sleepycat) 12/11/98";
1N/A#endif /* not lint */
1N/A
1N/A#ifndef NO_SYSTEM_INCLUDES
1N/A#include <sys/types.h>
1N/A
1N/A#include <errno.h>
1N/A#include <limits.h>
1N/A#include <string.h>
1N/A#endif
1N/A
1N/A#include "db_int.h"
1N/A#include "db_page.h"
1N/A#include "btree.h"
1N/A#include "db_ext.h"
1N/A#include "shqueue.h"
1N/A#include "db_shash.h"
1N/A#include "lock.h"
1N/A#include "lock_ext.h"
1N/A
1N/Astatic int __ram_add __P((DBC *, db_recno_t *, DBT *, u_int32_t, u_int32_t));
1N/Astatic int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
1N/Astatic int __ram_fmap __P((DBC *, db_recno_t));
1N/Astatic int __ram_i_delete __P((DBC *));
1N/Astatic int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
1N/Astatic int __ram_source __P((DB *, RECNO *, const char *));
1N/Astatic int __ram_sync __P((DB *, u_int32_t));
1N/Astatic int __ram_update __P((DBC *, db_recno_t, int));
1N/Astatic int __ram_vmap __P((DBC *, db_recno_t));
1N/Astatic int __ram_writeback __P((DBC *));
1N/A
1N/A/*
1N/A * In recno, there are two meanings to the on-page "deleted" flag.  If we're
1N/A * re-numbering records, it means the record was implicitly created.  We skip
1N/A * over implicitly created records if doing a cursor "next" or "prev", and
 * return DB_KEYEMPTY if they're explicitly requested.  If not re-numbering
1N/A * records, it means that the record was implicitly created, or was deleted.
1N/A * We skip over implicitly created or deleted records if doing a cursor "next"
1N/A * or "prev", and return DB_KEYEMPTY if they're explicitly requested.
1N/A *
1N/A * If we're re-numbering records, then we have to detect in the cursor that
1N/A * a record was deleted, and adjust the cursor as necessary on the next get.
1N/A * If we're not re-numbering records, then we can detect that a record has
1N/A * been deleted by looking at the actual on-page record, so we completely
1N/A * ignore the cursor's delete flag.  This is different from the B+tree code.
1N/A * It also maintains whether the cursor references a deleted record in the
1N/A * cursor, and it doesn't always check the on-page value.
1N/A */
/*
 * CD_SET, CD_CLR, CD_ISSET --
 *  Set, clear and test the C_DELETED flag on a cursor.  The flag is only
 *  modified or reported when DB_RE_RENUMBER is set on the DB handle; when
 *  it is not, CD_SET and CD_CLR do nothing and CD_ISSET evaluates to 0.
 *
 *  The statement macros are wrapped in do/while(0) so that each one expands
 *  to exactly one statement and can be used safely as the body of an
 *  unbraced if/else.  Callers must terminate them with a semicolon.
 */
#define CD_SET(dbp, cp) do {                        \
    if (F_ISSET(dbp, DB_RE_RENUMBER))               \
        F_SET(cp, C_DELETED);                   \
} while (0)
#define CD_CLR(dbp, cp) do {                        \
    if (F_ISSET(dbp, DB_RE_RENUMBER))               \
        F_CLR(cp, C_DELETED);                   \
} while (0)
#define CD_ISSET(dbp, cp)                       \
    (F_ISSET(dbp, DB_RE_RENUMBER) && F_ISSET(cp, C_DELETED))
1N/A
1N/A/*
1N/A * __ram_open --
1N/A *  Recno open function.
1N/A *
1N/A * PUBLIC: int __ram_open __P((DB *, DB_INFO *));
1N/A */
1N/Aint
1N/A__ram_open(dbp, dbinfo)
1N/A    DB *dbp;
1N/A    DB_INFO *dbinfo;
1N/A{
1N/A    BTREE *t;
1N/A    DBC *dbc;
1N/A    RECNO *rp;
1N/A    int ret, t_ret;
1N/A
1N/A    /* Allocate and initialize the private btree structure. */
1N/A    if ((ret = __os_calloc(1, sizeof(BTREE), &t)) != 0)
1N/A        return (ret);
1N/A    dbp->internal = t;
1N/A    __bam_setovflsize(dbp);
1N/A
1N/A    /* Allocate and initialize the private recno structure. */
1N/A    if ((ret = __os_calloc(1, sizeof(*rp), &rp)) != 0)
1N/A        return (ret);
1N/A    /* Link in the private recno structure. */
1N/A    t->recno = rp;
1N/A
1N/A    /*
1N/A     * Intention is to make sure all of the user's selections are okay
1N/A     * here and then use them without checking.
1N/A     */
1N/A    if (dbinfo == NULL) {
1N/A        rp->re_delim = '\n';
1N/A        rp->re_pad = ' ';
1N/A        rp->re_fd = -1;
1N/A        F_SET(rp, RECNO_EOF);
1N/A    } else {
1N/A        /*
1N/A         * If the user specified a source tree, open it and map it in.
1N/A         *
1N/A         * !!!
1N/A         * We don't complain if the user specified transactions or
1N/A         * threads.  It's possible to make it work, but you'd better
1N/A         * know what you're doing!
1N/A         */
1N/A        if (dbinfo->re_source == NULL) {
1N/A            rp->re_fd = -1;
1N/A            F_SET(rp, RECNO_EOF);
1N/A        } else {
1N/A            if ((ret =
1N/A                __ram_source(dbp, rp, dbinfo->re_source)) != 0)
1N/A            goto err;
1N/A        }
1N/A
1N/A        /* Copy delimiter, length and padding values. */
1N/A        rp->re_delim =
1N/A            F_ISSET(dbp, DB_RE_DELIMITER) ? dbinfo->re_delim : '\n';
1N/A        rp->re_pad = F_ISSET(dbp, DB_RE_PAD) ? dbinfo->re_pad : ' ';
1N/A
1N/A        if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
1N/A            if ((rp->re_len = dbinfo->re_len) == 0) {
1N/A                __db_err(dbp->dbenv,
1N/A                    "record length must be greater than 0");
1N/A                ret = EINVAL;
1N/A                goto err;
1N/A            }
1N/A        } else
1N/A            rp->re_len = 0;
1N/A    }
1N/A
1N/A    /* Initialize the remaining fields/methods of the DB. */
1N/A    dbp->am_close = __ram_close;
1N/A    dbp->del = __ram_delete;
1N/A    dbp->put = __ram_put;
1N/A    dbp->stat = __bam_stat;
1N/A    dbp->sync = __ram_sync;
1N/A
1N/A    /* Start up the tree. */
1N/A    if ((ret = __bam_read_root(dbp)) != 0)
1N/A        goto err;
1N/A
1N/A    /* Set the overflow page size. */
1N/A    __bam_setovflsize(dbp);
1N/A
1N/A    /* If we're snapshotting an underlying source file, do it now. */
1N/A    if (dbinfo != NULL && F_ISSET(dbinfo, DB_SNAPSHOT)) {
1N/A        /* Allocate a cursor. */
1N/A        if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
1N/A            goto err;
1N/A
1N/A        /* Do the snapshot. */
1N/A        if ((ret = __ram_update(dbc,
1N/A            DB_MAX_RECORDS, 0)) != 0 && ret == DB_NOTFOUND)
1N/A            ret = 0;
1N/A
1N/A        /* Discard the cursor. */
1N/A        if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
1N/A            ret = t_ret;
1N/A
1N/A        if (ret != 0)
1N/A            goto err;
1N/A    }
1N/A
1N/A    return (0);
1N/A
1N/Aerr:    /* If we mmap'd a source file, discard it. */
1N/A    if (rp->re_smap != NULL)
1N/A        (void)__db_unmapfile(rp->re_smap, rp->re_msize);
1N/A
1N/A    /* If we opened a source file, discard it. */
1N/A    if (rp->re_fd != -1)
1N/A        (void)__os_close(rp->re_fd);
1N/A    if (rp->re_source != NULL)
1N/A        __os_freestr(rp->re_source);
1N/A
1N/A    __os_free(rp, sizeof(*rp));
1N/A
1N/A    return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __ram_delete --
1N/A *  Recno db->del function.
1N/A */
1N/Astatic int
1N/A__ram_delete(dbp, txn, key, flags)
1N/A    DB *dbp;
1N/A    DB_TXN *txn;
1N/A    DBT *key;
1N/A    u_int32_t flags;
1N/A{
1N/A    CURSOR *cp;
1N/A    DBC *dbc;
1N/A    db_recno_t recno;
1N/A    int ret, t_ret;
1N/A
1N/A    DB_PANIC_CHECK(dbp);
1N/A
1N/A    /* Check for invalid flags. */
1N/A    if ((ret = __db_delchk(dbp,
1N/A        key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
1N/A        return (ret);
1N/A
1N/A    /* Acquire a cursor. */
1N/A    if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
1N/A        return (ret);
1N/A
1N/A    DEBUG_LWRITE(dbc, txn, "ram_delete", key, NULL, flags);
1N/A
1N/A    /* Check the user's record number and fill in as necessary. */
1N/A    if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0)
1N/A        goto err;
1N/A
1N/A    /* Do the delete. */
1N/A    cp = dbc->internal;
1N/A    cp->recno = recno;
1N/A    ret = __ram_i_delete(dbc);
1N/A
1N/A    /* Release the cursor. */
1N/Aerr:    if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
1N/A        ret = t_ret;
1N/A
1N/A    return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __ram_i_delete --
1N/A *  Internal version of recno delete, called by __ram_delete and
1N/A *  __ram_c_del.
1N/A */
1N/Astatic int
1N/A__ram_i_delete(dbc)
1N/A    DBC *dbc;
1N/A{
1N/A    BKEYDATA bk;
1N/A    BTREE *t;
1N/A    CURSOR *cp;
1N/A    DB *dbp;
1N/A    DBT hdr, data;
1N/A    PAGE *h;
1N/A    db_indx_t indx;
1N/A    int exact, ret, stack;
1N/A
1N/A    dbp = dbc->dbp;
1N/A    cp = dbc->internal;
1N/A    t = dbp->internal;
1N/A    stack = 0;
1N/A
1N/A    /*
1N/A     * If this is CDB and this isn't a write cursor, then it's an error.
1N/A     * If it is a write cursor, but we don't yet hold the write lock, then
1N/A     * we need to upgrade to the write lock.
1N/A     */
1N/A    if (F_ISSET(dbp, DB_AM_CDB)) {
1N/A        /* Make sure it's a valid update cursor. */
1N/A        if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
1N/A            return (EINVAL);
1N/A
1N/A        if (F_ISSET(dbc, DBC_RMW) &&
1N/A            (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
1N/A            DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
1N/A            &dbc->mylock)) != 0)
1N/A            return (EAGAIN);
1N/A    }
1N/A
1N/A    /* Search the tree for the key; delete only deletes exact matches. */
1N/A    if ((ret = __bam_rsearch(dbc, &cp->recno, S_DELETE, 1, &exact)) != 0)
1N/A        goto err;
1N/A    if (!exact) {
1N/A        ret = DB_NOTFOUND;
1N/A        goto err;
1N/A    }
1N/A    stack = 1;
1N/A
1N/A    h = cp->csp->page;
1N/A    indx = cp->csp->indx;
1N/A
1N/A    /*
1N/A     * If re-numbering records, the on-page deleted flag can only mean
1N/A     * that this record was implicitly created.  Applications aren't
1N/A     * permitted to delete records they never created, return an error.
1N/A     *
1N/A     * If not re-numbering records, the on-page deleted flag means that
1N/A     * this record was implicitly created, or, was deleted at some time.
1N/A     * The former is an error because applications aren't permitted to
1N/A     * delete records they never created, the latter is an error because
1N/A     * if the record was "deleted", we could never have found it.
1N/A     */
1N/A    if (B_DISSET(GET_BKEYDATA(h, indx)->type)) {
1N/A        ret = DB_KEYEMPTY;
1N/A        goto err;
1N/A    }
1N/A
1N/A    if (F_ISSET(dbp, DB_RE_RENUMBER)) {
1N/A        /* Delete the item, adjust the counts, adjust the cursors. */
1N/A        if ((ret = __bam_ditem(dbc, h, indx)) != 0)
1N/A            goto err;
1N/A        __bam_adjust(dbc, -1);
1N/A        __ram_ca(dbp, cp->recno, CA_DELETE);
1N/A
1N/A        /*
1N/A         * If the page is empty, delete it.   The whole tree is locked
1N/A         * so there are no preparations to make.
1N/A         */
1N/A        if (NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT) {
1N/A            stack = 0;
1N/A            ret = __bam_dpages(dbc);
1N/A        }
1N/A    } else {
1N/A        /* Use a delete/put pair to replace the record with a marker. */
1N/A        if ((ret = __bam_ditem(dbc, h, indx)) != 0)
1N/A            goto err;
1N/A
1N/A        B_TSET(bk.type, B_KEYDATA, 1);
1N/A        bk.len = 0;
1N/A        memset(&hdr, 0, sizeof(hdr));
1N/A        hdr.data = &bk;
1N/A        hdr.size = SSZA(BKEYDATA, data);
1N/A        memset(&data, 0, sizeof(data));
1N/A        data.data = (char *)"";
1N/A        data.size = 0;
1N/A        if ((ret = __db_pitem(dbc,
1N/A            h, indx, BKEYDATA_SIZE(0), &hdr, &data)) != 0)
1N/A            goto err;
1N/A    }
1N/A    F_SET(t->recno, RECNO_MODIFIED);
1N/A
1N/Aerr:    if (stack)
1N/A        __bam_stkrel(dbc, 0);
1N/A
1N/A    /* If we upgraded the CDB lock upon entry; downgrade it now. */
1N/A    if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
1N/A        (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
1N/A            DB_LOCK_IWRITE, 0);
1N/A    return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __ram_put --
1N/A *  Recno db->put function.
1N/A */
1N/Astatic int
1N/A__ram_put(dbp, txn, key, data, flags)
1N/A    DB *dbp;
1N/A    DB_TXN *txn;
1N/A    DBT *key, *data;
1N/A    u_int32_t flags;
1N/A{
1N/A    DBC *dbc;
1N/A    db_recno_t recno;
1N/A    int ret, t_ret;
1N/A
1N/A    DB_PANIC_CHECK(dbp);
1N/A
1N/A    /* Check for invalid flags. */
1N/A    if ((ret = __db_putchk(dbp,
1N/A        key, data, flags, F_ISSET(dbp, DB_AM_RDONLY), 0)) != 0)
1N/A        return (ret);
1N/A
1N/A    /* Allocate a cursor. */
1N/A    if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
1N/A        return (ret);
1N/A
1N/A    DEBUG_LWRITE(dbc, txn, "ram_put", key, data, flags);
1N/A
1N/A    /*
1N/A     * If we're appending to the tree, make sure we've read in all of
1N/A     * the backing source file.  Otherwise, check the user's record
1N/A     * number and fill in as necessary.
1N/A     */
1N/A    ret = flags == DB_APPEND ?
1N/A        __ram_update(dbc, DB_MAX_RECORDS, 0) :
1N/A        __ram_getno(dbc, key, &recno, 1);
1N/A
1N/A    /* Add the record. */
1N/A    if (ret == 0)
1N/A        ret = __ram_add(dbc, &recno, data, flags, 0);
1N/A
1N/A    /* Discard the cursor. */
1N/A    if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
1N/A        ret = t_ret;
1N/A
1N/A    /* Return the record number if we're appending to the tree. */
1N/A    if (ret == 0 && flags == DB_APPEND)
1N/A        *(db_recno_t *)key->data = recno;
1N/A
1N/A    return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __ram_sync --
1N/A *  Recno db->sync function.
1N/A */
1N/Astatic int
1N/A__ram_sync(dbp, flags)
1N/A    DB *dbp;
1N/A    u_int32_t flags;
1N/A{
1N/A    DBC *dbc;
1N/A    int ret, t_ret;
1N/A
1N/A    /*
1N/A     * Sync the underlying btree.
1N/A     *
1N/A     * !!!
1N/A     * We don't need to do a panic check or flags check, the "real"
1N/A     * sync function does all that for us.
1N/A     */
1N/A    if ((ret = __db_sync(dbp, flags)) != 0)
1N/A        return (ret);
1N/A
1N/A    /* Allocate a cursor. */
1N/A    if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
1N/A        return (ret);
1N/A
1N/A    DEBUG_LWRITE(dbc, NULL, "ram_sync", NULL, NULL, flags);
1N/A
1N/A    /* Copy back the backing source file. */
1N/A    ret = __ram_writeback(dbc);
1N/A
1N/A    /* Discard the cursor. */
1N/A    if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
1N/A        ret = t_ret;
1N/A
1N/A    return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __ram_close --
1N/A *  Recno db->close function.
1N/A *
1N/A * PUBLIC: int __ram_close __P((DB *));
1N/A */
1N/Aint
1N/A__ram_close(dbp)
1N/A    DB *dbp;
1N/A{
1N/A    RECNO *rp;
1N/A
1N/A    rp = ((BTREE *)dbp->internal)->recno;
1N/A
1N/A    /* Close any underlying mmap region. */
1N/A    if (rp->re_smap != NULL)
1N/A        (void)__db_unmapfile(rp->re_smap, rp->re_msize);
1N/A
1N/A    /* Close any backing source file descriptor. */
1N/A    if (rp->re_fd != -1)
1N/A        (void)__os_close(rp->re_fd);
1N/A
1N/A    /* Free any backing source file name. */
1N/A    if (rp->re_source != NULL)
1N/A        __os_freestr(rp->re_source);
1N/A
1N/A    /* Free allocated memory. */
1N/A    __os_free(rp, sizeof(RECNO));
1N/A    ((BTREE *)dbp->internal)->recno = NULL;
1N/A
1N/A    /* Close the underlying btree. */
1N/A    return (__bam_close(dbp));
1N/A}
1N/A
1N/A/*
1N/A * __ram_c_del --
1N/A *  Recno cursor->c_del function.
1N/A *
1N/A * PUBLIC: int __ram_c_del __P((DBC *, u_int32_t));
1N/A */
1N/Aint
1N/A__ram_c_del(dbc, flags)
1N/A    DBC *dbc;
1N/A    u_int32_t flags;
1N/A{
1N/A    CURSOR *cp;
1N/A    DB *dbp;
1N/A    int ret;
1N/A
1N/A    dbp = dbc->dbp;
1N/A    cp = dbc->internal;
1N/A
1N/A    DB_PANIC_CHECK(dbp);
1N/A
1N/A    /* Check for invalid flags. */
1N/A    if ((ret = __db_cdelchk(dbp, flags,
1N/A        F_ISSET(dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0)
1N/A        return (ret);
1N/A
1N/A    DEBUG_LWRITE(dbc, dbc->txn, "ram_c_del", NULL, NULL, flags);
1N/A
1N/A    /*
1N/A     * If we are running CDB, this had better be either a write
1N/A     * cursor or an immediate writer.
1N/A     */
1N/A    if (F_ISSET(dbp, DB_AM_CDB))
1N/A        if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
1N/A            return (EINVAL);
1N/A
1N/A    /*
1N/A     * The semantics of cursors during delete are as follows: if record
1N/A     * numbers are mutable (DB_RE_RENUMBER is set), deleting a record
1N/A     * causes the cursor to automatically point to the record immediately
1N/A     * following.  In this case it is possible to use a single cursor for
1N/A     * repeated delete operations, without intervening operations.
1N/A     *
1N/A     * If record numbers are not mutable, then records are replaced with
1N/A     * a marker containing a delete flag.  If the record referenced by
1N/A     * this cursor has already been deleted, we will detect that as part
1N/A     * of the delete operation, and fail.
1N/A     */
1N/A    return (__ram_i_delete(dbc));
1N/A}
1N/A
1N/A/*
1N/A * __ram_c_get --
1N/A *  Recno cursor->c_get function.
1N/A *
1N/A * PUBLIC: int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
1N/A */
1N/Aint
1N/A__ram_c_get(dbc, key, data, flags)
1N/A    DBC *dbc;
1N/A    DBT *key, *data;
1N/A    u_int32_t flags;
1N/A{
1N/A    CURSOR *cp, copy;
1N/A    DB *dbp;
1N/A    PAGE *h;
1N/A    db_indx_t indx;
1N/A    int exact, ret, stack, tmp_rmw;
1N/A
1N/A    dbp = dbc->dbp;
1N/A    cp = dbc->internal;
1N/A
1N/A    DB_PANIC_CHECK(dbp);
1N/A
1N/A    /* Check for invalid flags. */
1N/A    if ((ret = __db_cgetchk(dbc->dbp,
1N/A        key, data, flags, cp->recno != RECNO_OOB)) != 0)
1N/A        return (ret);
1N/A
1N/A    /* Clear OR'd in additional bits so we can check for flag equality. */
1N/A    tmp_rmw = 0;
1N/A    if (LF_ISSET(DB_RMW)) {
1N/A        if (!F_ISSET(dbp, DB_AM_CDB)) {
1N/A            tmp_rmw = 1;
1N/A            F_SET(dbc, DBC_RMW);
1N/A        }
1N/A        LF_CLR(DB_RMW);
1N/A    }
1N/A
1N/A    DEBUG_LREAD(dbc, dbc->txn, "ram_c_get",
1N/A        flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags);
1N/A
1N/A    /* Initialize the cursor for a new retrieval. */
1N/A    copy = *cp;
1N/A
1N/Aretry:  /* Update the record number. */
1N/A    stack = 0;
1N/A    switch (flags) {
1N/A    case DB_CURRENT:
1N/A        /*
1N/A         * If record numbers are mutable: if we just deleted a record,
1N/A         * there is no action necessary, we return the record following
1N/A         * the deleted item by virtue of renumbering the tree.
1N/A         */
1N/A        break;
1N/A    case DB_NEXT:
1N/A        /*
1N/A         * If record numbers are mutable: if we just deleted a record,
1N/A         * we have to avoid incrementing the record number so that we
1N/A         * return the right record by virtue of renumbering the tree.
1N/A         */
1N/A        if (CD_ISSET(dbp, cp))
1N/A            break;
1N/A
1N/A        if (cp->recno != RECNO_OOB) {
1N/A            ++cp->recno;
1N/A            break;
1N/A        }
1N/A        /* FALLTHROUGH */
1N/A    case DB_FIRST:
1N/A        flags = DB_NEXT;
1N/A        cp->recno = 1;
1N/A        break;
1N/A    case DB_PREV:
1N/A        if (cp->recno != RECNO_OOB) {
1N/A            if (cp->recno == 1) {
1N/A                ret = DB_NOTFOUND;
1N/A                goto err;
1N/A            }
1N/A            --cp->recno;
1N/A            break;
1N/A        }
1N/A        /* FALLTHROUGH */
1N/A    case DB_LAST:
1N/A        flags = DB_PREV;
1N/A        if (((ret = __ram_update(dbc,
1N/A            DB_MAX_RECORDS, 0)) != 0) && ret != DB_NOTFOUND)
1N/A            goto err;
1N/A        if ((ret = __bam_nrecs(dbc, &cp->recno)) != 0)
1N/A            goto err;
1N/A        if (cp->recno == 0) {
1N/A            ret = DB_NOTFOUND;
1N/A            goto err;
1N/A        }
1N/A        break;
1N/A    case DB_SET:
1N/A    case DB_SET_RANGE:
1N/A        if ((ret = __ram_getno(dbc, key, &cp->recno, 0)) != 0)
1N/A            goto err;
1N/A        break;
1N/A    }
1N/A
1N/A    /* Return the key if the user didn't give us one. */
1N/A    if (flags != DB_SET && flags != DB_SET_RANGE &&
1N/A        (ret = __db_retcopy(key, &cp->recno, sizeof(cp->recno),
1N/A        &dbc->rkey.data, &dbc->rkey.ulen, dbp->db_malloc)) != 0)
1N/A        goto err;
1N/A
1N/A    /* Search the tree for the record. */
1N/A    if ((ret = __bam_rsearch(dbc, &cp->recno,
1N/A        F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND, 1, &exact)) != 0)
1N/A        goto err;
1N/A    stack = 1;
1N/A    if (!exact) {
1N/A        ret = DB_NOTFOUND;
1N/A        goto err;
1N/A    }
1N/A    h = cp->csp->page;
1N/A    indx = cp->csp->indx;
1N/A
1N/A    /*
1N/A     * If re-numbering records, the on-page deleted flag means this record
1N/A     * was implicitly created.  If not re-numbering records, the on-page
1N/A     * deleted flag means this record was implicitly created, or, it was
1N/A     * deleted at some time.  Regardless, we skip such records if doing
1N/A     * cursor next/prev operations, and fail if the application requested
1N/A     * them explicitly.
1N/A     */
1N/A    if (B_DISSET(GET_BKEYDATA(h, indx)->type)) {
1N/A        if (flags == DB_NEXT || flags == DB_PREV) {
1N/A            (void)__bam_stkrel(dbc, 0);
1N/A            goto retry;
1N/A        }
1N/A        ret = DB_KEYEMPTY;
1N/A        goto err;
1N/A    }
1N/A
1N/A    /* Return the data item. */
1N/A    if ((ret = __db_ret(dbp,
1N/A        h, indx, data, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
1N/A        goto err;
1N/A
1N/A    /* The cursor was reset, no further delete adjustment is necessary. */
1N/A    CD_CLR(dbp, cp);
1N/A
1N/Aerr:    if (stack)
1N/A        (void)__bam_stkrel(dbc, 0);
1N/A
1N/A    /* Release temporary lock upgrade. */
1N/A    if (tmp_rmw)
1N/A        F_CLR(dbc, DBC_RMW);
1N/A
1N/A    if (ret != 0)
1N/A        *cp = copy;
1N/A
1N/A    return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __ram_c_put --
1N/A *  Recno cursor->c_put function.
1N/A *
1N/A * PUBLIC: int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
1N/A */
1N/Aint
1N/A__ram_c_put(dbc, key, data, flags)
1N/A    DBC *dbc;
1N/A    DBT *key, *data;
1N/A    u_int32_t flags;
1N/A{
1N/A    CURSOR *cp, copy;
1N/A    DB *dbp;
1N/A    int exact, ret;
1N/A    void *arg;
1N/A
1N/A    dbp = dbc->dbp;
1N/A    cp = dbc->internal;
1N/A
1N/A    DB_PANIC_CHECK(dbp);
1N/A
1N/A    if ((ret = __db_cputchk(dbc->dbp, key, data, flags,
1N/A        F_ISSET(dbc->dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0)
1N/A        return (ret);
1N/A
1N/A    DEBUG_LWRITE(dbc, dbc->txn, "ram_c_put", NULL, data, flags);
1N/A
1N/A    /*
1N/A     * If we are running CDB, this had better be either a write
1N/A     * cursor or an immediate writer.  If it's a regular writer,
1N/A     * that means we have an IWRITE lock and we need to upgrade
1N/A     * it to a write lock.
1N/A     */
1N/A    if (F_ISSET(dbp, DB_AM_CDB)) {
1N/A        if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
1N/A            return (EINVAL);
1N/A
1N/A        if (F_ISSET(dbc, DBC_RMW) &&
1N/A            (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
1N/A            DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
1N/A            &dbc->mylock)) != 0)
1N/A            return (EAGAIN);
1N/A    }
1N/A
1N/A    /* Initialize the cursor for a new retrieval. */
1N/A    copy = *cp;
1N/A
1N/A    /*
1N/A     * To split, we need a valid key for the page.  Since it's a cursor,
1N/A     * we have to build one.
1N/A     *
1N/A     * The split code discards all short-term locks and stack pages.
1N/A     */
1N/A    if (0) {
1N/Asplit:      arg = &cp->recno;
1N/A        if ((ret = __bam_split(dbc, arg)) != 0)
1N/A            goto err;
1N/A    }
1N/A
1N/A    if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
1N/A        goto err;
1N/A    if (!exact) {
1N/A        ret = DB_NOTFOUND;
1N/A        goto err;
1N/A    }
1N/A    if ((ret = __bam_iitem(dbc, &cp->csp->page,
1N/A        &cp->csp->indx, key, data, flags, 0)) == DB_NEEDSPLIT) {
1N/A        if ((ret = __bam_stkrel(dbc, 0)) != 0)
1N/A            goto err;
1N/A        goto split;
1N/A    }
1N/A    if ((ret = __bam_stkrel(dbc, 0)) != 0)
1N/A        goto err;
1N/A
1N/A    switch (flags) {
1N/A    case DB_AFTER:
1N/A        /* Adjust the cursors. */
1N/A        __ram_ca(dbp, cp->recno, CA_IAFTER);
1N/A
1N/A        /* Set this cursor to reference the new record. */
1N/A        cp->recno = copy.recno + 1;
1N/A        break;
1N/A    case DB_BEFORE:
1N/A        /* Adjust the cursors. */
1N/A        __ram_ca(dbp, cp->recno, CA_IBEFORE);
1N/A
1N/A        /* Set this cursor to reference the new record. */
1N/A        cp->recno = copy.recno;
1N/A        break;
1N/A    }
1N/A
1N/A    /* The cursor was reset, no further delete adjustment is necessary. */
1N/A    CD_CLR(dbp, cp);
1N/A
1N/Aerr:    if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
1N/A        (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
1N/A            DB_LOCK_IWRITE, 0);
1N/A
1N/A    if (ret != 0)
1N/A        *cp = copy;
1N/A
1N/A    return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __ram_ca --
1N/A *  Adjust cursors.
1N/A *
1N/A * PUBLIC: void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
1N/A */
1N/Avoid
1N/A__ram_ca(dbp, recno, op)
1N/A    DB *dbp;
1N/A    db_recno_t recno;
1N/A    ca_recno_arg op;
1N/A{
1N/A    CURSOR *cp;
1N/A    DBC *dbc;
1N/A
1N/A    /*
1N/A     * Adjust the cursors.  See the comment in __bam_ca_delete().
1N/A     */
1N/A    DB_THREAD_LOCK(dbp);
1N/A    for (dbc = TAILQ_FIRST(&dbp->active_queue);
1N/A        dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
1N/A        cp = dbc->internal;
1N/A        switch (op) {
1N/A        case CA_DELETE:
1N/A            if (recno > cp->recno)
1N/A                --cp->recno;
1N/A            if (recno == cp->recno)
1N/A                CD_SET(dbp, cp);
1N/A            break;
1N/A        case CA_IAFTER:
1N/A            if (recno > cp->recno)
1N/A                ++cp->recno;
1N/A            break;
1N/A        case CA_IBEFORE:
1N/A            if (recno >= cp->recno)
1N/A                ++cp->recno;
1N/A            break;
1N/A        }
1N/A    }
1N/A    DB_THREAD_UNLOCK(dbp);
1N/A}
1N/A
1N/A/*
1N/A * __ram_getno --
1N/A *  Check the user's record number, and make sure we've seen it.
1N/A *
1N/A * PUBLIC: int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int));
1N/A */
1N/Aint
1N/A__ram_getno(dbc, key, rep, can_create)
1N/A    DBC *dbc;
1N/A    const DBT *key;
1N/A    db_recno_t *rep;
1N/A    int can_create;
1N/A{
1N/A    DB *dbp;
1N/A    db_recno_t recno;
1N/A
1N/A    dbp = dbc->dbp;
1N/A
1N/A    /* Check the user's record number. */
1N/A    if ((recno = *(db_recno_t *)key->data) == 0) {
1N/A        __db_err(dbp->dbenv, "illegal record number of 0");
1N/A        return (EINVAL);
1N/A    }
1N/A    if (rep != NULL)
1N/A        *rep = recno;
1N/A
1N/A    /*
1N/A     * Btree can neither create records nor read them in.  Recno can
1N/A     * do both, see if we can find the record.
1N/A     */
1N/A    return (dbp->type == DB_RECNO ?
1N/A        __ram_update(dbc, recno, can_create) : 0);
1N/A}
1N/A
1N/A/*
1N/A * __ram_update --
1N/A *  Ensure the tree has records up to and including the specified one.
1N/A */
1N/Astatic int
1N/A__ram_update(dbc, recno, can_create)
1N/A    DBC *dbc;
1N/A    db_recno_t recno;
1N/A    int can_create;
1N/A{
1N/A    BTREE *t;
1N/A    DB *dbp;
1N/A    RECNO *rp;
1N/A    db_recno_t nrecs;
1N/A    int ret;
1N/A
1N/A    dbp = dbc->dbp;
1N/A    t = dbp->internal;
1N/A    rp = t->recno;
1N/A
1N/A    /*
1N/A     * If we can't create records and we've read the entire backing input
1N/A     * file, we're done.
1N/A     */
1N/A    if (!can_create && F_ISSET(rp, RECNO_EOF))
1N/A        return (0);
1N/A
1N/A    /*
1N/A     * If we haven't seen this record yet, try to get it from the original
1N/A     * file.
1N/A     */
1N/A    if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
1N/A        return (ret);
1N/A    if (!F_ISSET(rp, RECNO_EOF) && recno > nrecs) {
1N/A        if ((ret = rp->re_irec(dbc, recno)) != 0)
1N/A            return (ret);
1N/A        if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
1N/A            return (ret);
1N/A    }
1N/A
1N/A    /*
1N/A     * If we can create records, create empty ones up to the requested
1N/A     * record.
1N/A     */
1N/A    if (!can_create || recno <= nrecs + 1)
1N/A        return (0);
1N/A
1N/A    dbc->rdata.dlen = 0;
1N/A    dbc->rdata.doff = 0;
1N/A    dbc->rdata.flags = 0;
1N/A    if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
1N/A        if (dbc->rdata.ulen < rp->re_len) {
1N/A            if ((ret =
1N/A                __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) {
1N/A                dbc->rdata.ulen = 0;
1N/A                dbc->rdata.data = NULL;
1N/A                return (ret);
1N/A            }
1N/A            dbc->rdata.ulen = rp->re_len;
1N/A        }
1N/A        dbc->rdata.size = rp->re_len;
1N/A        memset(dbc->rdata.data, rp->re_pad, rp->re_len);
1N/A    } else
1N/A        dbc->rdata.size = 0;
1N/A
1N/A    while (recno > ++nrecs)
1N/A        if ((ret = __ram_add(dbc,
1N/A            &nrecs, &dbc->rdata, 0, BI_DELETED)) != 0)
1N/A            return (ret);
1N/A    return (0);
1N/A}
1N/A
1N/A/*
1N/A * __ram_source --
1N/A *  Load information about the backing file.
1N/A */
1N/Astatic int
1N/A__ram_source(dbp, rp, fname)
1N/A    DB *dbp;
1N/A    RECNO *rp;
1N/A    const char *fname;
1N/A{
1N/A    size_t size;
1N/A    u_int32_t bytes, mbytes, oflags;
1N/A    int ret;
1N/A
1N/A    /*
1N/A     * !!!
1N/A     * The caller has full responsibility for cleaning up on error --
1N/A     * (it has to anyway, in case it fails after this routine succeeds).
1N/A     */
1N/A    if ((ret = __db_appname(dbp->dbenv,
1N/A        DB_APP_DATA, NULL, fname, 0, NULL, &rp->re_source)) != 0)
1N/A        return (ret);
1N/A
1N/A    oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0;
1N/A    if ((ret =
1N/A        __db_open(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) {
1N/A        __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
1N/A        return (ret);
1N/A    }
1N/A
1N/A    /*
1N/A     * XXX
1N/A     * We'd like to test to see if the file is too big to mmap.  Since we
1N/A     * don't know what size or type off_t's or size_t's are, or the largest
1N/A     * unsigned integral type is, or what random insanity the local C
1N/A     * compiler will perpetrate, doing the comparison in a portable way is
1N/A     * flatly impossible.  Hope that mmap fails if the file is too large.
1N/A     */
1N/A    if ((ret = __os_ioinfo(rp->re_source,
1N/A        rp->re_fd, &mbytes, &bytes, NULL)) != 0) {
1N/A        __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
1N/A        return (ret);
1N/A    }
1N/A    if (mbytes == 0 && bytes == 0) {
1N/A        F_SET(rp, RECNO_EOF);
1N/A        return (0);
1N/A    }
1N/A
1N/A    size = mbytes * MEGABYTE + bytes;
1N/A    if ((ret = __db_mapfile(rp->re_source,
1N/A        rp->re_fd, (size_t)size, 1, &rp->re_smap)) != 0)
1N/A        return (ret);
1N/A    rp->re_cmap = rp->re_smap;
1N/A    rp->re_emap = (u_int8_t *)rp->re_smap + (rp->re_msize = size);
1N/A    rp->re_irec = F_ISSET(dbp, DB_RE_FIXEDLEN) ?  __ram_fmap : __ram_vmap;
1N/A    return (0);
1N/A}
1N/A
1N/A/*
1N/A * __ram_writeback --
1N/A *  Rewrite the backing file.
1N/A */
1N/Astatic int
1N/A__ram_writeback(dbc)
1N/A    DBC *dbc;
1N/A{
1N/A    DB *dbp;
1N/A    DBT key, data;
1N/A    RECNO *rp;
1N/A    db_recno_t keyno;
1N/A    ssize_t nw;
1N/A    int fd, ret, t_ret;
1N/A    u_int8_t delim, *pad;
1N/A
1N/A    dbp = dbc->dbp;
1N/A    rp = ((BTREE *)dbp->internal)->recno;
1N/A
1N/A    /* If the file wasn't modified, we're done. */
1N/A    if (!F_ISSET(rp, RECNO_MODIFIED))
1N/A        return (0);
1N/A
1N/A    /* If there's no backing source file, we're done. */
1N/A    if (rp->re_source == NULL) {
1N/A        F_CLR(rp, RECNO_MODIFIED);
1N/A        return (0);
1N/A    }
1N/A
1N/A    /*
1N/A     * Read any remaining records into the tree.
1N/A     *
1N/A     * !!!
1N/A     * This is why we can't support transactions when applications specify
1N/A     * backing (re_source) files.  At this point we have to read in the
1N/A     * rest of the records from the file so that we can write all of the
1N/A     * records back out again, which could modify a page for which we'd
1N/A     * have to log changes and which we don't have locked.  This could be
1N/A     * partially fixed by taking a snapshot of the entire file during the
1N/A     * db_open(), or, since db_open() isn't transaction protected, as part
1N/A     * of the first DB operation.  But, if a checkpoint occurs then, the
1N/A     * part of the log holding the copy of the file could be discarded, and
1N/A     * that would make it impossible to recover in the face of disaster.
1N/A     * This could all probably be fixed, but it would require transaction
1N/A     * protecting the backing source file, i.e. mpool would have to know
1N/A     * about it, and we don't want to go there.
1N/A     */
1N/A    if ((ret =
1N/A        __ram_update(dbc, DB_MAX_RECORDS, 0)) != 0 && ret != DB_NOTFOUND)
1N/A        return (ret);
1N/A
1N/A    /*
1N/A     * !!!
1N/A     * Close any underlying mmap region.  This is required for Windows NT
1N/A     * (4.0, Service Pack 2) -- if the file is still mapped, the following
1N/A     * open will fail.
1N/A     */
1N/A    if (rp->re_smap != NULL) {
1N/A        (void)__db_unmapfile(rp->re_smap, rp->re_msize);
1N/A        rp->re_smap = NULL;
1N/A    }
1N/A
1N/A    /* Get rid of any backing file descriptor, just on GP's. */
1N/A    if (rp->re_fd != -1) {
1N/A        (void)__os_close(rp->re_fd);
1N/A        rp->re_fd = -1;
1N/A    }
1N/A
1N/A    /* Open the file, truncating it. */
1N/A    if ((ret = __db_open(rp->re_source,
1N/A        DB_SEQUENTIAL | DB_TRUNCATE,
1N/A        DB_SEQUENTIAL | DB_TRUNCATE, 0, &fd)) != 0) {
1N/A        __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
1N/A        return (ret);
1N/A    }
1N/A
1N/A    /*
1N/A     * We step through the records, writing each one out.  Use the record
1N/A     * number and the dbp->get() function, instead of a cursor, so we find
1N/A     * and write out "deleted" or non-existent records.
1N/A     */
1N/A    memset(&key, 0, sizeof(key));
1N/A    memset(&data, 0, sizeof(data));
1N/A    key.size = sizeof(db_recno_t);
1N/A    key.data = &keyno;
1N/A
1N/A    /*
1N/A     * We'll need the delimiter if we're doing variable-length records,
1N/A     * and the pad character if we're doing fixed-length records.
1N/A     */
1N/A    delim = rp->re_delim;
1N/A    if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
1N/A        if ((ret = __os_malloc(rp->re_len, NULL, &pad)) != 0)
1N/A            goto err;
1N/A        memset(pad, rp->re_pad, rp->re_len);
1N/A    } else
1N/A        COMPQUIET(pad, NULL);
1N/A    for (keyno = 1;; ++keyno) {
1N/A        switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) {
1N/A        case 0:
1N/A            if ((ret =
1N/A                __os_write(fd, data.data, data.size, &nw)) != 0)
1N/A                goto err;
1N/A            if (nw != (ssize_t)data.size) {
1N/A                ret = EIO;
1N/A                goto err;
1N/A            }
1N/A            break;
1N/A        case DB_KEYEMPTY:
1N/A            if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
1N/A                if ((ret =
1N/A                    __os_write(fd, pad, rp->re_len, &nw)) != 0)
1N/A                    goto err;
1N/A                if (nw != (ssize_t)rp->re_len) {
1N/A                    ret = EIO;
1N/A                    goto err;
1N/A                }
1N/A            }
1N/A            break;
1N/A        case DB_NOTFOUND:
1N/A            ret = 0;
1N/A            goto done;
1N/A        }
1N/A        if (!F_ISSET(dbp, DB_RE_FIXEDLEN)) {
1N/A            if ((ret = __os_write(fd, &delim, 1, &nw)) != 0)
1N/A                goto err;
1N/A            if (nw != 1) {
1N/A                ret = EIO;
1N/A                goto err;
1N/A            }
1N/A        }
1N/A    }
1N/A
1N/Aerr:
1N/Adone:   /* Close the file descriptor. */
1N/A    if ((t_ret = __os_close(fd)) != 0 || ret == 0)
1N/A        ret = t_ret;
1N/A
1N/A    if (ret == 0)
1N/A        F_CLR(rp, RECNO_MODIFIED);
1N/A    return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __ram_fmap --
1N/A *  Get fixed length records from a file.
1N/A */
1N/Astatic int
1N/A__ram_fmap(dbc, top)
1N/A    DBC *dbc;
1N/A    db_recno_t top;
1N/A{
1N/A    DB *dbp;
1N/A    DBT data;
1N/A    RECNO *rp;
1N/A    db_recno_t recno;
1N/A    u_int32_t len;
1N/A    u_int8_t *sp, *ep, *p;
1N/A    int ret;
1N/A
1N/A    if ((ret = __bam_nrecs(dbc, &recno)) != 0)
1N/A        return (ret);
1N/A
1N/A    dbp = dbc->dbp;
1N/A    rp = ((BTREE *)(dbp->internal))->recno;
1N/A
1N/A    if (dbc->rdata.ulen < rp->re_len) {
1N/A        if ((ret = __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) {
1N/A            dbc->rdata.ulen = 0;
1N/A            dbc->rdata.data = NULL;
1N/A            return (ret);
1N/A        }
1N/A        dbc->rdata.ulen = rp->re_len;
1N/A    }
1N/A
1N/A    memset(&data, 0, sizeof(data));
1N/A    data.data = dbc->rdata.data;
1N/A    data.size = rp->re_len;
1N/A
1N/A    sp = (u_int8_t *)rp->re_cmap;
1N/A    ep = (u_int8_t *)rp->re_emap;
1N/A    while (recno < top) {
1N/A        if (sp >= ep) {
1N/A            F_SET(rp, RECNO_EOF);
1N/A            return (DB_NOTFOUND);
1N/A        }
1N/A        len = rp->re_len;
1N/A        for (p = dbc->rdata.data;
1N/A            sp < ep && len > 0; *p++ = *sp++, --len)
1N/A            ;
1N/A
1N/A        /*
1N/A         * Another process may have read this record from the input
1N/A         * file and stored it into the database already, in which
1N/A         * case we don't need to repeat that operation.  We detect
1N/A         * this by checking if the last record we've read is greater
1N/A         * or equal to the number of records in the database.
1N/A         *
1N/A         * XXX
1N/A         * We should just do a seek, since the records are fixed
1N/A         * length.
1N/A         */
1N/A        if (rp->re_last >= recno) {
1N/A            if (len != 0)
1N/A                memset(p, rp->re_pad, len);
1N/A
1N/A            ++recno;
1N/A            if ((ret = __ram_add(dbc, &recno, &data, 0, 0)) != 0)
1N/A                return (ret);
1N/A        }
1N/A        ++rp->re_last;
1N/A    }
1N/A    rp->re_cmap = sp;
1N/A    return (0);
1N/A}
1N/A
1N/A/*
1N/A * __ram_vmap --
1N/A *  Get variable length records from a file.
1N/A */
1N/Astatic int
1N/A__ram_vmap(dbc, top)
1N/A    DBC *dbc;
1N/A    db_recno_t top;
1N/A{
1N/A    DBT data;
1N/A    RECNO *rp;
1N/A    db_recno_t recno;
1N/A    u_int8_t *sp, *ep;
1N/A    int delim, ret;
1N/A
1N/A    rp = ((BTREE *)(dbc->dbp->internal))->recno;
1N/A
1N/A    if ((ret = __bam_nrecs(dbc, &recno)) != 0)
1N/A        return (ret);
1N/A
1N/A    memset(&data, 0, sizeof(data));
1N/A
1N/A    delim = rp->re_delim;
1N/A
1N/A    sp = (u_int8_t *)rp->re_cmap;
1N/A    ep = (u_int8_t *)rp->re_emap;
1N/A    while (recno < top) {
1N/A        if (sp >= ep) {
1N/A            F_SET(rp, RECNO_EOF);
1N/A            return (DB_NOTFOUND);
1N/A        }
1N/A        for (data.data = sp; sp < ep && *sp != delim; ++sp)
1N/A            ;
1N/A
1N/A        /*
1N/A         * Another process may have read this record from the input
1N/A         * file and stored it into the database already, in which
1N/A         * case we don't need to repeat that operation.  We detect
1N/A         * this by checking if the last record we've read is greater
1N/A         * or equal to the number of records in the database.
1N/A         */
1N/A        if (rp->re_last >= recno) {
1N/A            data.size = sp - (u_int8_t *)data.data;
1N/A            ++recno;
1N/A            if ((ret = __ram_add(dbc, &recno, &data, 0, 0)) != 0)
1N/A                return (ret);
1N/A        }
1N/A        ++rp->re_last;
1N/A        ++sp;
1N/A    }
1N/A    rp->re_cmap = sp;
1N/A    return (0);
1N/A}
1N/A
1N/A/*
1N/A * __ram_add --
1N/A *  Add records into the tree.
1N/A */
1N/Astatic int
1N/A__ram_add(dbc, recnop, data, flags, bi_flags)
1N/A    DBC *dbc;
1N/A    db_recno_t *recnop;
1N/A    DBT *data;
1N/A    u_int32_t flags, bi_flags;
1N/A{
1N/A    BKEYDATA *bk;
1N/A    CURSOR *cp;
1N/A    DB *dbp;
1N/A    PAGE *h;
1N/A    db_indx_t indx;
1N/A    int exact, isdeleted, ret, stack;
1N/A
1N/A    dbp = dbc->dbp;
1N/A    cp = dbc->internal;
1N/A
1N/Aretry:  /* Find the slot for insertion. */
1N/A    if ((ret = __bam_rsearch(dbc, recnop,
1N/A        S_INSERT | (flags == DB_APPEND ? S_APPEND : 0), 1, &exact)) != 0)
1N/A        return (ret);
1N/A    h = cp->csp->page;
1N/A    indx = cp->csp->indx;
1N/A    stack = 1;
1N/A
1N/A    /*
1N/A     * If re-numbering records, the on-page deleted flag means this record
1N/A     * was implicitly created.  If not re-numbering records, the on-page
1N/A     * deleted flag means this record was implicitly created, or, it was
1N/A     * deleted at some time.
1N/A     *
1N/A     * If DB_NOOVERWRITE is set and the item already exists in the tree,
1N/A     * return an error unless the item was either marked for deletion or
1N/A     * only implicitly created.
1N/A     */
1N/A    isdeleted = 0;
1N/A    if (exact) {
1N/A        bk = GET_BKEYDATA(h, indx);
1N/A        if (B_DISSET(bk->type))
1N/A            isdeleted = 1;
1N/A        else
1N/A            if (flags == DB_NOOVERWRITE) {
1N/A                ret = DB_KEYEXIST;
1N/A                goto err;
1N/A            }
1N/A    }
1N/A
1N/A    /*
1N/A     * Select the arguments for __bam_iitem() and do the insert.  If the
1N/A     * key is an exact match, or we're replacing the data item with a
1N/A     * new data item, replace the current item.  If the key isn't an exact
1N/A     * match, we're inserting a new key/data pair, before the search
1N/A     * location.
1N/A     */
1N/A    switch (ret = __bam_iitem(dbc,
1N/A        &h, &indx, NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) {
1N/A    case 0:
1N/A        /*
1N/A         * Don't adjust anything.
1N/A         *
1N/A         * If we inserted a record, no cursors need adjusting because
1N/A         * the only new record it's possible to insert is at the very
1N/A         * end of the tree.  The necessary adjustments to the internal
1N/A         * page counts were made by __bam_iitem().
1N/A         *
1N/A         * If we overwrote a record, no cursors need adjusting because
1N/A         * future DBcursor->get calls will simply return the underlying
1N/A         * record (there's no adjustment made for the DB_CURRENT flag
1N/A         * when a cursor get operation immediately follows a cursor
1N/A         * delete operation, and the normal adjustment for the DB_NEXT
1N/A         * flag is still correct).
1N/A         */
1N/A        break;
1N/A    case DB_NEEDSPLIT:
1N/A        /* Discard the stack of pages and split the page. */
1N/A        (void)__bam_stkrel(dbc, 0);
1N/A        stack = 0;
1N/A
1N/A        if ((ret = __bam_split(dbc, recnop)) != 0)
1N/A            goto err;
1N/A
1N/A        goto retry;
1N/A        /* NOTREACHED */
1N/A    default:
1N/A        goto err;
1N/A    }
1N/A
1N/A
1N/Aerr:    if (stack)
1N/A        __bam_stkrel(dbc, 0);
1N/A
1N/A    return (ret);
1N/A}