1N/A/*-
1N/A * See the file LICENSE for redistribution information.
1N/A *
1N/A * Copyright (c) 1996, 1997, 1998
1N/A * Sleepycat Software. All rights reserved.
1N/A */
1N/A/*
1N/A * Copyright (c) 1990, 1993, 1994, 1995, 1996
1N/A * Keith Bostic. All rights reserved.
1N/A */
1N/A/*
1N/A * Copyright (c) 1990, 1993
1N/A * The Regents of the University of California. All rights reserved.
1N/A *
1N/A * Redistribution and use in source and binary forms, with or without
1N/A * modification, are permitted provided that the following conditions
1N/A * are met:
1N/A * 1. Redistributions of source code must retain the above copyright
1N/A * notice, this list of conditions and the following disclaimer.
1N/A * 2. Redistributions in binary form must reproduce the above copyright
1N/A * notice, this list of conditions and the following disclaimer in the
1N/A * documentation and/or other materials provided with the distribution.
1N/A * 3. All advertising materials mentioning features or use of this software
1N/A * must display the following acknowledgement:
1N/A * This product includes software developed by the University of
1N/A * California, Berkeley and its contributors.
1N/A * 4. Neither the name of the University nor the names of its contributors
1N/A * may be used to endorse or promote products derived from this software
1N/A * without specific prior written permission.
1N/A *
1N/A * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
1N/A * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1N/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1N/A * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
1N/A * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1N/A * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
1N/A * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1N/A * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
1N/A * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
1N/A * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
1N/A * SUCH DAMAGE.
1N/A */
1N/A
1N/A#include "config.h"
1N/A
1N/A#ifndef lint
1N/Astatic const char sccsid[] = "@(#)bt_rsearch.c 10.21 (Sleepycat) 12/2/98";
1N/A#endif /* not lint */
1N/A
1N/A#ifndef NO_SYSTEM_INCLUDES
1N/A#include <sys/types.h>
1N/A#endif
1N/A
1N/A#include "db_int.h"
1N/A#include "db_page.h"
1N/A#include "btree.h"
1N/A
1N/A/*
1N/A * __bam_rsearch --
1N/A * Search a btree for a record number.
1N/A *
1N/A * PUBLIC: int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *));
1N/A */
1N/Aint
1N/A__bam_rsearch(dbc, recnop, flags, stop, exactp)
1N/A DBC *dbc;
1N/A db_recno_t *recnop;
1N/A u_int32_t flags;
1N/A int stop, *exactp;
1N/A{
1N/A BINTERNAL *bi;
1N/A CURSOR *cp;
1N/A DB *dbp;
1N/A DB_LOCK lock;
1N/A PAGE *h;
1N/A RINTERNAL *ri;
1N/A db_indx_t indx, top;
1N/A db_pgno_t pg;
1N/A db_recno_t i, recno, total;
1N/A int ret, stack;
1N/A
1N/A dbp = dbc->dbp;
1N/A cp = dbc->internal;
1N/A
1N/A BT_STK_CLR(cp);
1N/A
1N/A /*
1N/A * There are several ways we search a btree tree. The flags argument
1N/A * specifies if we're acquiring read or write locks and if we are
1N/A * locking pairs of pages. In addition, if we're adding or deleting
1N/A * an item, we have to lock the entire tree, regardless. See btree.h
1N/A * for more details.
1N/A *
1N/A * If write-locking pages, we need to know whether or not to acquire a
1N/A * write lock on a page before getting it. This depends on how deep it
1N/A * is in tree, which we don't know until we acquire the root page. So,
1N/A * if we need to lock the root page we may have to upgrade it later,
1N/A * because we won't get the correct lock initially.
1N/A *
1N/A * Retrieve the root page.
1N/A */
1N/A pg = PGNO_ROOT;
1N/A stack = LF_ISSET(S_STACK);
1N/A if ((ret = __bam_lget(dbc,
1N/A 0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
1N/A return (ret);
1N/A if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
1N/A (void)__BT_LPUT(dbc, lock);
1N/A return (ret);
1N/A }
1N/A
1N/A /*
1N/A * Decide if we need to save this page; if we do, write lock it.
1N/A * We deliberately don't lock-couple on this call. If the tree
1N/A * is tiny, i.e., one page, and two threads are busily updating
1N/A * the root page, we're almost guaranteed deadlocks galore, as
1N/A * each one gets a read lock and then blocks the other's attempt
1N/A * for a write lock.
1N/A */
1N/A if (!stack &&
1N/A ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
1N/A (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
1N/A (void)memp_fput(dbp->mpf, h, 0);
1N/A (void)__BT_LPUT(dbc, lock);
1N/A if ((ret = __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
1N/A return (ret);
1N/A if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
1N/A (void)__BT_LPUT(dbc, lock);
1N/A return (ret);
1N/A }
1N/A stack = 1;
1N/A }
1N/A
1N/A /*
1N/A * If appending to the tree, set the record number now -- we have the
1N/A * root page locked.
1N/A *
1N/A * Delete only deletes exact matches, read only returns exact matches.
1N/A * Note, this is different from __bam_search(), which returns non-exact
1N/A * matches for read.
1N/A *
1N/A * The record may not exist. We can only return the correct location
1N/A * for the record immediately after the last record in the tree, so do
1N/A * a fast check now.
1N/A */
1N/A total = RE_NREC(h);
1N/A if (LF_ISSET(S_APPEND)) {
1N/A *exactp = 0;
1N/A *recnop = recno = total + 1;
1N/A } else {
1N/A recno = *recnop;
1N/A if (recno <= total)
1N/A *exactp = 1;
1N/A else {
1N/A *exactp = 0;
1N/A if (!LF_ISSET(S_PAST_EOF) || recno > total + 1) {
1N/A (void)memp_fput(dbp->mpf, h, 0);
1N/A (void)__BT_LPUT(dbc, lock);
1N/A return (DB_NOTFOUND);
1N/A }
1N/A }
1N/A }
1N/A
1N/A /*
1N/A * !!!
1N/A * Record numbers in the tree are 0-based, but the recno is
1N/A * 1-based. All of the calculations below have to take this
1N/A * into account.
1N/A */
1N/A for (total = 0;;) {
1N/A switch (TYPE(h)) {
1N/A case P_LBTREE:
1N/A recno -= total;
1N/A
1N/A /*
1N/A * There may be logically deleted records on the page,
1N/A * walk the page correcting for them. The record may
1N/A * not exist if there are enough deleted records in the
1N/A * page.
1N/A */
1N/A if (recno <= (db_recno_t)NUM_ENT(h) / P_INDX)
1N/A for (i = recno - 1;; --i) {
1N/A if (B_DISSET(GET_BKEYDATA(h,
1N/A i * P_INDX + O_INDX)->type))
1N/A ++recno;
1N/A if (i == 0)
1N/A break;
1N/A }
1N/A if (recno > (db_recno_t)NUM_ENT(h) / P_INDX) {
1N/A *exactp = 0;
1N/A if (!LF_ISSET(S_PAST_EOF) || recno >
1N/A (db_recno_t)(NUM_ENT(h) / P_INDX + 1)) {
1N/A ret = DB_NOTFOUND;
1N/A goto err;
1N/A }
1N/A
1N/A }
1N/A
1N/A /* Correct from 1-based to 0-based for a page offset. */
1N/A --recno;
1N/A BT_STK_ENTER(cp, h, recno * P_INDX, lock, ret);
1N/A return (ret);
1N/A case P_IBTREE:
1N/A for (indx = 0, top = NUM_ENT(h);;) {
1N/A bi = GET_BINTERNAL(h, indx);
1N/A if (++indx == top || total + bi->nrecs >= recno)
1N/A break;
1N/A total += bi->nrecs;
1N/A }
1N/A pg = bi->pgno;
1N/A break;
1N/A case P_LRECNO:
1N/A recno -= total;
1N/A
1N/A /* Correct from 1-based to 0-based for a page offset. */
1N/A --recno;
1N/A BT_STK_ENTER(cp, h, recno, lock, ret);
1N/A return (ret);
1N/A case P_IRECNO:
1N/A for (indx = 0, top = NUM_ENT(h);;) {
1N/A ri = GET_RINTERNAL(h, indx);
1N/A if (++indx == top || total + ri->nrecs >= recno)
1N/A break;
1N/A total += ri->nrecs;
1N/A }
1N/A pg = ri->pgno;
1N/A break;
1N/A default:
1N/A return (__db_pgfmt(dbp, h->pgno));
1N/A }
1N/A --indx;
1N/A
1N/A if (stack) {
1N/A /* Return if this is the lowest page wanted. */
1N/A if (LF_ISSET(S_PARENT) && stop == h->level) {
1N/A BT_STK_ENTER(cp, h, indx, lock, ret);
1N/A return (ret);
1N/A }
1N/A BT_STK_PUSH(cp, h, indx, lock, ret);
1N/A if (ret != 0)
1N/A goto err;
1N/A
1N/A if ((ret =
1N/A __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
1N/A goto err;
1N/A } else {
1N/A /*
1N/A * Decide if we want to return a pointer to the next
1N/A * page in the stack. If we do, write lock it and
1N/A * never unlock it.
1N/A */
1N/A if ((LF_ISSET(S_PARENT) &&
1N/A (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) ||
1N/A (h->level - 1) == LEAFLEVEL)
1N/A stack = 1;
1N/A
1N/A (void)memp_fput(dbp->mpf, h, 0);
1N/A
1N/A if ((ret =
1N/A __bam_lget(dbc, 1, pg, stack && LF_ISSET(S_WRITE) ?
1N/A DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
1N/A goto err;
1N/A }
1N/A
1N/A if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0)
1N/A goto err;
1N/A }
1N/A /* NOTREACHED */
1N/A
1N/Aerr: BT_STK_POP(cp);
1N/A __bam_stkrel(dbc, 0);
1N/A return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __bam_adjust --
1N/A * Adjust the tree after adding or deleting a record.
1N/A *
1N/A * PUBLIC: int __bam_adjust __P((DBC *, int32_t));
1N/A */
1N/Aint
1N/A__bam_adjust(dbc, adjust)
1N/A DBC *dbc;
1N/A int32_t adjust;
1N/A{
1N/A CURSOR *cp;
1N/A DB *dbp;
1N/A EPG *epg;
1N/A PAGE *h;
1N/A int ret;
1N/A
1N/A dbp = dbc->dbp;
1N/A cp = dbc->internal;
1N/A
1N/A /* Update the record counts for the tree. */
1N/A for (epg = cp->sp; epg <= cp->csp; ++epg) {
1N/A h = epg->page;
1N/A if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) {
1N/A if (DB_LOGGING(dbc) &&
1N/A (ret = __bam_cadjust_log(dbp->dbenv->lg_info,
1N/A dbc->txn, &LSN(h), 0, dbp->log_fileid,
1N/A PGNO(h), &LSN(h), (u_int32_t)epg->indx,
1N/A adjust, 1)) != 0)
1N/A return (ret);
1N/A
1N/A if (TYPE(h) == P_IBTREE)
1N/A GET_BINTERNAL(h, epg->indx)->nrecs += adjust;
1N/A else
1N/A GET_RINTERNAL(h, epg->indx)->nrecs += adjust;
1N/A
1N/A if (PGNO(h) == PGNO_ROOT)
1N/A RE_NREC_ADJ(h, adjust);
1N/A
1N/A if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
1N/A return (ret);
1N/A }
1N/A }
1N/A return (0);
1N/A}
1N/A
1N/A/*
1N/A * __bam_nrecs --
1N/A * Return the number of records in the tree.
1N/A *
1N/A * PUBLIC: int __bam_nrecs __P((DBC *, db_recno_t *));
1N/A */
1N/Aint
1N/A__bam_nrecs(dbc, rep)
1N/A DBC *dbc;
1N/A db_recno_t *rep;
1N/A{
1N/A DB *dbp;
1N/A DB_LOCK lock;
1N/A PAGE *h;
1N/A db_pgno_t pgno;
1N/A int ret;
1N/A
1N/A dbp = dbc->dbp;
1N/A
1N/A pgno = PGNO_ROOT;
1N/A if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock)) != 0)
1N/A return (ret);
1N/A if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
1N/A return (ret);
1N/A
1N/A *rep = RE_NREC(h);
1N/A
1N/A (void)memp_fput(dbp->mpf, h, 0);
1N/A (void)__BT_TLPUT(dbc, lock);
1N/A
1N/A return (0);
1N/A}
1N/A
1N/A/*
1N/A * __bam_total --
1N/A * Return the number of records below a page.
1N/A *
1N/A * PUBLIC: db_recno_t __bam_total __P((PAGE *));
1N/A */
1N/Adb_recno_t
1N/A__bam_total(h)
1N/A PAGE *h;
1N/A{
1N/A db_recno_t nrecs;
1N/A db_indx_t indx, top;
1N/A
1N/A nrecs = 0;
1N/A top = NUM_ENT(h);
1N/A
1N/A switch (TYPE(h)) {
1N/A case P_LBTREE:
1N/A /* Check for logically deleted records. */
1N/A for (indx = 0; indx < top; indx += P_INDX)
1N/A if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type))
1N/A ++nrecs;
1N/A break;
1N/A case P_IBTREE:
1N/A for (indx = 0; indx < top; indx += O_INDX)
1N/A nrecs += GET_BINTERNAL(h, indx)->nrecs;
1N/A break;
1N/A case P_LRECNO:
1N/A nrecs = NUM_ENT(h);
1N/A break;
1N/A case P_IRECNO:
1N/A for (indx = 0; indx < top; indx += O_INDX)
1N/A nrecs += GET_RINTERNAL(h, indx)->nrecs;
1N/A break;
1N/A }
1N/A
1N/A return (nrecs);
1N/A}