hash_page.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
* Margo Seltzer. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)hash_page.c 10.55 (Sleepycat) 1/3/99";
#endif /* not lint */
/*
* PACKAGE: hashing
*
* DESCRIPTION:
* Page manipulation for hashing package.
*
* ROUTINES:
*
* External
* __get_page
* __add_ovflpage
* __overflow_page
* Internal
* open_temp
*/
#ifndef NO_SYSTEM_INCLUDES
#include <errno.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "hash.h"
#ifdef DEBUG_SLOW
#endif
/*
* PUBLIC: int __ham_item __P((DBC *, db_lockmode_t));
*/
int
{
int ret;
return (EINVAL);
/* Check if we need to get a page for this cursor. */
return (ret);
/* Check if we are looking for space in which to insert an item. */
/* Check if we need to go on to the next page. */
/*
* ISDUP is set, and offset is at the beginning of the datum.
* We need to grab the length of the datum, then set the datum
* pointer to be the beginning of the datum.
*/
/* Make sure we're not about to run off the page. */
return (ret);
return (0);
}
return (ret);
return (ret);
}
}
/* Fetch next page. */
return (ret);
return (DB_NOTFOUND);
}
return (ret);
}
return (0);
}
/*
* PUBLIC: int __ham_item_reset __P((DBC *));
*/
int
{
int ret;
ret = 0;
return (ret);
}
/*
* PUBLIC: void __ham_item_init __P((HASH_CURSOR *));
*/
void
{
/*
* If this cursor still holds any locks, we must
* release them if we are not running with transactions.
*/
/*
* The following fields must *not* be initialized here
* because they may have meaning across inits.
* hlock, hdr, split_buf, stats
*/
}
/*
* PUBLIC: int __ham_item_done __P((DBC *, int));
*/
int
int dirty;
{
/*
* We don't throw out the page number since we might want to
* continue getting on this page.
*/
}
/*
* Returns the last item in a bucket.
*
* PUBLIC: int __ham_item_last __P((DBC *, db_lockmode_t));
*/
int
{
int ret;
return (ret);
}
/*
* PUBLIC: int __ham_item_first __P((DBC *, db_lockmode_t));
*/
int
{
int ret;
return (ret);
}
/*
* __ham_item_prev --
* bigkeys, just returns the page number and index of the bigkey
* pointer pair.
*
* PUBLIC: int __ham_item_prev __P((DBC *, db_lockmode_t));
*/
int
{
int ret;
/*
* There are N cases for backing up in a hash file.
* Case 1: In the middle of a page, no duplicates, just dec the index.
* Case 2: In the middle of a duplicate set, back up one.
* Case 3: At the beginning of a duplicate set, get out of set and
* back up to next key.
* Case 4: At the beginning of a page; go to previous page.
* Case 5: At the beginning of a bucket; go to prev bucket.
*/
/*
* First handle the duplicates. Either you'll get the key here
* or you'll exit the duplicate set and drop into the code below
* to handle backing up through keys.
*/
/* Duplicates are on-page. */
return (ret);
else {
HASH_CURSOR *h;
h = hcp;
sizeof(db_indx_t));
}
return (ret);
return (0);
} else {
(void)__ham_put_page(dbp,
}
return (ret);
else {
}
}
/*
* If we get here, we are not in a duplicate set, and just need
* to back up the cursor. There are still three cases:
* midpage, beginning of page, beginning of bucket.
*/
return (0);
}
return (ret);
/* Beginning of bucket. */
return (DB_NOTFOUND);
} else if ((ret =
return (ret);
else
}
/*
* Either we've got the cursor set up to be decremented, or we
* have to find the end of a bucket.
*/
else
goto got_page;
do {
return (ret);
} while (next_pgno != PGNO_INVALID);
/* Bucket was empty. */
return (DB_NOTFOUND);
}
}
}
/*
*
* PUBLIC: int __ham_item_next __P((DBC *, db_lockmode_t));
*/
int
{
/*
* Deleted on-page duplicates are a weird case. If we delete the last
* one, then our cursor is at the very end of a duplicate set and
* we actually need to go on to the next key.
*/
return (0);
} else {
}
return (0);
}
return (0);
}
}
return (0);
} else
}
/*
* PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int));
*
* This is a little bit sleazy in that we're overloading the meaning
* of the H_OFFPAGE type here. When we recover deletes, we have the
* entire entry instead of having only the DBT, so we'll pass type
* H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
* an H_KEYDATA around it.
*/
void
PAGE *p;
int type;
{
n = NUM_ENT(p);
/* Put the item element on the page. */
} else {
}
/* Adjust page info. */
NUM_ENT(p) += 1;
}
/*
* PUBLIC: void __ham_reputpair
* PUBLIC: __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *));
*
* location during recovery. We are guaranteed that the pair fits
* on the page and is not the last pair on the page (because if it's
* the last pair, the normal insert works).
*/
void
PAGE *p;
{
/* First shuffle the existing items up on the page. */
/*
* Adjust the indices and move them up 2 spaces. Note that we
* have to check the exit condition inside the loop just in case
* we are dealing with index 0 (db_indx_t's are unsigned).
*/
for (i = NUM_ENT(p) - 1; ; i-- ) {
if (i == H_KEYINDEX(ndx))
break;
}
/* Put the key and data on the page. */
/* Adjust page info. */
NUM_ENT(p) += 2;
}
/*
* PUBLIC: int __ham_del_pair __P((DBC *, int));
*/
int
int reclaim_page;
{
PAGE *p;
return (ret);
/*
* We optimize for the normal case which is when neither the key nor
* the data are large. In this case, we write a single log record
* and do the delete. If either is large, we'll call __big_delete
* to remove the big item and then update the page to remove the
* entry referring to the big item.
*/
ret = 0;
sizeof(db_pgno_t));
}
if (ret == 0)
case H_OFFPAGE:
sizeof(db_pgno_t));
break;
case H_OFFDUP:
sizeof(db_pgno_t));
break;
case H_DUPLICATE:
/*
* we had better clear the flag so that we update the
* cursor appropriately.
*/
break;
}
if (ret)
return (ret);
/* Now log the delete off this page. */
if (DB_LOGGING(dbc)) {
return (ret);
/* Move lsn onto page. */
}
/*
* If we are locking, we will not maintain this, because it is
* a hot spot.
* XXX perhaps we can retain incremental numbers and apply them
* later.
*/
/*
* If we need to reclaim the page, then check if the page is empty.
* There are two cases. If it's empty and it's not the first page
* in the bucket (i.e., the bucket page) then we can simply remove
* it. If it is the first chain in the bucket, then we need to copy
* the second page into it and remove the second page.
*/
NEXT_PGNO(p) != PGNO_INVALID) {
/*
* First page in chain is empty and we know that there
* are more pages in the chain.
*/
if ((ret =
return (ret);
if ((ret =
&nn_pagep)) != 0) {
return (ret);
}
}
if (DB_LOGGING(dbc)) {
return (ret);
/* Move lsn onto page. */
}
}
PREV_PGNO(p) = PGNO_INVALID;
/*
* Cursor is advanced to the beginning of the next page.
*/
return (ret);
} else if (reclaim_page &&
if ((ret =
return (ret);
if (NEXT_PGNO(p) != PGNO_INVALID) {
return (ret);
}
} else {
}
if (DB_LOGGING(dbc)) {
return (ret);
/* Move lsn onto page. */
if (n_pagep)
}
/*
* Since we are about to delete the cursor page and we have
* just moved the cursor, we need to make sure that the
* old page pointer isn't left hanging around in the cursor.
*/
ret == 0)
ret == 0)
if (ret != 0)
return (ret);
} else {
/*
* Mark item deleted so that we don't try to return it, and
* so that we update the cursor correctly on the next call
* to next.
*/
}
/*
* Since we just deleted a pair from the master page, anything
* in hcp->dpgno should be cleared.
*/
return (ret);
}
/*
* __ham_replpair --
* according to the fields in the dbt.
*
* PUBLIC: int __ham_replpair __P((DBC *, DBT *, u_int32_t));
*/
int
{
/*
* Big item replacements are handled in generic code.
* Items that fit on the current page fall into 4 classes.
* 1. On-page element, same size
* 2. On-page element, new is bigger (fits)
* 3. On-page element, new is bigger (does not fit)
* 4. On-page element, old is bigger
* Numbers 1, 2, and 4 are essentially the same (and should
* be the common case). We handle case 3 as a delete and
* add.
*/
/*
* We need to compute the number of bytes that we are adding or
* removing from the entry. Normally, we can simply substract
* the number of bytes we are replacing (dbt->dlen) from the
* number of bytes we are inserting (dbt->size). However, if
* we are doing a partial put off the end of a record, then this
* formula doesn't work, because we are essentially adding
* new bytes.
*/
if (is_big)
else
/*
* Case 3 -- two subcases.
* A. This is not really a partial operation, but an overwrite.
* Simple del and add works.
* B. This is a partial and we need to construct the data that
* we are really inserting (yuck).
* In both cases, we need to grab the key off the page (in
* some cases we could do this outside of this routine; for
* cleanliness we do it here. If you happen to be on a big
* key, this could be a performance hit).
*/
if ((ret =
return (ret);
if (ret == 0)
} else { /* Case B */
goto err;
/* Now we can delete the item. */
goto err;
}
/* Now shift old data around to make room for new. */
if (change > 0) {
return (ret);
0, change);
}
}
/* Now add the pair. */
}
return (ret);
}
/*
* Set up pointer into existing data. Do it before the log
* message so we can use it inside of the log setup.
*/
/*
* If we are going to have to move bytes at all, figure out
* all the parameters here. Then log the call before moving
* anything around.
*/
if (DB_LOGGING(dbc)) {
return (ret);
}
return (0);
}
/*
* Replace data on a page with new data, possibly growing or shrinking what's
* there. This is called on two different occasions. On one (from replpair)
* we are interested in changing only the data. On the other (from recovery)
* we are replacing the entire data (header and all) with a new element. In
* the latter case, the off argument is negative.
* pagep: the page that we're changing
* off: Offset at which we are beginning the replacement.
* dbt: the new data that gets written at beg.
* PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
* PUBLIC: int32_t, DBT *));
*/
void
{
db_indx_t i;
int zero_me;
if (change != 0) {
zero_me = 0;
if (off < 0)
zero_me = 1;
} else
if (zero_me)
/* Now update the indices. */
}
if (off >= 0)
else
}
/*
* PUBLIC: int __ham_split_page __P((DBC *, u_int32_t, u_int32_t));
*/
int
{
db_indx_t n;
void *big_buf;
return (ret);
&new_pagep)) != 0)
goto err;
if (DB_LOGGING(dbc)) {
goto err;
}
PGNO_INVALID, 0, P_HASH);
if (DB_LOGGING(dbc))
big_len = 0;
while (temp_pagep != NULL) {
if ((ret =
goto err;
== obucket)
else
/*
* Figure out how many bytes we need on the new
*/
H_DATAINDEX(n)) +
H_KEYINDEX(n)) +
2 * sizeof(db_indx_t);
if (DB_LOGGING(dbc)) {
if ((ret = __ham_splitdata_log(
goto err;
}
if ((ret =
goto err;
}
}
/* Clear temp_page; if it's a link overflow page, free it. */
goto err;
if (next_pgno == PGNO_INVALID)
temp_pagep = NULL;
else if ((ret =
goto err;
goto err;
}
}
/*
* If the original bucket spanned multiple pages, then we've got
* a pointer to a page that used to be on the bucket chain. It
* should be deleted.
*/
goto err;
/*
* Write new buckets out.
*/
if (DB_LOGGING(dbc)) {
goto err;
goto err;
}
ret == 0)
if (0) {
}
return (ret);
}
/*
* Add the given pair to the page. The page in question may already be
* held (i.e. it was already gotten). If it is, then the page is passed
* in via the pagep parameter. On return, pagep will contain the page
* to which we just added something. This allows us to link overflow
* pages and return the new page having correctly put the last page.
*
* PUBLIC: int __ham_add_el __P((DBC *, const DBT *, const DBT *, int));
*/
int
int type;
{
do_expand = 0;
return (ret);
if (is_keybig)
if (is_databig)
/* Advance to first page in chain with room for item. */
PGNO_INVALID) {
/*
* This may not be the end of the chain, but the pair may fit
* anyway. Check if it's a bigpair that fits or a regular
* pair that fits.
*/
break;
if ((ret =
return (ret);
}
/*
* Check if we need to allocate a new page.
*/
do_expand = 1;
return (ret);
}
/*
* Update cursor.
*/
if (is_keybig) {
return (ret);
} else {
}
if (is_databig) {
return (ret);
} else {
}
if (DB_LOGGING(dbc)) {
if (is_databig)
rectype |= PAIR_DATAMASK;
if (is_keybig)
rectype |= PAIR_KEYMASK;
return (ret);
/* Move lsn onto page. */
}
/*
* For splits, we are going to update item_info's page number
* field, so that we can easily return to the same page the
* next time we come in here. For other operations, this shouldn't
* matter, since odds are this is the last thing that happens before
* we return to the user program.
*/
/*
* XXX Maybe keep incremental numbers here
*/
return (0);
}
/*
* Special __putitem call used in splitting -- copies one entry to
* another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
* H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we
* do not need to do any logging here.
*
* PUBLIC: void __ham_copy_item __P((size_t, PAGE *, u_int32_t, PAGE *));
*/
void
{
/*
* Copy the key and data entries onto this new page.
*/
/* Set up space on dest. */
}
/*
*
* Returns:
* pointer on success
* NULL on error
*
* PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **));
*/
int
int release;
{
int ret;
return (ret);
if (DB_LOGGING(dbc)) {
return (ret);
/* Move lsn onto page. */
}
if (release)
return (ret);
}
/*
* PUBLIC: int __ham_new_page __P((DB *, u_int32_t, u_int32_t, PAGE **));
*/
int
{
int ret;
return (ret);
/* This should not be necessary because page-in should do it. */
return (0);
}
/*
* PUBLIC: int __ham_del_page __P((DBC *, PAGE *));
*/
int
{
int ret;
ret = 0;
if (ret != 0) {
"free_ovflpage: unable to lock meta data page %s\n",
/*
* If we are going to return an error, then we should free
* the page, so it doesn't stay pinned forever.
*/
return (ret);
}
if (DB_LOGGING(dbc)) {
return (ret);
}
#ifdef DIAGNOSTIC
{
}
#endif
}
/*
* PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t));
*/
int
{
#ifdef DEBUG_SLOW
#endif
}
/*
* __ham_dirty_page --
* Mark a page dirty.
*
* PUBLIC: int __ham_dirty_page __P((DB *, PAGE *));
*/
int
{
}
/*
* PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
*/
int
{
int ret;
#ifdef DEBUG_SLOW
#endif
return (ret);
}
/*
* PUBLIC: int __ham_overflow_page
* PUBLIC: __P((DBC *, u_int32_t, PAGE **));
*/
int
{
PAGE *p;
int ret;
ret = 0;
if (ret != 0)
return (ret);
/*
* This routine is split up into two parts. First we have
* to figure out the address of the new page that we are
* allocating. Then we have to log the allocation. Only
* after the log do we get to complete allocation of the
* new page.
*/
if (new_addr != PGNO_INVALID) {
return (ret);
newalloc_flag = 0;
} else {
return (ENOMEM);
}
p = NULL;
newalloc_flag = 1;
}
if (DB_LOGGING(dbc)) {
return (ret);
}
if (p != NULL) {
/* We just took something off the free list, initialize it. */
} else {
/* Get the new page. */
return (ret);
}
if (DB_LOGGING(dbc))
*pp = p;
return (0);
}
#ifdef DEBUG
/*
* PUBLIC: #ifdef DEBUG
* PUBLIC: db_pgno_t __bucket_to_page __P((HASH_CURSOR *, db_pgno_t));
* PUBLIC: #endif
*/
__bucket_to_page(hcp, n)
db_pgno_t n;
{
int ret_val;
ret_val = n + 1;
if (n != 0)
return (ret_val);
}
#endif
/*
* Create a bunch of overflow pages at the current split point.
* PUBLIC: void __ham_init_ovflpages __P((DBC *));
*/
void
{
PAGE *p;
if (DB_LOGGING(dbc)) {
} else
for (i = numpages; i > 0; i--) {
if (__ham_new_page(dbp,
P_INVALID, &p) != 0)
break;
}
}
/*
* PUBLIC: int __ham_get_cpage __P((DBC *, db_lockmode_t));
*/
int
{
int ret;
/*
* There are three cases with respect to buckets and locks. If there
* is no lock held, then if we are locking, we should get the lock.
* If there is a lock held and it's for the current bucket, we don't
* need to do anything. If there is a lock, but it's for a different
* bucket, then we need to release and get.
*/
/*
* If this is the original lock, don't release it,
* because we may need to restore it upon exit.
*/
return (ret);
}
return (ret);
}
}
if ((ret =
return (ret);
}
if ((ret =
return (ret);
return (0);
}
/*
* Get a new page at the cursor, putting the last page if necessary.
* If the flag is set to H_ISDUP, then we are talking about the
* duplicate page, not the main page.
*
* PUBLIC: int __ham_next_cpage __P((DBC *, db_pgno_t, int, u_int32_t));
*/
int
int dirty;
{
PAGE *p;
int ret;
return (ret);
return (ret);
return (ret);
} else {
}
return (0);
}
/*
* __ham_lock_bucket --
* Get the lock on a particular bucket.
*/
static int
{
int ret;
else
}
/*
* __ham_dpair --
* Delete a pair on a page, paying no attention to what the pair
* represents. The caller is responsible for freeing up duplicates
* or offpage entries that might be referenced by this pair.
*
* PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
*/
void
PAGE *p;
{
/*
* Compute "delta", the amount we have to shift all of the
* offsets. To find the delta, we just need to calculate
* the size of the pair of elements we are removing.
*/
/*
* The hard case: we want to remove something other than
* the last item on the page. We need to shift data and
* offsets down.
*/
/*
* Move the data: src is the first occupied byte on
* the page. (Length is delta.)
*/
/*
* Destination is delta bytes beyond src. This might
* be an overlapping copy, so we have to use memmove.
*/
}
/* Adjust the offsets. */
}
/* Adjust page metadata. */
}