1N/A * See the file LICENSE for redistribution information. 1N/A * Copyright (c) 1996, 1997, 1998 1N/A * Sleepycat Software. All rights reserved. 1N/A * Copyright (c) 1990, 1993, 1994 1N/A * Margo Seltzer. All rights reserved. 1N/A * Copyright (c) 1990, 1993, 1994 1N/A * The Regents of the University of California. All rights reserved. 1N/A * This code is derived from software contributed to Berkeley by 1N/A * Redistribution and use in source and binary forms, with or without 1N/A * modification, are permitted provided that the following conditions 1N/A * 1. Redistributions of source code must retain the above copyright 1N/A * notice, this list of conditions and the following disclaimer. 1N/A * 2. Redistributions in binary form must reproduce the above copyright 1N/A * notice, this list of conditions and the following disclaimer in the 1N/A * documentation and/or other materials provided with the distribution. 1N/A * 3. All advertising materials mentioning features or use of this software 1N/A * must display the following acknowledgement: 1N/A * This product includes software developed by the University of 1N/A * California, Berkeley and its contributors. 1N/A * 4. Neither the name of the University nor the names of its contributors 1N/A * may be used to endorse or promote products derived from this software 1N/A * without specific prior written permission. 1N/A * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 1N/A * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1N/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1N/A * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 1N/A * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1N/A * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 1N/A * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 1N/A * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 1N/A * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 1N/A * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 1N/A#
endif /* not lint */ 1N/A * Page manipulation for hashing package. 1N/A * PUBLIC: int __ham_item __P((DBC *, db_lockmode_t)); 1N/A /* Check if we need to get a page for this cursor. */ 1N/A /* Check if we are looking for space in which to insert an item. */ 1N/A /* Check if we need to go on to the next page. */ 1N/A * ISDUP is set, and offset is at the beginning of the datum. 1N/A * We need to grab the length of the datum, then set the datum 1N/A * pointer to be the beginning of the datum. 1N/A /* Make sure we're not about to run off the page. */ 1N/A /* Fetch next page. */ 1N/A * PUBLIC: int __ham_item_reset __P((DBC *)); 1N/A * PUBLIC: void __ham_item_init __P((HASH_CURSOR *)); 1N/A * If this cursor still holds any locks, we must 1N/A * release them if we are not running with transactions. 1N/A * The following fields must *not* be initialized here 1N/A * because they may have meaning across inits. 1N/A * hlock, hdr, split_buf, stats 1N/A * PUBLIC: int __ham_item_done __P((DBC *, int)); 1N/A * We don't throw out the page number since we might want to 1N/A * continue getting on this page. 1N/A * Returns the last item in a bucket. 1N/A * PUBLIC: int __ham_item_last __P((DBC *, db_lockmode_t)); 1N/A * PUBLIC: int __ham_item_first __P((DBC *, db_lockmode_t)); 1N/A * __ham_item_prev -- 1N/A * Returns a pointer to key/data pair on a page. In the case of 1N/A * bigkeys, just returns the page number and index of the bigkey 1N/A * PUBLIC: int __ham_item_prev __P((DBC *, db_lockmode_t)); 1N/A * There are N cases for backing up in a hash file. 1N/A * Case 1: In the middle of a page, no duplicates, just dec the index. 1N/A * Case 2: In the middle of a duplicate set, back up one. 1N/A * Case 3: At the beginning of a duplicate set, get out of set and 1N/A * back up to next key. 1N/A * Case 4: At the beginning of a page; go to previous page. 1N/A * Case 5: At the beginning of a bucket; go to prev bucket. 1N/A * First handle the duplicates. Either you'll get the key here 1N/A * or you'll exit the duplicate set and drop into the code below 1N/A * to handle backing up through keys. 1N/A /* Duplicates are on-page. */ 1N/A }
else if (
hcp->
dndx > 0) {
/* Duplicates are off-page. */ 1N/A * If we get here, we are not in a duplicate set, and just need 1N/A * to back up the cursor. There are still three cases: 1N/A * midpage, beginning of page, beginning of bucket. 1N/A /* Beginning of bucket. */ 1N/A * Either we've got the cursor set up to be decremented, or we 1N/A * have to find the end of a bucket. 1N/A /* Bucket was empty. */ 1N/A * Sets the cursor to the next key/data pair on a page. 1N/A * PUBLIC: int __ham_item_next __P((DBC *, db_lockmode_t)); 1N/A * Deleted on-page duplicates are a weird case. If we delete the last 1N/A * one, then our cursor is at the very end of a duplicate set and 1N/A * we actually need to go on to the next key. 1N/A * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int)); 1N/A * This is a little bit sleazy in that we're overloading the meaning 1N/A * of the H_OFFPAGE type here. When we recover deletes, we have the 1N/A * entire entry instead of having only the DBT, so we'll pass type 1N/A * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing 1N/A * an H_KEYDATA around it. 1N/A /* Put the item element on the page. */ 1N/A /* Adjust page info. */ 1N/A * PUBLIC: void __ham_reputpair 1N/A * PUBLIC: __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *)); 1N/A * This is a special case to restore a key/data pair to its original 1N/A * location during recovery. We are guaranteed that the pair fits 1N/A * on the page and is not the last pair on the page (because if it's 1N/A * the last pair, the normal insert works). 1N/A /* First shuffle the existing items up on the page. */ 1N/A * Adjust the indices and move them up 2 spaces. Note that we 1N/A * have to check the exit condition inside the loop just in case 1N/A * we are dealing with index 0 (db_indx_t's are unsigned). 1N/A /* Put the key and data on the page. */ 1N/A /* Adjust page info. */ 1N/A * PUBLIC: int __ham_del_pair __P((DBC *, int)); 1N/A * We optimize for the normal case which is when neither the key nor 1N/A * the data are large. In this case, we write a single log record 1N/A * and do the delete. If either is large, we'll call __big_delete 1N/A * to remove the big item and then update the page to remove the 1N/A * entry referring to the big item. 1N/A * If we delete a pair that is/was a duplicate, then 1N/A * we had better clear the flag so that we update the 1N/A * cursor appropriately. 1N/A /* Now log the delete off this page. */ 1N/A /* Move lsn onto page. */ 1N/A * If we are locking, we will not maintain this, because it is 1N/A * XXX perhaps we can retain incremental numbers and apply them 1N/A * If we need to reclaim the page, then check if the page is empty. 1N/A * There are two cases. If it's empty and it's not the first page 1N/A * in the bucket (i.e., the bucket page) then we can simply remove 1N/A * it. If it is the first chain in the bucket, then we need to copy 1N/A * the second page into it and remove the second page. 1N/A * First page in chain is empty and we know that there 1N/A * are more pages in the chain. 1N/A /* Move lsn onto page. */ 1N/A * Cursor is advanced to the beginning of the next page. 1N/A /* Move lsn onto page. */ 1N/A * Since we are about to delete the cursor page and we have 1N/A * just moved the cursor, we need to make sure that the 1N/A * old page pointer isn't left hanging around in the cursor. 1N/A * Mark item deleted so that we don't try to return it, and 1N/A * so that we update the cursor correctly on the next call 1N/A * Since we just deleted a pair from the master page, anything 1N/A * in hcp->dpgno should be cleared. 1N/A * Given the key data indicated by the cursor, replace part/all of it 1N/A * according to the fields in the dbt. 1N/A * PUBLIC: int __ham_replpair __P((DBC *, DBT *, u_int32_t)); 1N/A * Big item replacements are handled in generic code. 1N/A * Items that fit on the current page fall into 4 classes. 1N/A * 1. On-page element, same size 1N/A * 2. On-page element, new is bigger (fits) 1N/A * 3. On-page element, new is bigger (does not fit) 1N/A * 4. On-page element, old is bigger 1N/A * Numbers 1, 2, and 4 are essentially the same (and should 1N/A * be the common case). We handle case 3 as a delete and 1N/A * We need to compute the number of bytes that we are adding or 1N/A * removing from the entry. Normally, we can simply substract 1N/A * the number of bytes we are replacing (dbt->dlen) from the 1N/A * number of bytes we are inserting (dbt->size). However, if 1N/A * we are doing a partial put off the end of a record, then this 1N/A * formula doesn't work, because we are essentially adding 1N/A * Case 3 -- two subcases. 1N/A * A. This is not really a partial operation, but an overwrite. 1N/A * Simple del and add works. 1N/A * B. This is a partial and we need to construct the data that 1N/A * we are really inserting (yuck). 1N/A * In both cases, we need to grab the key off the page (in 1N/A * some cases we could do this outside of this routine; for 1N/A * cleanliness we do it here. If you happen to be on a big 1N/A * key, this could be a performance hit). 1N/A }
else {
/* Case B */ 1N/A /* Now we can delete the item. */ 1N/A /* Now shift old data around to make room for new. */ 1N/A /* Now add the pair. */ 1N/A * Set up pointer into existing data. Do it before the log 1N/A * message so we can use it inside of the log setup. 1N/A * If we are going to have to move bytes at all, figure out 1N/A * all the parameters here. Then log the call before moving 1N/A * Replace data on a page with new data, possibly growing or shrinking what's 1N/A * there. This is called on two different occasions. On one (from replpair) 1N/A * we are interested in changing only the data. On the other (from recovery) 1N/A * we are replacing the entire data (header and all) with a new element. In 1N/A * the latter case, the off argument is negative. 1N/A * pagep: the page that we're changing 1N/A * off: Offset at which we are beginning the replacement. 1N/A * dbt: the new data that gets written at beg. 1N/A * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t, 1N/A * PUBLIC: int32_t, DBT *)); 1N/A /* Now update the indices. */ 1N/A * PUBLIC: int __ham_split_page __P((DBC *, u_int32_t, u_int32_t)); 1N/A * Figure out how many bytes we need on the new 1N/A /* Clear temp_page; if it's a link overflow page, free it. */ 1N/A * If the original bucket spanned multiple pages, then we've got 1N/A * a pointer to a page that used to be on the bucket chain. It 1N/A * should be deleted. 1N/A * Write new buckets out. 1N/A * Add the given pair to the page. The page in question may already be 1N/A * held (i.e. it was already gotten). If it is, then the page is passed 1N/A * in via the pagep parameter. On return, pagep will contain the page 1N/A * to which we just added something. This allows us to link overflow 1N/A * pages and return the new page having correctly put the last page. 1N/A * PUBLIC: int __ham_add_el __P((DBC *, const DBT *, const DBT *, int)); 1N/A /* Advance to first page in chain with room for item. */ 1N/A * This may not be the end of the chain, but the pair may fit 1N/A * anyway. Check if it's a bigpair that fits or a regular 1N/A * Check if we need to allocate a new page. 1N/A /* Move lsn onto page. */ 1N/A * For splits, we are going to update item_info's page number 1N/A * field, so that we can easily return to the same page the 1N/A * next time we come in here. For other operations, this shouldn't 1N/A * matter, since odds are this is the last thing that happens before 1N/A * we return to the user program. 1N/A * XXX Maybe keep incremental numbers here 1N/A * Special __putitem call used in splitting -- copies one entry to 1N/A * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA, 1N/A * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we 1N/A * do not need to do any logging here. 1N/A * PUBLIC: void __ham_copy_item __P((size_t, PAGE *, u_int32_t, PAGE *)); 1N/A * Copy the key and data entries onto this new page. 1N/A /* Set up space on dest. */ 1N/A * pointer on success 1N/A * PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **)); 1N/A /* Move lsn onto page. */ 1N/A * PUBLIC: int __ham_new_page __P((DB *, u_int32_t, u_int32_t, PAGE **)); 1N/A /* This should not be necessary because page-in should do it. */ 1N/A * PUBLIC: int __ham_del_page __P((DBC *, PAGE *)); 1N/A "free_ovflpage: unable to lock meta data page %s\n",
1N/A * If we are going to return an error, then we should free 1N/A * the page, so it doesn't stay pinned forever. 1N/A * PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t)); 1N/A * __ham_dirty_page -- 1N/A * Mark a page dirty. 1N/A * PUBLIC: int __ham_dirty_page __P((DB *, PAGE *)); 1N/A * PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **)); 1N/A * PUBLIC: int __ham_overflow_page 1N/A * PUBLIC: __P((DBC *, u_int32_t, PAGE **)); 1N/A * This routine is split up into two parts. First we have 1N/A * to figure out the address of the new page that we are 1N/A * allocating. Then we have to log the allocation. Only 1N/A * after the log do we get to complete allocation of the 1N/A /* We just took something off the free list, initialize it. */ 1N/A /* Get the new page. */ 1N/A * PUBLIC: #ifdef DEBUG 1N/A * PUBLIC: db_pgno_t __bucket_to_page __P((HASH_CURSOR *, db_pgno_t)); 1N/A * Create a bunch of overflow pages at the current split point. 1N/A * PUBLIC: void __ham_init_ovflpages __P((DBC *)); 1N/A * PUBLIC: int __ham_get_cpage __P((DBC *, db_lockmode_t)); 1N/A * There are three cases with respect to buckets and locks. If there 1N/A * is no lock held, then if we are locking, we should get the lock. 1N/A * If there is a lock held and it's for the current bucket, we don't 1N/A * need to do anything. If there is a lock, but it's for a different 1N/A * bucket, then we need to release and get. 1N/A * If this is the original lock, don't release it, 1N/A * because we may need to restore it upon exit. 1N/A * Get a new page at the cursor, putting the last page if necessary. 1N/A * If the flag is set to H_ISDUP, then we are talking about the 1N/A * duplicate page, not the main page. 1N/A * PUBLIC: int __ham_next_cpage __P((DBC *, db_pgno_t, int, u_int32_t)); 1N/A * __ham_lock_bucket -- 1N/A * Get the lock on a particular bucket. 1N/A * Delete a pair on a page, paying no attention to what the pair 1N/A * represents. The caller is responsible for freeing up duplicates 1N/A * or offpage entries that might be referenced by this pair. 1N/A * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t)); 1N/A * Compute "delta", the amount we have to shift all of the 1N/A * offsets. To find the delta, we just need to calculate 1N/A * the size of the pair of elements we are removing. 1N/A * The hard case: we want to remove something other than 1N/A * the last item on the page. We need to shift data and 1N/A * Move the data: src is the first occupied byte on 1N/A * the page. (Length is delta.) 1N/A * Destination is delta bytes beyond src. This might 1N/A * be an overlapping copy, so we have to use memmove. 1N/A /* Adjust the offsets. */ 1N/A /* Adjust page metadata. */