/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1997, 1998
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <errno.h>
#include <limits.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
#include "db_ext.h"
#include "shqueue.h"
#include "db_shash.h"
#include "lock.h"
#include "lock_ext.h"
/*
* In recno, there are two meanings to the on-page "deleted" flag. If we're
* re-numbering records, it means the record was implicitly created. We skip
* over implicitly created records if doing a cursor "next" or "prev", and
* return DB_KEYEMPTY if they're explicitly requested. If not re-numbering
* records, it means that the record was implicitly created, or was deleted.
* We skip over implicitly created or deleted records if doing a cursor "next"
* or "prev", and return DB_KEYEMPTY if they're explicitly requested.
*
* If we're re-numbering records, then we have to detect in the cursor that
* a record was deleted, and adjust the cursor as necessary on the next get.
* If we're not re-numbering records, then we can detect that a record has
* been deleted by looking at the actual on-page record, so we completely
* ignore the cursor's delete flag. This is different from the B+tree code:
* the B+tree code also maintains whether the cursor references a deleted
* record in the cursor, and it doesn't always check the on-page value.
*/
}
}
/*
* __ram_open --
* Recno open function.
*
* Per the step comments in the body: allocate the private btree and recno
* structures, validate the user's selections, optionally open and map a
* backing source file, start up the tree and, if snapshotting, read the
* source file through a temporary cursor.
*
* Returns 0 on success. The early allocation failures return ret directly;
* later failures jump to the err label, whose comments say any mmap'd or
* opened source file is discarded before ret is returned.
*
* NOTE(review): the parameter list and most statements are not visible in
* this copy of the function; confirm the summary against the full source.
*
* PUBLIC: int __ram_open __P((DB *, DB_INFO *));
*/
int
{
BTREE *t;
/* Allocate and initialize the private btree structure. */
return (ret);
/* Allocate and initialize the private recno structure. */
return (ret);
/* Link in the private recno structure. */
/*
* Intention is to make sure all of the user's selections are okay
* here and then use them without checking.
*/
} else {
/*
* If the user specified a source file, open it and map it in.
*
* !!!
* We don't complain if the user specified transactions or
* threads. It's possible to make it work, but you'd better
* know what you're doing!
*/
} else {
if ((ret =
goto err;
}
/* Copy delimiter, length and padding values. */
"record length must be greater than 0");
goto err;
}
} else
}
/* Start up the tree. */
goto err;
/* Set the overflow page size. */
/* If we're snapshotting an underlying source file, do it now. */
/* Allocate a cursor. */
goto err;
/* Do the snapshot. */
ret = 0;
/* Discard the cursor. */
if (ret != 0)
goto err;
}
return (0);
/*
* Error exit: undo the source-file setup done above before returning
* the error.
*/
err: /* If we mmap'd a source file, discard it. */
/* If we opened a source file, discard it. */
return (ret);
}
/*
* __ram_delete --
* Recno db->del function.
*
* Per the step comments in the body: validate the flags, acquire a cursor,
* check the user's record number, perform the delete and release the
* cursor. The header comment of __ram_i_delete states that it is called
* from here. Returns the value held in ret.
*
* NOTE(review): the parameter list, the err label and most statements are
* not visible in this copy of the function.
*/
static int
{
/* Check for invalid flags. */
return (ret);
/* Acquire a cursor. */
return (ret);
/* Check the user's record number and fill in as necessary. */
goto err;
/* Do the delete. */
/* Release the cursor. */
return (ret);
}
/*
* __ram_i_delete --
* Internal version of recno delete, called by __ram_delete and
* __ram_c_del.
*
* Error returns visible in the body:
*	EINVAL, EAGAIN	from the CDB / update-cursor checks on entry
*			(the exact conditions are not visible here).
*	DB_NOTFOUND	the tree search did not find an exact match.
*	DB_KEYEMPTY	the record carries the on-page deleted flag (see
*			the comment preceding that return).
* Otherwise the value held in ret is returned.
*
* NOTE(review): the parameter list and most statements are not visible in
* this copy of the function.
*/
static int
{
BTREE *t;
PAGE *h;
stack = 0;
/*
* If this is CDB and this isn't a write cursor, then it's an error.
* If it is a write cursor, but we don't yet hold the write lock, then
* we need to upgrade to the write lock.
*/
/* Make sure it's a valid update cursor. */
return (EINVAL);
return (EAGAIN);
}
/* Search the tree for the key; delete only deletes exact matches. */
goto err;
if (!exact) {
ret = DB_NOTFOUND;
goto err;
}
/*
* NOTE(review): presumably records that the search left a page stack
* which must be released with __bam_stkrel() -- confirm.
*/
stack = 1;
/*
* If re-numbering records, the on-page deleted flag can only mean
* that this record was implicitly created. Applications aren't
* permitted to delete records they never created, return an error.
*
* If not re-numbering records, the on-page deleted flag means that
* this record was implicitly created, or, was deleted at some time.
* The former is an error because applications aren't permitted to
* delete records they never created, the latter is an error because
* if the record was "deleted", we could never have found it.
*/
ret = DB_KEYEMPTY;
goto err;
}
/* Delete the item, adjust the counts, adjust the cursors. */
goto err;
/*
* If the page is empty, delete it. The whole tree is locked
* so there are no preparations to make.
*/
stack = 0;
}
} else {
goto err;
goto err;
}
__bam_stkrel(dbc, 0);
/* If we upgraded the CDB lock upon entry, downgrade it now. */
DB_LOCK_IWRITE, 0);
return (ret);
}
/*
* __ram_put --
* Recno db->put function.
*
* Per the step comments in the body: validate the flags, allocate a cursor,
* make sure the needed records exist (reading the whole backing source file
* when appending), add the record, discard the cursor and, when appending,
* hand the new record number back to the caller. Returns the value held in
* ret.
*
* NOTE(review): the parameter list and most statements are not visible in
* this copy of the function.
*/
static int
{
/* Check for invalid flags. */
return (ret);
/* Allocate a cursor. */
return (ret);
/*
* If we're appending to the tree, make sure we've read in all of
* the backing source file. Otherwise, check the user's record
* number and fill in as necessary.
*/
/* Add the record. */
if (ret == 0)
/* Discard the cursor. */
/* Return the record number if we're appending to the tree. */
return (ret);
}
/*
* __ram_sync --
* Recno db->sync function.
*
* Per the step comments in the body: sync the underlying btree first, then
* allocate a cursor, copy the records back to the backing source file and
* discard the cursor. A failure of either of the first two steps is
* returned immediately; otherwise the value held in ret is returned.
*
* NOTE(review): the parameter list and most statements are not visible in
* this copy of the function.
*/
static int
{
/*
* Sync the underlying btree.
*
* !!!
* We don't need to do a panic check or flags check, the "real"
* sync function does all that for us.
*/
return (ret);
/* Allocate a cursor. */
return (ret);
/* Copy back the backing source file. */
/* Discard the cursor. */
return (ret);
}
/*
* __ram_close --
* Recno db->close function.
*
* Per the step comments in the body, releases the recno-specific resources
* (mmap region, source file descriptor, source file name, allocated memory)
* and then closes the underlying btree. The return value is whatever
* __bam_close() returns.
*
* NOTE(review): the parameter list and the cleanup statements are not
* visible in this copy of the function.
*
* PUBLIC: int __ram_close __P((DB *));
*/
int
{
/* Close any underlying mmap region. */
/* Close any backing source file descriptor. */
/* Free any backing source file name. */
/* Free allocated memory. */
/* Close the underlying btree. */
return (__bam_close(dbp));
}
/*
* __ram_c_del --
* Recno cursor->c_del function.
*
* Validates the flags and the CDB cursor mode, then hands the delete to
* __ram_i_delete() and returns its result. EINVAL is returned from the
* CDB check (the exact condition is not visible here).
*
* NOTE(review): the parameter list and the checks themselves are not
* visible in this copy of the function.
*
* PUBLIC: int __ram_c_del __P((DBC *, u_int32_t));
*/
int
{
int ret;
/* Check for invalid flags. */
return (ret);
/*
* If we are running CDB, this had better be either a write
* cursor or an immediate writer.
*/
return (EINVAL);
/*
* The semantics of cursors during delete are as follows: if record
* numbers are mutable (DB_RE_RENUMBER is set), deleting a record
* causes the cursor to automatically point to the record immediately
* following. In this case it is possible to use a single cursor for
* repeated delete operations, without intervening operations.
*
* If record numbers are not mutable, then records are replaced with
* a marker containing a delete flag. If the record referenced by
* this cursor has already been deleted, we will detect that as part
* of the delete operation, and fail.
*/
return (__ram_i_delete(dbc));
}
/*
* __ram_c_get --
* Recno cursor->c_get function.
*
* The flags switch below handles DB_CURRENT, DB_NEXT, DB_FIRST, DB_PREV,
* DB_LAST, DB_SET and DB_SET_RANGE. After the record number has been
* chosen, the tree is searched for it:
*	DB_NOTFOUND	is returned when the search is not an exact match
*			(and from the DB_PREV / DB_LAST cases).
*	DB_KEYEMPTY	is returned for a record carrying the on-page
*			deleted flag that was explicitly requested; for
*			next/prev style requests the code jumps back to
*			retry instead.
*
* NOTE(review): the parameter list and most statements are not visible in
* this copy of the function.
*
* PUBLIC: int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
*/
int
{
PAGE *h;
/* Check for invalid flags. */
return (ret);
/* Clear OR'd in additional bits so we can check for flag equality. */
tmp_rmw = 0;
tmp_rmw = 1;
}
}
/* Initialize the cursor for a new retrieval. */
retry: /* Update the record number. */
stack = 0;
switch (flags) {
case DB_CURRENT:
/*
* If record numbers are mutable: if we just deleted a record,
* there is no action necessary, we return the record following
* the deleted item by virtue of renumbering the tree.
*/
break;
case DB_NEXT:
/*
* If record numbers are mutable: if we just deleted a record,
* we have to avoid incrementing the record number so that we
* return the right record by virtue of renumbering the tree.
*/
break;
break;
}
/* FALLTHROUGH */
case DB_FIRST:
break;
case DB_PREV:
ret = DB_NOTFOUND;
goto err;
}
break;
}
/* FALLTHROUGH */
case DB_LAST:
goto err;
goto err;
ret = DB_NOTFOUND;
goto err;
}
break;
case DB_SET:
case DB_SET_RANGE:
goto err;
break;
}
/* Return the key if the user didn't give us one. */
goto err;
/* Search the tree for the record. */
goto err;
stack = 1;
if (!exact) {
ret = DB_NOTFOUND;
goto err;
}
/*
* If re-numbering records, the on-page deleted flag means this record
* was implicitly created. If not re-numbering records, the on-page
* deleted flag means this record was implicitly created, or, it was
* deleted at some time. Regardless, we skip such records when doing
* cursor next/prev style operations (release the stack and retry), and
* return DB_KEYEMPTY if the application requested them explicitly.
*/
(void)__bam_stkrel(dbc, 0);
goto retry;
}
ret = DB_KEYEMPTY;
goto err;
}
/* Return the data item. */
goto err;
/* The cursor was reset, no further delete adjustment is necessary. */
(void)__bam_stkrel(dbc, 0);
/* Release temporary lock upgrade. */
if (tmp_rmw)
if (ret != 0)
return (ret);
}
/*
* __ram_c_put --
* Recno cursor->c_put function.
*
* The flags switch near the end handles DB_AFTER and DB_BEFORE, each of
* which adjusts the other cursors and points this cursor at the new record.
* Visible error returns: EINVAL and EAGAIN from the CDB checks on entry
* (exact conditions not visible here) and DB_NOTFOUND when the search is
* not an exact match.
*
* NOTE(review): the parameter list and most statements are not visible in
* this copy of the function.
*
* PUBLIC: int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
*/
int
{
void *arg;
return (ret);
/*
* If we are running CDB, this had better be either a write
* cursor or an immediate writer. If it's a regular writer,
* that means we have an IWRITE lock and we need to upgrade
* it to a write lock.
*/
return (EINVAL);
return (EAGAIN);
}
/* Initialize the cursor for a new retrieval. */
/*
* To split, we need a valid key for the page. Since it's a cursor,
* we have to build one.
*
* The split code discards all short-term locks and stack pages.
*/
/*
* This block is never entered by falling into it; presumably it holds
* the target of the "goto split" below (label not visible here).
*/
if (0) {
goto err;
}
goto err;
if (!exact) {
ret = DB_NOTFOUND;
goto err;
}
goto err;
goto split;
}
goto err;
switch (flags) {
case DB_AFTER:
/* Adjust the cursors. */
/* Set this cursor to reference the new record. */
break;
case DB_BEFORE:
/* Adjust the cursors. */
/* Set this cursor to reference the new record. */
break;
}
/* The cursor was reset, no further delete adjustment is necessary. */
DB_LOCK_IWRITE, 0);
if (ret != 0)
return (ret);
}
/*
* __ram_ca --
* Adjust cursors.
*
* Dispatches on op, which takes the values CA_DELETE, CA_IAFTER and
* CA_IBEFORE. Returns nothing.
*
* NOTE(review): the parameter list and the per-case adjustments are not
* visible in this copy of the function.
*
* PUBLIC: void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
*/
void
{
/*
* Adjust the cursors. See the comment in __bam_ca_delete().
*/
switch (op) {
case CA_DELETE:
break;
case CA_IAFTER:
break;
case CA_IBEFORE:
break;
}
}
}
/*
* __ram_getno --
* Check the user's record number, and make sure we've seen it.
*
* can_create is declared as an int parameter; judging by its name and by
* __ram_update's comments it controls whether missing records may be
* created -- TODO confirm against the full source.
*
* Returns EINVAL when the record number check fails (the exact condition
* is not visible here).
*
* NOTE(review): most of the parameter list and the statements are not
* visible in this copy of the function.
*
* PUBLIC: int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int));
*/
int
int can_create;
{
/* Check the user's record number. */
return (EINVAL);
}
/*
* Btree can neither create records nor read them in. Recno can
* do both, see if we can find the record.
*/
}
/*
* __ram_update --
* Ensure the tree has records up to and including the specified one.
*
* Per the comments in the body, this works in two stages: first try to
* read not-yet-seen records from the original (backing) file, then, if
* record creation is permitted (can_create), create empty records up to
* the requested one. Returns 0 when there is nothing (more) to do,
* otherwise the error held in ret.
*
* NOTE(review): most of the parameter list and the statements are not
* visible in this copy of the function.
*/
static int
int can_create;
{
BTREE *t;
int ret;
/*
* If we can't create records and we've read the entire backing input
* file, we're done.
*/
return (0);
/*
* If we haven't seen this record yet, try to get it from the original
* file.
*/
return (ret);
return (ret);
return (ret);
}
/*
* If we can create records, create empty ones up to the requested
* record.
*/
return (0);
if ((ret =
return (ret);
}
}
} else
return (ret);
return (0);
}
/*
* __ram_source --
* Load information about the backing file.
*
* fname is declared as a const char * parameter; presumably the name of
* the backing source file -- TODO confirm. Returns 0 on success or the
* error held in ret. As the comment in the body states, cleanup on error
* is left entirely to the caller.
*
* NOTE(review): most of the parameter list and the statements are not
* visible in this copy of the function.
*/
static int
const char *fname;
{
int ret;
/*
* !!!
* The caller has full responsibility for cleaning up on error --
* (it has to anyway, in case it fails after this routine succeeds).
*/
return (ret);
if ((ret =
return (ret);
}
/*
* XXX
* We'd like to test to see if the file is too big to mmap. Since we
* don't know what size or type off_t's or size_t's are, or the largest
* unsigned integral type is, or what random insanity the local C
* compiler will perpetrate, doing the comparison in a portable way is
* flatly impossible. Hope that mmap fails if the file is too large.
*/
return (ret);
}
return (0);
}
return (ret);
return (0);
}
/*
* __ram_writeback --
* Rewrite the backing file.
*
* Returns 0 immediately when the file wasn't modified or when there is no
* backing source file. Otherwise, per the comments in the body: read any
* remaining records into the tree, close the mmap region and the backing
* file descriptor, reopen the file truncated, and write every record out
* in record-number order. The per-record result is dispatched on 0,
* DB_KEYEMPTY and DB_NOTFOUND; DB_NOTFOUND ends the loop with ret reset to
* 0. The file descriptor is closed on both the err and done paths.
*
* NOTE(review): the parameter list and most statements are not visible in
* this copy of the function.
*/
static int
{
/* If the file wasn't modified, we're done. */
return (0);
/* If there's no backing source file, we're done. */
return (0);
}
/*
* Read any remaining records into the tree.
*
* !!!
* This is why we can't support transactions when applications specify
* backing (re_source) files. At this point we have to read in the
* rest of the records from the file so that we can write all of the
* records back out again, which could modify a page for which we'd
* have to log changes and which we don't have locked. This could be
* partially fixed by taking a snapshot of the entire file during the
* db_open(), or, since db_open() isn't transaction protected, as part
* of the first DB operation. But, if a checkpoint occurs then, the
* part of the log holding the copy of the file could be discarded, and
* that would make it impossible to recover in the face of disaster.
* This could all probably be fixed, but it would require transaction
* protecting the backing source file, i.e. mpool would have to know
* about it, and we don't want to go there.
*/
if ((ret =
return (ret);
/*
* !!!
* Close any underlying mmap region. This is required for Windows NT
* (4.0, Service Pack 2) -- if the file is still mapped, the following
* open will fail.
*/
}
/* Get rid of any backing file descriptor, just on GP's. */
}
/* Open the file, truncating it. */
return (ret);
}
/*
* We step through the records, writing each one out. Use the record
* number and the dbp->get() function, instead of a cursor, so we find
* and write out "deleted" or non-existent records.
*/
/*
* We'll need the delimiter if we're doing variable-length records,
* and the pad character if we're doing fixed-length records.
*/
goto err;
} else
case 0:
if ((ret =
goto err;
goto err;
}
break;
case DB_KEYEMPTY:
if ((ret =
goto err;
goto err;
}
}
break;
case DB_NOTFOUND:
/* Ran off the end of the records: not an error, we're finished. */
ret = 0;
goto done;
}
goto err;
/* A write count other than 1 is treated as a failure. */
if (nw != 1) {
goto err;
}
}
}
err:
done: /* Close the file descriptor. */
if (ret == 0)
return (ret);
}
/*
* __ram_fmap --
* Get fixed length records from a file.
*
* Returns DB_NOTFOUND at one early exit (presumably when the input is
* exhausted -- the condition is not visible here), the error held in ret
* on failure, and 0 otherwise.
*
* NOTE(review): the parameter list and most statements are not visible in
* this copy of the function.
*/
static int
{
int ret;
return (ret);
return (ret);
}
}
return (DB_NOTFOUND);
}
;
/*
* Another process may have read this record from the input
* file and stored it into the database already, in which
* case we don't need to repeat that operation. We detect
* this by checking if the last record we've read is greater
* or equal to the number of records in the database.
*
* XXX
* We should just do a seek, since the records are fixed
* length.
*/
if (len != 0)
++recno;
return (ret);
}
}
return (0);
}
/*
* __ram_vmap --
* Get variable length records from a file.
*
* Variable-length counterpart of __ram_fmap. Returns DB_NOTFOUND at one
* early exit (presumably when the input is exhausted -- the condition is
* not visible here), the error held in ret on failure, and 0 otherwise.
*
* NOTE(review): the parameter list and most statements are not visible in
* this copy of the function.
*/
static int
{
return (ret);
return (DB_NOTFOUND);
}
;
/*
* Another process may have read this record from the input
* file and stored it into the database already, in which
* case we don't need to repeat that operation. We detect
* this by checking if the last record we've read is greater
* or equal to the number of records in the database.
*/
++recno;
return (ret);
}
++sp;
}
return (0);
}
/*
* __ram_add --
* Add records into the tree.
*
* Finds the slot for the record, then inserts or replaces it. The result
* of the insert is dispatched as follows:
*	0		done, no cursor adjustment is required.
*	DB_NEEDSPLIT	release the page stack, split the page and jump
*			back to retry.
*	anything else	treated as an error.
* DB_KEYEXIST is returned when flags is DB_NOOVERWRITE and the record
* already exists without being marked deleted.
*
* NOTE(review): the parameter list and most statements are not visible in
* this copy of the function.
*/
static int
{
PAGE *h;
retry: /* Find the slot for insertion. */
return (ret);
stack = 1;
/*
* If re-numbering records, the on-page deleted flag means this record
* was implicitly created. If not re-numbering records, the on-page
* deleted flag means this record was implicitly created, or, it was
* deleted at some time.
*
* If DB_NOOVERWRITE is set and the item already exists in the tree,
* return an error unless the item was either marked for deletion or
* only implicitly created.
*/
isdeleted = 0;
if (exact) {
isdeleted = 1;
else
if (flags == DB_NOOVERWRITE) {
ret = DB_KEYEXIST;
goto err;
}
}
/*
* Select the arguments for __bam_iitem() and do the insert. If the
* key is an exact match, or we're replacing the data item with a
* new data item, replace the current item. If the key isn't an exact
* match, a new item is inserted at the search location.
*
* NOTE(review): the last sentence was truncated in this copy and has
* been reconstructed -- confirm against the full source.
*/
case 0:
/*
* Don't adjust anything.
*
* If we inserted a record, no cursors need adjusting because
* the only new record it's possible to insert is at the very
* end of the tree. The necessary adjustments to the internal
* page counts were made by __bam_iitem().
*
* If we overwrote a record, no cursors need adjusting because
* future DBcursor->get calls will simply return the underlying
* record (there's no adjustment made for the DB_CURRENT flag
* when a cursor get operation immediately follows a cursor
* delete operation, and the normal adjustment for the DB_NEXT
* flag is still correct).
*/
break;
case DB_NEEDSPLIT:
/* Discard the stack of pages and split the page. */
(void)__bam_stkrel(dbc, 0);
stack = 0;
goto err;
goto retry;
/* NOTREACHED */
default:
goto err;
}
__bam_stkrel(dbc, 0);
return (ret);
}