1N/A/*-
1N/A * See the file LICENSE for redistribution information.
1N/A *
1N/A * Copyright (c) 1996, 1997, 1998
1N/A * Sleepycat Software. All rights reserved.
1N/A */
1N/A#include "config.h"
1N/A
1N/A#ifndef lint
1N/Astatic const char sccsid[] = "@(#)mp_fopen.c 10.60 (Sleepycat) 1/1/99";
1N/A#endif /* not lint */
1N/A
1N/A#ifndef NO_SYSTEM_INCLUDES
1N/A#include <sys/types.h>
1N/A
1N/A#include <errno.h>
1N/A#include <string.h>
1N/A#endif
1N/A
1N/A#include "db_int.h"
1N/A#include "shqueue.h"
1N/A#include "db_shash.h"
1N/A#include "mp.h"
1N/A#include "common_ext.h"
1N/A
1N/Astatic int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
1N/Astatic int __memp_mf_open __P((DB_MPOOL *,
1N/A const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **));
1N/A
1N/A/*
1N/A * memp_fopen --
1N/A * Open a backing file for the memory pool.
1N/A */
1N/Aint
1N/Amemp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp)
1N/A DB_MPOOL *dbmp;
1N/A const char *path;
1N/A u_int32_t flags;
1N/A int mode;
1N/A size_t pagesize;
1N/A DB_MPOOL_FINFO *finfop;
1N/A DB_MPOOLFILE **retp;
1N/A{
1N/A int ret;
1N/A
1N/A MP_PANIC_CHECK(dbmp);
1N/A
1N/A /* Validate arguments. */
1N/A if ((ret = __db_fchk(dbmp->dbenv,
1N/A "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0)
1N/A return (ret);
1N/A
1N/A /* Require a non-zero pagesize. */
1N/A if (pagesize == 0) {
1N/A __db_err(dbmp->dbenv, "memp_fopen: pagesize not specified");
1N/A return (EINVAL);
1N/A }
1N/A if (finfop != NULL && finfop->clear_len > pagesize)
1N/A return (EINVAL);
1N/A
1N/A return (__memp_fopen(dbmp,
1N/A NULL, path, flags, mode, pagesize, 1, finfop, retp));
1N/A}
1N/A
1N/A/*
1N/A * __memp_fopen --
1N/A * Open a backing file for the memory pool; internal version.
1N/A *
1N/A * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
1N/A * PUBLIC: u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
1N/A */
1N/Aint
1N/A__memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp)
1N/A DB_MPOOL *dbmp;
1N/A MPOOLFILE *mfp;
1N/A const char *path;
1N/A u_int32_t flags;
1N/A int mode, needlock;
1N/A size_t pagesize;
1N/A DB_MPOOL_FINFO *finfop;
1N/A DB_MPOOLFILE **retp;
1N/A{
1N/A DB_ENV *dbenv;
1N/A DB_MPOOLFILE *dbmfp;
1N/A DB_MPOOL_FINFO finfo;
1N/A db_pgno_t last_pgno;
1N/A size_t maxmap;
1N/A u_int32_t mbytes, bytes;
1N/A int ret;
1N/A u_int8_t idbuf[DB_FILE_ID_LEN];
1N/A char *rpath;
1N/A
1N/A dbenv = dbmp->dbenv;
1N/A ret = 0;
1N/A rpath = NULL;
1N/A
1N/A /*
1N/A * If mfp is provided, we take the DB_MPOOL_FINFO information from
1N/A * the mfp. We don't bother initializing everything, because some
1N/A * of them are expensive to acquire. If no mfp is provided and the
1N/A * finfop argument is NULL, we default the values.
1N/A */
1N/A if (finfop == NULL) {
1N/A memset(&finfo, 0, sizeof(finfo));
1N/A if (mfp != NULL) {
1N/A finfo.ftype = mfp->ftype;
1N/A finfo.pgcookie = NULL;
1N/A finfo.fileid = NULL;
1N/A finfo.lsn_offset = mfp->lsn_off;
1N/A finfo.clear_len = mfp->clear_len;
1N/A } else {
1N/A finfo.ftype = 0;
1N/A finfo.pgcookie = NULL;
1N/A finfo.fileid = NULL;
1N/A finfo.lsn_offset = -1;
1N/A finfo.clear_len = 0;
1N/A }
1N/A finfop = &finfo;
1N/A }
1N/A
1N/A /* Allocate and initialize the per-process structure. */
1N/A if ((ret = __os_calloc(1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0)
1N/A return (ret);
1N/A dbmfp->dbmp = dbmp;
1N/A dbmfp->fd = -1;
1N/A dbmfp->ref = 1;
1N/A if (LF_ISSET(DB_RDONLY))
1N/A F_SET(dbmfp, MP_READONLY);
1N/A
1N/A if (path == NULL) {
1N/A if (LF_ISSET(DB_RDONLY)) {
1N/A __db_err(dbenv,
1N/A "memp_fopen: temporary files can't be readonly");
1N/A ret = EINVAL;
1N/A goto err;
1N/A }
1N/A last_pgno = 0;
1N/A } else {
1N/A /* Get the real name for this file and open it. */
1N/A if ((ret = __db_appname(dbenv,
1N/A DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0)
1N/A goto err;
1N/A if ((ret = __db_open(rpath,
1N/A LF_ISSET(DB_CREATE | DB_RDONLY),
1N/A DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) {
1N/A __db_err(dbenv, "%s: %s", rpath, strerror(ret));
1N/A goto err;
1N/A }
1N/A
1N/A /*
1N/A * Don't permit files that aren't a multiple of the pagesize,
1N/A * and find the number of the last page in the file, all the
1N/A * time being careful not to overflow 32 bits.
1N/A *
1N/A * !!!
1N/A * We can't use off_t's here, or in any code in the mainline
1N/A * library for that matter. (We have to use them in the os
1N/A * stubs, of course, as there are system calls that take them
1N/A * as arguments.) The reason is that some customers build in
1N/A * environments where an off_t is 32-bits, but still run where
1N/A * offsets are 64-bits, and they pay us a lot of money.
1N/A */
1N/A if ((ret = __os_ioinfo(rpath,
1N/A dbmfp->fd, &mbytes, &bytes, NULL)) != 0) {
1N/A __db_err(dbenv, "%s: %s", rpath, strerror(ret));
1N/A goto err;
1N/A }
1N/A
1N/A /* Page sizes have to be a power-of-two, ignore mbytes. */
1N/A if (bytes % pagesize != 0) {
1N/A __db_err(dbenv,
1N/A "%s: file size not a multiple of the pagesize",
1N/A rpath);
1N/A ret = EINVAL;
1N/A goto err;
1N/A }
1N/A
1N/A last_pgno = mbytes * (MEGABYTE / pagesize);
1N/A last_pgno += bytes / pagesize;
1N/A
1N/A /* Correction: page numbers are zero-based, not 1-based. */
1N/A if (last_pgno != 0)
1N/A --last_pgno;
1N/A
1N/A /*
1N/A * Get the file id if we weren't given one. Generated file id's
1N/A * don't use timestamps, otherwise there'd be no chance of any
1N/A * other process joining the party.
1N/A */
1N/A if (finfop->fileid == NULL) {
1N/A if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0)
1N/A goto err;
1N/A finfop->fileid = idbuf;
1N/A }
1N/A }
1N/A
1N/A /*
1N/A * If we weren't provided an underlying shared object to join with,
1N/A * find/allocate the shared file objects. Also allocate space for
1N/A * for the per-process thread lock.
1N/A */
1N/A if (needlock)
1N/A LOCKREGION(dbmp);
1N/A
1N/A if (mfp == NULL)
1N/A ret = __memp_mf_open(dbmp,
1N/A path, pagesize, last_pgno, finfop, &mfp);
1N/A else {
1N/A ++mfp->ref;
1N/A ret = 0;
1N/A }
1N/A if (ret == 0 &&
1N/A F_ISSET(dbmp, MP_LOCKHANDLE) && (ret =
1N/A __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0)
1N/A LOCKINIT(dbmp, dbmfp->mutexp);
1N/A
1N/A if (needlock)
1N/A UNLOCKREGION(dbmp);
1N/A if (ret != 0)
1N/A goto err;
1N/A
1N/A dbmfp->mfp = mfp;
1N/A
1N/A /*
1N/A * If a file:
1N/A * + is read-only
1N/A * + isn't temporary
1N/A * + doesn't require any pgin/pgout support
1N/A * + the DB_NOMMAP flag wasn't set
1N/A * + and is less than mp_mmapsize bytes in size
1N/A *
1N/A * we can mmap it instead of reading/writing buffers. Don't do error
1N/A * checking based on the mmap call failure. We want to do normal I/O
1N/A * on the file if the reason we failed was because the file was on an
1N/A * NFS mounted partition, and we can fail in buffer I/O just as easily
1N/A * as here.
1N/A *
1N/A * XXX
1N/A * We'd like to test to see if the file is too big to mmap. Since we
1N/A * don't know what size or type off_t's or size_t's are, or the largest
1N/A * unsigned integral type is, or what random insanity the local C
1N/A * compiler will perpetrate, doing the comparison in a portable way is
1N/A * flatly impossible. Hope that mmap fails if the file is too large.
1N/A */
1N/A#define DB_MAXMMAPSIZE (10 * 1024 * 1024) /* 10 Mb. */
1N/A if (F_ISSET(mfp, MP_CAN_MMAP)) {
1N/A if (!F_ISSET(dbmfp, MP_READONLY))
1N/A F_CLR(mfp, MP_CAN_MMAP);
1N/A if (path == NULL)
1N/A F_CLR(mfp, MP_CAN_MMAP);
1N/A if (finfop->ftype != 0)
1N/A F_CLR(mfp, MP_CAN_MMAP);
1N/A if (LF_ISSET(DB_NOMMAP))
1N/A F_CLR(mfp, MP_CAN_MMAP);
1N/A maxmap = dbenv == NULL || dbenv->mp_mmapsize == 0 ?
1N/A DB_MAXMMAPSIZE : dbenv->mp_mmapsize;
1N/A if (mbytes > maxmap / MEGABYTE ||
1N/A (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE))
1N/A F_CLR(mfp, MP_CAN_MMAP);
1N/A }
1N/A dbmfp->addr = NULL;
1N/A if (F_ISSET(mfp, MP_CAN_MMAP)) {
1N/A dbmfp->len = (size_t)mbytes * MEGABYTE + bytes;
1N/A if (__db_mapfile(rpath,
1N/A dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {
1N/A dbmfp->addr = NULL;
1N/A F_CLR(mfp, MP_CAN_MMAP);
1N/A }
1N/A }
1N/A if (rpath != NULL)
1N/A __os_freestr(rpath);
1N/A
1N/A LOCKHANDLE(dbmp, dbmp->mutexp);
1N/A TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
1N/A UNLOCKHANDLE(dbmp, dbmp->mutexp);
1N/A
1N/A *retp = dbmfp;
1N/A return (0);
1N/A
1N/Aerr: /*
1N/A * Note that we do not have to free the thread mutex, because we
1N/A * never get to here after we have successfully allocated it.
1N/A */
1N/A if (rpath != NULL)
1N/A __os_freestr(rpath);
1N/A if (dbmfp->fd != -1)
1N/A (void)__os_close(dbmfp->fd);
1N/A if (dbmfp != NULL)
1N/A __os_free(dbmfp, sizeof(DB_MPOOLFILE));
1N/A return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __memp_mf_open --
1N/A * Open an MPOOLFILE.
1N/A */
1N/Astatic int
1N/A__memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp)
1N/A DB_MPOOL *dbmp;
1N/A const char *path;
1N/A size_t pagesize;
1N/A db_pgno_t last_pgno;
1N/A DB_MPOOL_FINFO *finfop;
1N/A MPOOLFILE **retp;
1N/A{
1N/A MPOOLFILE *mfp;
1N/A int ret;
1N/A void *p;
1N/A
1N/A#define ISTEMPORARY (path == NULL)
1N/A
1N/A /*
1N/A * Walk the list of MPOOLFILE's, looking for a matching file.
1N/A * Temporary files can't match previous files.
1N/A */
1N/A if (!ISTEMPORARY)
1N/A for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
1N/A mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
1N/A if (F_ISSET(mfp, MP_TEMP))
1N/A continue;
1N/A if (!memcmp(finfop->fileid,
1N/A R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
1N/A if (finfop->clear_len != mfp->clear_len ||
1N/A finfop->ftype != mfp->ftype ||
1N/A pagesize != mfp->stat.st_pagesize) {
1N/A __db_err(dbmp->dbenv,
1N/A "%s: ftype, clear length or pagesize changed",
1N/A path);
1N/A return (EINVAL);
1N/A }
1N/A
1N/A /* Found it: increment the reference count. */
1N/A ++mfp->ref;
1N/A *retp = mfp;
1N/A return (0);
1N/A }
1N/A }
1N/A
1N/A /* Allocate a new MPOOLFILE. */
1N/A if ((ret = __memp_alloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
1N/A return (ret);
1N/A *retp = mfp;
1N/A
1N/A /* Initialize the structure. */
1N/A memset(mfp, 0, sizeof(MPOOLFILE));
1N/A mfp->ref = 1;
1N/A mfp->ftype = finfop->ftype;
1N/A mfp->lsn_off = finfop->lsn_offset;
1N/A mfp->clear_len = finfop->clear_len;
1N/A
1N/A /*
1N/A * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
1N/A * we have to know the last page in the file. Figure it out and save
1N/A * it away.
1N/A */
1N/A mfp->stat.st_pagesize = pagesize;
1N/A mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
1N/A
1N/A if (ISTEMPORARY)
1N/A F_SET(mfp, MP_TEMP);
1N/A else {
1N/A /* Copy the file path into shared memory. */
1N/A if ((ret = __memp_alloc(dbmp,
1N/A strlen(path) + 1, &mfp->path_off, &p)) != 0)
1N/A goto err;
1N/A memcpy(p, path, strlen(path) + 1);
1N/A
1N/A /* Copy the file identification string into shared memory. */
1N/A if ((ret = __memp_alloc(dbmp,
1N/A DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
1N/A goto err;
1N/A memcpy(p, finfop->fileid, DB_FILE_ID_LEN);
1N/A
1N/A F_SET(mfp, MP_CAN_MMAP);
1N/A }
1N/A
1N/A /* Copy the page cookie into shared memory. */
1N/A if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) {
1N/A mfp->pgcookie_len = 0;
1N/A mfp->pgcookie_off = 0;
1N/A } else {
1N/A if ((ret = __memp_alloc(dbmp,
1N/A finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
1N/A goto err;
1N/A memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size);
1N/A mfp->pgcookie_len = finfop->pgcookie->size;
1N/A }
1N/A
1N/A /* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
1N/A SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile);
1N/A
1N/A if (0) {
1N/Aerr: if (mfp->path_off != 0)
1N/A __db_shalloc_free(dbmp->addr,
1N/A R_ADDR(dbmp, mfp->path_off));
1N/A if (mfp->fileid_off != 0)
1N/A __db_shalloc_free(dbmp->addr,
1N/A R_ADDR(dbmp, mfp->fileid_off));
1N/A if (mfp != NULL)
1N/A __db_shalloc_free(dbmp->addr, mfp);
1N/A mfp = NULL;
1N/A }
1N/A return (0);
1N/A}
1N/A
1N/A/*
1N/A * memp_fclose --
1N/A * Close a backing file for the memory pool.
1N/A */
1N/Aint
1N/Amemp_fclose(dbmfp)
1N/A DB_MPOOLFILE *dbmfp;
1N/A{
1N/A DB_MPOOL *dbmp;
1N/A int ret, t_ret;
1N/A
1N/A dbmp = dbmfp->dbmp;
1N/A ret = 0;
1N/A
1N/A MP_PANIC_CHECK(dbmp);
1N/A
1N/A for (;;) {
1N/A LOCKHANDLE(dbmp, dbmp->mutexp);
1N/A
1N/A /*
1N/A * We have to reference count DB_MPOOLFILE structures as other
1N/A * threads may be using them. The problem only happens if the
1N/A * application makes a bad design choice. Here's the path:
1N/A *
1N/A * Thread A opens a database.
1N/A * Thread B uses thread A's DB_MPOOLFILE to write a buffer
1N/A * in order to free up memory in the mpool cache.
1N/A * Thread A closes the database while thread B is using the
1N/A * DB_MPOOLFILE structure.
1N/A *
1N/A * By opening all databases before creating the threads, and
1N/A * closing them after the threads have exited, applications
1N/A * get better performance and avoid the problem path entirely.
1N/A *
1N/A * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer
1N/A * is a short-term lock, even in worst case, since we better be
1N/A * the only thread of control using the DB_MPOOLFILE structure
1N/A * to read pages *into* the cache. Wait until we're the only
1N/A * reference holder and remove the DB_MPOOLFILE structure from
1N/A * the list, so nobody else can even find it.
1N/A */
1N/A if (dbmfp->ref == 1) {
1N/A TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);
1N/A break;
1N/A }
1N/A UNLOCKHANDLE(dbmp, dbmp->mutexp);
1N/A
1N/A (void)__os_sleep(1, 0);
1N/A }
1N/A UNLOCKHANDLE(dbmp, dbmp->mutexp);
1N/A
1N/A /* Complain if pinned blocks never returned. */
1N/A if (dbmfp->pinref != 0)
1N/A __db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",
1N/A __memp_fn(dbmfp), (u_long)dbmfp->pinref);
1N/A
1N/A /* Close the underlying MPOOLFILE. */
1N/A (void)__memp_mf_close(dbmp, dbmfp);
1N/A
1N/A /* Discard any mmap information. */
1N/A if (dbmfp->addr != NULL &&
1N/A (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0)
1N/A __db_err(dbmp->dbenv,
1N/A "%s: %s", __memp_fn(dbmfp), strerror(ret));
1N/A
1N/A /* Close the file; temporary files may not yet have been created. */
1N/A if (dbmfp->fd != -1 && (t_ret = __os_close(dbmfp->fd)) != 0) {
1N/A __db_err(dbmp->dbenv,
1N/A "%s: %s", __memp_fn(dbmfp), strerror(t_ret));
1N/A if (ret != 0)
1N/A t_ret = ret;
1N/A }
1N/A
1N/A /* Free memory. */
1N/A if (dbmfp->mutexp != NULL) {
1N/A LOCKREGION(dbmp);
1N/A __db_shalloc_free(dbmp->addr, dbmfp->mutexp);
1N/A UNLOCKREGION(dbmp);
1N/A }
1N/A
1N/A /* Discard the DB_MPOOLFILE structure. */
1N/A __os_free(dbmfp, sizeof(DB_MPOOLFILE));
1N/A
1N/A return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __memp_mf_close --
1N/A * Close down an MPOOLFILE.
1N/A */
1N/Astatic int
1N/A__memp_mf_close(dbmp, dbmfp)
1N/A DB_MPOOL *dbmp;
1N/A DB_MPOOLFILE *dbmfp;
1N/A{
1N/A BH *bhp, *nbhp;
1N/A MPOOL *mp;
1N/A MPOOLFILE *mfp;
1N/A size_t mf_offset;
1N/A
1N/A mp = dbmp->mp;
1N/A mfp = dbmfp->mfp;
1N/A
1N/A LOCKREGION(dbmp);
1N/A
1N/A /* If more than a single reference, simply decrement. */
1N/A if (mfp->ref > 1) {
1N/A --mfp->ref;
1N/A goto ret1;
1N/A }
1N/A
1N/A /*
1N/A * Move any BH's held by the file to the free list. We don't free the
1N/A * memory itself because we may be discarding the memory pool, and it's
1N/A * fairly expensive to reintegrate the buffers back into the region for
1N/A * no purpose.
1N/A */
1N/A mf_offset = R_OFFSET(dbmp, mfp);
1N/A for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
1N/A nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
1N/A
1N/A#ifdef DEBUG_NO_DIRTY
1N/A /* Complain if we find any blocks that were left dirty. */
1N/A if (F_ISSET(bhp, BH_DIRTY))
1N/A __db_err(dbmp->dbenv,
1N/A "%s: close: pgno %lu left dirty; ref %lu",
1N/A __memp_fn(dbmfp),
1N/A (u_long)bhp->pgno, (u_long)bhp->ref);
1N/A#endif
1N/A
1N/A if (bhp->mf_offset == mf_offset) {
1N/A if (F_ISSET(bhp, BH_DIRTY)) {
1N/A ++mp->stat.st_page_clean;
1N/A --mp->stat.st_page_dirty;
1N/A }
1N/A __memp_bhfree(dbmp, mfp, bhp, 0);
1N/A SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh);
1N/A }
1N/A }
1N/A
1N/A /* Delete from the list of MPOOLFILEs. */
1N/A SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
1N/A
1N/A /* Free the space. */
1N/A if (mfp->path_off != 0)
1N/A __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
1N/A if (mfp->fileid_off != 0)
1N/A __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
1N/A if (mfp->pgcookie_off != 0)
1N/A __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off));
1N/A __db_shalloc_free(dbmp->addr, mfp);
1N/A
1N/Aret1: UNLOCKREGION(dbmp);
1N/A return (0);
1N/A}