/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software.  All rights reserved.
 */
1N/A#include "config.h"
1N/A
1N/A#ifndef lint
1N/Astatic const char sccsid[] = "@(#)mp_region.c 10.35 (Sleepycat) 12/11/98";
1N/A#endif /* not lint */
1N/A
1N/A#ifndef NO_SYSTEM_INCLUDES
1N/A#include <sys/types.h>
1N/A
1N/A#include <errno.h>
1N/A#include <string.h>
1N/A#endif
1N/A
1N/A#include "db_int.h"
1N/A#include "shqueue.h"
1N/A#include "db_shash.h"
1N/A#include "mp.h"
1N/A#include "common_ext.h"
1N/A
/*
 * __memp_reg_alloc --
 *	Allocate some space in the mpool region, with locking.
 *
 * PUBLIC: int __memp_reg_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
 */
1N/Aint
1N/A__memp_reg_alloc(dbmp, len, offsetp, retp)
1N/A DB_MPOOL *dbmp;
1N/A size_t len, *offsetp;
1N/A void *retp;
1N/A{
1N/A int ret;
1N/A
1N/A LOCKREGION(dbmp);
1N/A ret = __memp_alloc(dbmp, len, offsetp, retp);
1N/A UNLOCKREGION(dbmp);
1N/A return (ret);
1N/A}
1N/A
/*
 * __memp_alloc --
 *	Allocate some space in the mpool region.  The caller must hold the
 *	region lock (see __memp_reg_alloc for the locked variant).
 *
 * PUBLIC: int __memp_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
 */
1N/Aint
1N/A__memp_alloc(dbmp, len, offsetp, retp)
1N/A DB_MPOOL *dbmp;
1N/A size_t len, *offsetp;
1N/A void *retp;
1N/A{
1N/A BH *bhp, *nbhp;
1N/A MPOOL *mp;
1N/A MPOOLFILE *mfp;
1N/A size_t fsize, total;
1N/A int nomore, restart, ret, wrote;
1N/A void *p;
1N/A
1N/A mp = dbmp->mp;
1N/A
1N/A nomore = 0;
1N/Aalloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
1N/A if (offsetp != NULL)
1N/A *offsetp = R_OFFSET(dbmp, p);
1N/A *(void **)retp = p;
1N/A return (0);
1N/A }
1N/A if (nomore) {
1N/A __db_err(dbmp->dbenv,
1N/A "Unable to allocate %lu bytes from mpool shared region: %s\n",
1N/A (u_long)len, strerror(ret));
1N/A return (ret);
1N/A }
1N/A
1N/A /* Look for a buffer on the free list that's the right size. */
1N/A for (bhp =
1N/A SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
1N/A nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
1N/A
1N/A if (__db_shsizeof(bhp) == len) {
1N/A SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
1N/A if (offsetp != NULL)
1N/A *offsetp = R_OFFSET(dbmp, bhp);
1N/A *(void **)retp = bhp;
1N/A return (0);
1N/A }
1N/A }
1N/A
1N/A /* Discard from the free list until we've freed enough memory. */
1N/A total = 0;
1N/A for (bhp =
1N/A SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
1N/A nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
1N/A
1N/A SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
1N/A __db_shalloc_free(dbmp->addr, bhp);
1N/A --mp->stat.st_page_clean;
1N/A
1N/A /*
1N/A * Retry as soon as we've freed up sufficient space. If we
1N/A * will have to coalesce memory to satisfy the request, don't
1N/A * try until it's likely (possible?) that we'll succeed.
1N/A */
1N/A total += fsize = __db_shsizeof(bhp);
1N/A if (fsize >= len || total >= 3 * len)
1N/A goto alloc;
1N/A }
1N/A
1N/Aretry: /* Find a buffer we can flush; pure LRU. */
1N/A restart = total = 0;
1N/A for (bhp =
1N/A SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
1N/A nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
1N/A
1N/A /* Ignore pinned or locked (I/O in progress) buffers. */
1N/A if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
1N/A continue;
1N/A
1N/A /* Find the associated MPOOLFILE. */
1N/A mfp = R_ADDR(dbmp, bhp->mf_offset);
1N/A
1N/A /*
1N/A * Write the page if it's dirty.
1N/A *
1N/A * If we wrote the page, fall through and free the buffer. We
1N/A * don't have to rewalk the list to acquire the buffer because
1N/A * it was never available for any other process to modify it.
1N/A * If we didn't write the page, but we discarded and reacquired
1N/A * the region lock, restart the buffer list walk. If we neither
1N/A * wrote the buffer nor discarded the region lock, continue down
1N/A * the buffer list.
1N/A */
1N/A if (F_ISSET(bhp, BH_DIRTY)) {
1N/A ++bhp->ref;
1N/A if ((ret = __memp_bhwrite(dbmp,
1N/A mfp, bhp, &restart, &wrote)) != 0)
1N/A return (ret);
1N/A --bhp->ref;
1N/A
1N/A /*
1N/A * It's possible that another process wants this buffer
1N/A * and incremented the ref count while we were writing
1N/A * it.
1N/A */
1N/A if (bhp->ref != 0)
1N/A goto retry;
1N/A
1N/A if (wrote)
1N/A ++mp->stat.st_rw_evict;
1N/A else {
1N/A if (restart)
1N/A goto retry;
1N/A continue;
1N/A }
1N/A } else
1N/A ++mp->stat.st_ro_evict;
1N/A
1N/A /*
1N/A * Check to see if the buffer is the size we're looking for.
1N/A * If it is, simply reuse it.
1N/A */
1N/A total += fsize = __db_shsizeof(bhp);
1N/A if (fsize == len) {
1N/A __memp_bhfree(dbmp, mfp, bhp, 0);
1N/A
1N/A if (offsetp != NULL)
1N/A *offsetp = R_OFFSET(dbmp, bhp);
1N/A *(void **)retp = bhp;
1N/A return (0);
1N/A }
1N/A
1N/A /* Free the buffer. */
1N/A __memp_bhfree(dbmp, mfp, bhp, 1);
1N/A
1N/A /*
1N/A * Retry as soon as we've freed up sufficient space. If we
1N/A * have to coalesce of memory to satisfy the request, don't
1N/A * try until it's likely (possible?) that we'll succeed.
1N/A */
1N/A if (fsize >= len || total >= 3 * len)
1N/A goto alloc;
1N/A
1N/A /* Restart the walk if we discarded the region lock. */
1N/A if (restart)
1N/A goto retry;
1N/A }
1N/A nomore = 1;
1N/A goto alloc;
1N/A}
1N/A
/*
 * __memp_ropen --
 *	Attach to, and optionally create, the mpool region.
 *
 * PUBLIC: int __memp_ropen
 * PUBLIC:    __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
 */
int
__memp_ropen(dbmp, path, cachesize, mode, is_private, flags)
	DB_MPOOL *dbmp;
	const char *path;
	size_t cachesize;
	int mode, is_private;
	u_int32_t flags;
{
	MPOOL *mp;
	size_t rlen;
	int defcache, ret;

	/*
	 * Unlike other DB subsystems, mpool can't simply grow the region
	 * because it returns pointers into the region to its clients.  To
	 * "grow" the region, we'd have to allocate a new region and then
	 * store a region number in the structures that reference regional
	 * objects.  It's reasonable that we fail regardless, as clients
	 * shouldn't have every page in the region pinned, so the only
	 * "failure" mode should be a performance penalty because we don't
	 * find a page in the cache that we'd like to have found.
	 *
	 * Up the user's cachesize by 25% to account for our overhead.
	 */
	/*
	 * Clamp the cache size: 0 selects the compiled-in default, and any
	 * nonzero value below the minimum is raised to the minimum.  Track
	 * whether the default was applied so REGION_SIZEDEF can be set.
	 */
	defcache = 0;
	if (cachesize < DB_CACHESIZE_MIN)
		if (cachesize == 0) {
			defcache = 1;
			cachesize = DB_CACHESIZE_DEF;
		} else
			cachesize = DB_CACHESIZE_MIN;
	rlen = cachesize + cachesize / 4;

	/*
	 * Map in the region.
	 *
	 * If it's a private mpool, use malloc, it's a lot faster than
	 * instantiating a region.
	 */
	dbmp->reginfo.dbenv = dbmp->dbenv;
	dbmp->reginfo.appname = DB_APP_NONE;
	if (path == NULL)
		dbmp->reginfo.path = NULL;
	else
		/* Copy the path; freed on every error path below. */
		if ((ret = __os_strdup(path, &dbmp->reginfo.path)) != 0)
			return (ret);
	dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE;
	dbmp->reginfo.mode = mode;
	dbmp->reginfo.size = rlen;
	dbmp->reginfo.dbflags = flags;
	dbmp->reginfo.flags = 0;
	if (defcache)
		F_SET(&dbmp->reginfo, REGION_SIZEDEF);

	/*
	 * If we're creating a temporary region, don't use any standard
	 * naming.
	 */
	if (is_private) {
		dbmp->reginfo.appname = DB_APP_TMP;
		dbmp->reginfo.file = NULL;
		F_SET(&dbmp->reginfo, REGION_PRIVATE);
	}

	/*
	 * NOTE(review): the bare UNLOCKREGION calls below imply that
	 * __db_rattach returns holding the region lock on success --
	 * confirm against the region-attach code.
	 */
	if ((ret = __db_rattach(&dbmp->reginfo)) != 0) {
		if (dbmp->reginfo.path != NULL)
			__os_freestr(dbmp->reginfo.path);
		return (ret);
	}

	/*
	 * The MPOOL structure is first in the region, the rest of the region
	 * is free space.
	 */
	dbmp->mp = dbmp->reginfo.addr;
	dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL);

	/* Initialize a created region. */
	if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) {
		mp = dbmp->mp;
		/* Empty the LRU, free-buffer and file queues. */
		SH_TAILQ_INIT(&mp->bhq);
		SH_TAILQ_INIT(&mp->bhfq);
		SH_TAILQ_INIT(&mp->mpfq);

		__db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));

		/*
		 * Assume we want to keep the hash chains with under 10 pages
		 * on each chain.  We don't know the pagesize in advance, and
		 * it may differ for different files.  Use a pagesize of 1K for
		 * the calculation -- we walk these chains a lot, they should
		 * be short.
		 */
		mp->htab_buckets =
		    __db_tablesize((cachesize / (1 * 1024)) / 10);

		/* Allocate hash table space and initialize it. */
		if ((ret = __db_shalloc(dbmp->addr,
		    mp->htab_buckets * sizeof(DB_HASHTAB),
		    0, &dbmp->htab)) != 0)
			goto err;
		__db_hashinit(dbmp->htab, mp->htab_buckets);
		/*
		 * Store the table's location as a region offset so other
		 * processes (which map the region at different addresses)
		 * can recover their own local pointer, as done below.
		 */
		mp->htab = R_OFFSET(dbmp, dbmp->htab);

		ZERO_LSN(mp->lsn);
		mp->lsn_cnt = 0;

		memset(&mp->stat, 0, sizeof(mp->stat));
		mp->stat.st_cachesize = cachesize;

		mp->flags = 0;
	}

	/* Get the local hash table address. */
	dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);

	UNLOCKREGION(dbmp);
	return (0);

	/*
	 * Error cleanup: release the region lock, detach, remove the
	 * backing file if we created it, and free the path copy.
	 */
err:	UNLOCKREGION(dbmp);
	(void)__db_rdetach(&dbmp->reginfo);
	if (F_ISSET(&dbmp->reginfo, REGION_CREATED))
		(void)memp_unlink(path, 1, dbmp->dbenv);

	if (dbmp->reginfo.path != NULL)
		__os_freestr(dbmp->reginfo.path);
	return (ret);
}