/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software.  All rights reserved.
 */
1N/A
#include "config.h"

#ifndef lint
static const char sccsid[] = "@(#)os_map.c 10.24 (Sleepycat) 10/12/98";
#endif /* not lint */

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif

#ifdef HAVE_SHMGET
#include <sys/ipc.h>
#include <sys/shm.h>
#endif

#include <errno.h>
#include <string.h>
#endif

#include "db_int.h"
#include "os_jump.h"
#include "common_ext.h"
1N/A
/*
 * Prototypes for the file-local helpers.  Each helper is only compiled
 * when the corresponding system interface was found at configuration
 * time, so each prototype is guarded the same way as its definition.
 */
#ifdef HAVE_MMAP
static int __os_map __P((char *, int, size_t, int, int, int, void **));
#endif
#ifdef HAVE_SHMGET
static int __os_shmget __P((REGINFO *));
#endif
1N/A
/*
 * __db_mapanon_ok --
 *	Return if this OS can support anonymous memory regions.
 *
 *	need_names is non-zero when the anonymous memory has to be shared
 *	among multiple processes and therefore must be nameable.
 *
 *	Returns 0 if such a region can be supported, EINVAL otherwise.
 *
 * PUBLIC: int __db_mapanon_ok __P((int));
 */
int
__db_mapanon_ok(need_names)
	int need_names;
{
	int ret;

	/* Unsupported until one of the mechanisms below says otherwise. */
	ret = EINVAL;

	/*
	 * If we don't have spinlocks, we have to have a file descriptor
	 * for fcntl(2) locking, which implies using mmap(2) to map in a
	 * regular file.  Theoretically, we could probably find ways to
	 * get a file descriptor to lock other types of shared regions,
	 * but I don't see any reason to do so.
	 *
	 * If need_names is set, the application wants to share anonymous
	 * memory among multiple processes, so we have to have a way to
	 * name it.  This requires shmget(2), on UNIX systems.
	 */
#ifdef HAVE_SPINLOCKS
#ifdef HAVE_SHMGET
	/* shmget(2) covers both the named and the unnamed case. */
	ret = 0;
#endif
#ifdef HAVE_MMAP
#if defined(MAP_ANON) || defined(MAP_ANONYMOUS)
	/* Anonymous mmap(2) is enough only when no name is required. */
	if (!need_names)
		ret = 0;
#endif
#else
	COMPQUIET(need_names, 0);
#endif /* HAVE_MMAP */
#endif /* HAVE_SPINLOCKS */

	return (ret);
}
1N/A
/*
 * __db_mapinit --
 *	Return if shared regions need to be initialized.
 *
 *	Returns 1 when the library was configured with REGION_INIT_NEEDED,
 *	0 otherwise.
 *
 * PUBLIC: int __db_mapinit __P((void));
 */
int
__db_mapinit()
{
	/*
	 * Historically, some systems required that all of the bytes of the
	 * region be written before it could be mmapped and accessed randomly.
	 * We have the option of setting REGION_INIT_NEEDED at configuration
	 * time if we're running on one of those systems.
	 */
#ifdef REGION_INIT_NEEDED
	return (1);
#else
	return (0);
#endif
}
1N/A
1N/A/*
1N/A * __db_mapregion --
1N/A * Attach to a shared memory region.
1N/A *
1N/A * PUBLIC: int __db_mapregion __P((char *, REGINFO *));
1N/A */
1N/Aint
1N/A__db_mapregion(path, infop)
1N/A char *path;
1N/A REGINFO *infop;
1N/A{
1N/A int called, ret;
1N/A
1N/A called = 0;
1N/A ret = EINVAL;
1N/A
1N/A /* If the user replaces the map call, call through their interface. */
1N/A if (__db_jump.j_map != NULL) {
1N/A F_SET(infop, REGION_HOLDINGSYS);
1N/A return (__db_jump.j_map(path, infop->fd, infop->size,
1N/A 1, F_ISSET(infop, REGION_ANONYMOUS), 0, &infop->addr));
1N/A }
1N/A
1N/A if (F_ISSET(infop, REGION_ANONYMOUS)) {
1N/A /*
1N/A * !!!
1N/A * If we're creating anonymous regions:
1N/A *
1N/A * If it's private, we use mmap(2). The problem with using
1N/A * shmget(2) is that we may be creating a region of which the
1N/A * application isn't aware, and if the application crashes
1N/A * we'll have no way to remove the system resources for the
1N/A * region.
1N/A *
1N/A * If it's not private, we use the shmget(2) interface if it's
1N/A * available, because it allows us to name anonymous memory.
1N/A * If shmget(2) isn't available, use the mmap(2) calls.
1N/A *
1N/A * In the case of anonymous memory, using mmap(2) means the
1N/A * memory isn't named and only the single process and its
1N/A * threads can access the region.
1N/A */
1N/A#ifdef HAVE_MMAP
1N/A#ifdef MAP_ANON
1N/A#define HAVE_MMAP_ANONYMOUS 1
1N/A#else
1N/A#ifdef MAP_ANONYMOUS
1N/A#define HAVE_MMAP_ANONYMOUS 1
1N/A#endif
1N/A#endif
1N/A#endif
1N/A#ifdef HAVE_MMAP_ANONYMOUS
1N/A if (!called && F_ISSET(infop, REGION_PRIVATE)) {
1N/A called = 1;
1N/A ret = __os_map(path,
1N/A infop->fd, infop->size, 1, 1, 0, &infop->addr);
1N/A }
1N/A#endif
1N/A#ifdef HAVE_SHMGET
1N/A if (!called) {
1N/A called = 1;
1N/A ret = __os_shmget(infop);
1N/A }
1N/A#endif
1N/A#ifdef HAVE_MMAP
1N/A /*
1N/A * If we're trying to join an unnamed anonymous region, fail --
1N/A * that's not possible.
1N/A */
1N/A if (!called) {
1N/A called = 1;
1N/A
1N/A if (!F_ISSET(infop, REGION_CREATED)) {
1N/A __db_err(infop->dbenv,
1N/A "cannot join region in unnamed anonymous memory");
1N/A return (EINVAL);
1N/A }
1N/A
1N/A ret = __os_map(path,
1N/A infop->fd, infop->size, 1, 1, 0, &infop->addr);
1N/A }
1N/A#endif
1N/A } else {
1N/A /*
1N/A * !!!
1N/A * If we're creating normal regions, we use the mmap(2)
1N/A * interface if it's available because it's POSIX 1003.1
1N/A * standard and we trust it more than we do shmget(2).
1N/A */
1N/A#ifdef HAVE_MMAP
1N/A if (!called) {
1N/A called = 1;
1N/A
1N/A /* Mmap(2) regions that aren't anonymous can grow. */
1N/A F_SET(infop, REGION_CANGROW);
1N/A
1N/A ret = __os_map(path,
1N/A infop->fd, infop->size, 1, 0, 0, &infop->addr);
1N/A }
1N/A#endif
1N/A#ifdef HAVE_SHMGET
1N/A if (!called) {
1N/A called = 1;
1N/A ret = __os_shmget(infop);
1N/A }
1N/A#endif
1N/A }
1N/A return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __db_unmapregion --
1N/A * Detach from the shared memory region.
1N/A *
1N/A * PUBLIC: int __db_unmapregion __P((REGINFO *));
1N/A */
1N/Aint
1N/A__db_unmapregion(infop)
1N/A REGINFO *infop;
1N/A{
1N/A int called, ret;
1N/A
1N/A called = 0;
1N/A ret = EINVAL;
1N/A
1N/A if (__db_jump.j_unmap != NULL)
1N/A return (__db_jump.j_unmap(infop->addr, infop->size));
1N/A
1N/A#ifdef HAVE_SHMGET
1N/A if (infop->segid != INVALID_SEGID) {
1N/A called = 1;
1N/A ret = shmdt(infop->addr) ? errno : 0;
1N/A }
1N/A#endif
1N/A#ifdef HAVE_MMAP
1N/A if (!called) {
1N/A called = 1;
1N/A ret = munmap(infop->addr, infop->size) ? errno : 0;
1N/A }
1N/A#endif
1N/A return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __db_unlinkregion --
1N/A * Remove the shared memory region.
1N/A *
1N/A * PUBLIC: int __db_unlinkregion __P((char *, REGINFO *));
1N/A */
1N/Aint
1N/A__db_unlinkregion(name, infop)
1N/A char *name;
1N/A REGINFO *infop;
1N/A{
1N/A int called, ret;
1N/A
1N/A called = 0;
1N/A ret = EINVAL;
1N/A
1N/A if (__db_jump.j_runlink != NULL)
1N/A return (__db_jump.j_runlink(name));
1N/A
1N/A#ifdef HAVE_SHMGET
1N/A if (infop->segid != INVALID_SEGID) {
1N/A called = 1;
1N/A ret = shmctl(infop->segid, IPC_RMID, NULL) ? errno : 0;
1N/A }
1N/A#endif
1N/A#ifdef HAVE_MMAP
1N/A COMPQUIET(infop, NULL);
1N/A if (!called) {
1N/A called = 1;
1N/A ret = 0;
1N/A }
1N/A#endif
1N/A return (ret);
1N/A}
1N/A
1N/A/*
1N/A * __db_mapfile --
1N/A * Map in a shared memory file.
1N/A *
1N/A * PUBLIC: int __db_mapfile __P((char *, int, size_t, int, void **));
1N/A */
1N/Aint
1N/A__db_mapfile(path, fd, len, is_rdonly, addr)
1N/A char *path;
1N/A int fd, is_rdonly;
1N/A size_t len;
1N/A void **addr;
1N/A{
1N/A if (__db_jump.j_map != NULL)
1N/A return (__db_jump.j_map(path, fd, len, 0, 0, is_rdonly, addr));
1N/A
1N/A#ifdef HAVE_MMAP
1N/A return (__os_map(path, fd, len, 0, 0, is_rdonly, addr));
1N/A#else
1N/A return (EINVAL);
1N/A#endif
1N/A}
1N/A
1N/A/*
1N/A * __db_unmapfile --
1N/A * Unmap the shared memory file.
1N/A *
1N/A * PUBLIC: int __db_unmapfile __P((void *, size_t));
1N/A */
1N/Aint
1N/A__db_unmapfile(addr, len)
1N/A void *addr;
1N/A size_t len;
1N/A{
1N/A if (__db_jump.j_unmap != NULL)
1N/A return (__db_jump.j_unmap(addr, len));
1N/A
1N/A#ifdef HAVE_MMAP
1N/A return (munmap(addr, len) ? errno : 0);
1N/A#else
1N/A return (EINVAL);
1N/A#endif
1N/A}
1N/A
#ifdef HAVE_MMAP
/*
 * __os_map --
 *	Call the mmap(2) function.
 *
 *	Maps len bytes starting at offset 0 and stores the address in *addr.
 *	When both is_region and is_anonymous are non-zero the mapping is
 *	anonymous and fd is ignored; otherwise the file open on fd is
 *	mapped.  A non-zero is_rdonly gives a private, read-only mapping;
 *	zero gives a shared, read-write mapping.  path is unused.
 *
 *	Returns 0 on success or the errno value of the failing call.
 */
static int
__os_map(path, fd, len, is_region, is_anonymous, is_rdonly, addr)
	char *path;
	int fd, is_region, is_anonymous, is_rdonly;
	size_t len;
	void **addr;
{
	void *p;
	int flags, prot;

	COMPQUIET(path, NULL);

	/*
	 * If it's read-only, it's private, and if it's not, it's shared.
	 * Don't bother with an additional parameter.
	 */
	flags = is_rdonly ? MAP_PRIVATE : MAP_SHARED;

	if (is_region && is_anonymous) {
		/*
		 * BSD derived systems use MAP_ANON; Digital Unix and HP/UX
		 * use MAP_ANONYMOUS.
		 */
#ifdef MAP_ANON
		flags |= MAP_ANON;
#endif
#ifdef MAP_ANONYMOUS
		flags |= MAP_ANONYMOUS;
#endif
		/* Anonymous mappings are not backed by a file descriptor. */
		fd = -1;
	}
#ifdef MAP_FILE
	if (!is_region || !is_anonymous) {
		/*
		 * Historically, MAP_FILE was required for mapping regular
		 * files, even though it was the default.  Some systems have
		 * it, some don't, some that have it set it to 0.
		 */
		flags |= MAP_FILE;
	}
#endif

	/*
	 * I know of no systems that implement the flag to tell the system
	 * that the region contains semaphores, but it's not an unreasonable
	 * thing to do, and has been part of the design since forever.  I
	 * don't think anyone will object, but don't set it for read-only
	 * files, it doesn't make sense.
	 */
#ifdef MAP_HASSEMAPHORE
	if (!is_rdonly)
		flags |= MAP_HASSEMAPHORE;
#endif

	prot = PROT_READ | (is_rdonly ? 0 : PROT_WRITE);

/*
 * XXX
 * Work around a bug in the VMS V7.1 mmap() implementation.  To map a file
 * into memory on VMS it needs to be opened in a certain way, originally.
 * To get the file opened in that certain way, the VMS mmap() closes the
 * file and re-opens it.  When it does this, it doesn't flush any caches
 * out to disk before closing.  The problem this causes us is that when the
 * memory cache doesn't get written out, the file isn't big enough to match
 * the memory chunk and the mmap() call fails.  This call to fsync() fixes
 * the problem.  DEC thinks this isn't a bug because of language in XPG5
 * discussing user responsibility for on-disk and in-memory synchronization.
 */
#ifdef VMS
	if (__os_fsync(fd) == -1)
		return (errno);
#endif

	/*
	 * MAP_FAILED was not defined in early mmap implementations.  The
	 * fallback is a fully parenthesized pointer value so it behaves
	 * like the system definition wherever it is used.
	 */
#ifndef MAP_FAILED
#define	MAP_FAILED	((void *)-1)
#endif
	if ((p =
	    mmap(NULL, len, prot, flags, fd, (off_t)0)) == (void *)MAP_FAILED)
		return (errno);

	*addr = p;
	return (0);
}
#endif
1N/A
#ifdef HAVE_SHMGET
/*
 * __os_shmget --
 *	Call the shmget(2) family of functions.
 *
 *	When REGION_CREATED is set, a new segment of infop->size bytes is
 *	created and its ID stored in infop->segid; otherwise the existing
 *	infop->segid is used.  The segment is then attached and its address
 *	stored in infop->addr, and REGION_HOLDINGSYS is set.
 *
 *	Returns 0 on success or an errno value on failure.  A failed attach
 *	while joining (REGION_CREATED not set) is reported as EAGAIN.
 */
static int
__os_shmget(infop)
	REGINFO *infop;
{
	/*
	 * IPC_PRIVATE is a key value, not a flag: it goes in the first
	 * argument of shmget(2), which then always creates a new segment.
	 * The flags argument carries only the permission bits.
	 */
	if (F_ISSET(infop, REGION_CREATED) &&
	    (infop->segid = shmget(IPC_PRIVATE, infop->size, 0600)) == -1)
		return (errno);

	/*
	 * NOTE(review): if shmat(2) fails right after the segment was
	 * created, the segment is not removed here -- confirm that the
	 * caller cleans it up.
	 */
	if ((infop->addr = shmat(infop->segid, NULL, 0)) == (void *)-1) {
		/*
		 * If we're trying to join the region and failing, assume
		 * that there was a reboot and the region no longer exists.
		 */
		if (!F_ISSET(infop, REGION_CREATED))
			errno = EAGAIN;
		return (errno);
	}

	F_SET(infop, REGION_HOLDINGSYS);
	return (0);
}
#endif