/* shm.c revision 824c205f0f8b4a6b02f9096e50cb9e298ddcc0a5 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Inter-Process Communication Shared Memory Facility.
*
*
* Resource controls
* -----------------
*
* Control: zone.max-shm-ids (rc_zone_shmmni)
* Description: Maximum number of shared memory ids allowed a zone.
*
* When shmget() is used to allocate a shared memory segment, one id
* is allocated. If the id allocation doesn't succeed, shmget()
* fails and errno is set to ENOSPC. Upon successful shmctl(,
* IPC_RMID) the id is deallocated.
*
* Control: project.max-shm-ids (rc_project_shmmni)
* Description: Maximum number of shared memory ids allowed a project.
*
* When shmget() is used to allocate a shared memory segment, one id
* is allocated. If the id allocation doesn't succeed, shmget()
* fails and errno is set to ENOSPC. Upon successful shmctl(,
* IPC_RMID) the id is deallocated.
*
* Control: zone.max-shm-memory (rc_zone_shmmax)
* Description: Total amount of shared memory allowed a zone.
*
* When shmget() is used to allocate a shared memory segment, the
* segment's size is allocated against this limit. If the space
* allocation doesn't succeed, shmget() fails and errno is set to
* EINVAL. The size will be deallocated once the last process has
* detached the segment and the segment has been successfully
* shmctl(, IPC_RMID)ed.
*
* Control: project.max-shm-memory (rc_project_shmmax)
* Description: Total amount of shared memory allowed a project.
*
* When shmget() is used to allocate a shared memory segment, the
* segment's size is allocated against this limit. If the space
* allocation doesn't succeed, shmget() fails and errno is set to
* EINVAL. The size will be deallocated once the last process has
* detached the segment and the segment has been successfully
* shmctl(, IPC_RMID)ed.
*/
#include <sys/sysmacros.h>
#include <sys/tuneable.h>
#include <sys/lwpchan_impl.h>
#include <sys/ipc_impl.h>
#include <sys/shm_impl.h>
static void shm_dtor(kipc_perm_t *);
static void shm_rmid(kipc_perm_t *);
static void shm_remove_zone(zoneid_t, void *);
/*
* Semantics for share_page_table and ism_off:
*
*
* Setting share_page_table automatically turns on the SHM_SHARE_MMU (ISM) flag
* in a call to shmat(2). In other words, with share_page_table set, you always
* get ISM, even if say, DISM is specified. It should really be called "ism_on".
*
* Setting ism_off turns off the SHM_SHARE_MMU flag from the flags passed to
* shmat(2).
*
* If both share_page_table and ism_off are set, share_page_table prevails.
*
* Although these tunables should probably be removed, they do have some
* external exposure; as long as they exist, they should at least work sensibly.
*/
int share_page_table;
int ism_off;
/*
* The following tunables are obsolete. Though for compatibility we
* still read and interpret shminfo_shmmax and shminfo_shmmni, the
* preferred mechanism for administering the
* Shared Memory facility is through the resource controls described at
* the top of this file.
*/
extern rctl_hndl_t rc_zone_shmmax;
extern rctl_hndl_t rc_zone_shmmni;
extern rctl_hndl_t rc_project_shmmax;
extern rctl_hndl_t rc_project_shmmni;
static ipc_service_t *shm_svc;
static zone_key_t shm_zone_key;
/*
* Module linkage information for the kernel.
*/
/*
 * System-call table entries and module linkage for the shared memory
 * syscall module: 4-argument shmsys() entry points for the native and
 * (when _SYSCALL32_IMPL) 32-bit syscall tables.
 *
 * NOTE(review): this extract is elided -- the sy_flags/sy_call
 * initializers and the modlsys definitions are missing, and the stray
 * "};" lines below appear to be residue of removed struct definitions.
 * Confirm against the full source before editing.
 */
static struct sysent ipcshm_sysent = {
4,
#ifdef _SYSCALL32_IMPL
#else /* _SYSCALL32_IMPL */
#endif /* _SYSCALL32_IMPL */
(int (*)())shmsys
};
#ifdef _SYSCALL32_IMPL
/* 32-bit flavor of the entry, same handler. */
static struct sysent ipcshm_sysent32 = {
4,
(int (*)())shmsys
};
#endif /* _SYSCALL32_IMPL */
};
#ifdef _SYSCALL32_IMPL
};
#endif /* _SYSCALL32_IMPL */
/* Module linkage: registers the syscall (and 32-bit syscall) modules. */
static struct modlinkage modlinkage = {
&modlsys,
#ifdef _SYSCALL32_IMPL
#endif
};
/*
 * Module initialization: presumably installs the IPC service, registers
 * the zone shutdown key (shm_zone_key), and installs the module.
 *
 * NOTE(review): body elided in this extract -- the zone_key_delete()
 * after the first return looks like the error-unwind path of
 * mod_install() failure, not dead code; confirm against full source.
 */
int
_init(void)
{
int result;
return (0);
(void) zone_key_delete(shm_zone_key);
return (result);
}
/*
 * Module unload entry point. Always refuses unload (EBUSY), a common
 * pattern for IPC modules whose state cannot be safely torn down.
 */
int
_fini(void)
{
return (EBUSY);
}
/*
 * NOTE(review): nameless fragment -- by position this is presumably
 * _info(struct modinfo *), returning mod_info(&modlinkage, ...);
 * the signature and body are elided. Confirm against full source.
 */
int
{
}
/*
* Shmat (attach shared segment) system call.
*/
/*
 * shmat(2) handler: attach a shared memory segment to the calling
 * process's address space, either as [D]ISM (useISM path, large-page
 * backed, kernel-shared page tables) or as a normal segvn mapping.
 *
 * NOTE(review): this body is heavily elided in the extract -- the
 * parameter list, most conditions, and the errret label itself are
 * missing. Kept byte-for-byte; comments only.
 */
static int
{
int error = 0;
int useISM;
int result;
return (EINVAL);
goto errret;
/* Write access requested: presumably verify write permission here. */
if ((flags & SHM_RDONLY) == 0 &&
goto errret;
/* Reject mutually-invalid SPT flag combinations up front. */
if (spt_invalid(flags)) {
goto errret;
}
/* Apply the ism_off / share_page_table tunables (see file header). */
if (ism_off)
if (share_page_table) {
}
goto errret;
/*
* If trying to change an existing {D}ISM segment from ISM
* to DISM or vice versa, return error. Note that this
* validation of flags needs to be done after the effect of
* tunables such as ism_off and share_page_table, for
* semantics that are consistent with the tunables' settings.
*/
if (spt_invalid(newsptflags)) {
goto errret;
}
}
/* somewhere to record spt info for final detach */
if (useISM) {
/*
* Handle ISM
*/
n = page_num_pagesizes();
if (n < 2) { /* large pages aren't supported */
goto errret;
}
/*
* Pick a share pagesize to use, if (!isspt(sp)).
* Otherwise use the already chosen page size.
*
* For the initial shmat (!isspt(sp)), where sptcreate is
* called, map_pgsz is called to recommend a [D]ISM pagesize,
* important for systems which offer more than one potential
* [D]ISM pagesize.
* If the shmat is just to attach to an already created
* [D]ISM segment, then use the previously selected page size.
*/
if (share_size == 0) {
goto errret;
}
} else {
}
/*
* For 64 bit amd64, we want to share an entire page table
* if possible. We know (ugh) that there are 512 entries
* in a page table. The number for 32 bit non-PAE should be
* 1024, but I'm not going to special case that. Note using 512
* won't cause a failure below. It retries with align_hint set
* to share_size
*/
align_hint *= 512;
#endif /* __i386 || __amd64 */
#if defined(__sparcv9)
/*
* If no address has been passed in, and this is a
* 64-bit process, we'll try to find an address
* in the predict-ISM zone.
*/
/*
* We found an address which looks like a
* candidate. We want to round it up, and
* then check that it's a valid user range.
* This assures that we won't fail below.
*/
addr = 0;
}
}
}
#endif /* __sparcv9 */
if (addr == 0) {
/* Let the system pick an attach address (elided map_addr loop). */
for (;;) {
break;
}
goto errret;
}
} else {
/* Use the user-supplied attach address */
/*
* Check that the address range
* 1) is properly aligned
* 2) is correct in unix terms
* 3) is within an unmapped address segment
*/
/* XXX - in SunOS, is sp->shm_segsz */
goto errret;
}
as->a_userlimit);
if (result == RANGE_BADPROT) {
/*
* We try to accommodate processors which
* may not support execute permissions on
* all ISM segments by trying the check
* again but without PROT_EXEC.
*/
as->a_userlimit);
}
if (result != RANGE_OKAY ||
goto errret;
}
}
if (error) {
goto errret;
}
/*
* Ensure we're attaching to an ISM segment with
* fewer or equal permissions than what we're
* allowed. Fail if the segment has more
* permissions than what we're allowed.
*/
goto errret;
}
if (error == 0)
} else {
/*
* Normal case.
*/
if (flags & SHM_RDONLY)
prot &= ~PROT_WRITE;
if (addr == 0) {
/* Let the system pick the attach address */
goto errret;
}
} else {
/* Use the user-supplied attach address */
~(SHMLBA - 1));
/*
* Check that the address range
* 1) is properly aligned
* 2) is correct in unix terms
* 3) is within an unmapped address segment
*/
/* XXX - in SunOS, is sp->shm_segsz */
goto errret;
}
as->a_userlimit);
if (result == RANGE_BADPROT) {
as->a_userlimit);
}
if (result != RANGE_OKAY ||
goto errret;
}
}
/* Initialize the create arguments and map the segment */
}
if (error)
goto errret;
/* record shmem range for the detach */
return (error);
}
/*
 * Destructor for a shared memory id (kipc_perm_t): presumably tears
 * down the SPT info (shm_sptinfo) if one was created, and releases the
 * anon_map. Body elided in this extract; comments only.
 */
static void
{
if (sp->shm_sptinfo) {
}
}
}
/*
 * IPC_RMID hook for the shm service. Intentionally empty: all real
 * teardown happens in shm_dtor() once the last reference drops.
 */
/* ARGSUSED */
static void
{
/* nothing to do */
}
/*
* Shmctl system call.
*/
/*
 * shmctl(2) handler: IPC_SET/IPC_STAT (and their 64-bit variants),
 * IPC_RMID, and SHM_LOCK/SHM_UNLOCK. Pattern: copyin before lookup,
 * then dispatch under the ipc lock, copyout after.
 *
 * NOTE(review): body elided -- the copyin/copyout calls and lock/unlock
 * logic are missing from this extract. Kept byte-for-byte.
 */
/* ARGSUSED */
static int
{
int error = 0;
struct shmid_ds64 ds64;
/*
* Perform pre- or non-lookup actions (e.g. copyins, RMID).
*/
switch (cmd) {
case IPC_SET:
return (EFAULT);
break;
case IPC_SET64:
return (EFAULT);
break;
case IPC_RMID:
}
return (EINVAL);
switch (cmd) {
/* Set ownership and permissions. */
case IPC_SET:
break;
break;
case IPC_STAT:
break;
return (EFAULT);
return (0);
case IPC_SET64:
break;
break;
case IPC_STAT64:
return (EFAULT);
return (0);
/* Lock segment in memory */
case SHM_LOCK:
break;
"shmctl - couldn't lock %ld pages into memory",
}
}
break;
/* Unlock segment */
case SHM_UNLOCK:
break;
}
}
break;
default:
break;
}
return (error);
}
/*
 * Detach-time bookkeeping for one attachment: drops lwpchan mappings
 * covering the segment's range and decrements the ISM attach count
 * (shm_ismattch). Body elided in this extract; comments only.
 */
static void
{
/*
* Discard lwpchan mappings.
*/
/*
* Perform some detach-time accounting.
*/
sp->shm_ismattch--;
}
/*
 * shmdt(2) handler: presumably validates the address is SHMLBA-aligned
 * and matches a recorded segacct attachment, then detaches it.
 * Returns EINVAL on either validation failure, 0 on success.
 * Body elided in this extract.
 */
static int
{
return (EINVAL);
}
return (EINVAL);
}
return (0);
}
/*
* Remove all shared memory segments associated with a given zone.
* Called by zone_shutdown when the zone is halted.
*/
/*
 * Zone-shutdown callback (registered under shm_zone_key): removes all
 * shm ids belonging to the halting zone. Body elided in this extract --
 * presumably a single ipc_remove_zone(shm_svc, ...) call.
 */
/*ARGSUSED1*/
static void
{
}
/*
* Shmget (create new shmem) system call.
*/
/*
 * shmget(2) handler: look up or create a segment for (key, size, flags).
 * Existing-key path validates the requested size against the segment;
 * create path checks rsize and the project/zone rctls, allocates the
 * id, and commits it (retrying from `top` on a lost race).
 *
 * NOTE(review): body elided -- lookup, allocation, and rctl_test
 * conditions are missing from this extract. Kept byte-for-byte.
 */
static int
{
int error;
top:
return (error);
/*
* A segment with the requested key exists.
*/
return (EINVAL);
}
} else {
/*
* A new segment should be created.
*/
/*
* Check rsize and the per-project and per-zone limit on
* shared memory. Checking rsize handles both the size == 0
* case and the size < ULONG_MAX & PAGEMASK case (i.e.
* rounding up wraps a size_t).
*/
if (rsize == 0 ||
return (EINVAL);
}
return (ENOMEM);
}
/*
* Store the original user's requested size, in bytes,
* rather than the page-aligned size. The former is
* used for IPC_STAT and shmget() lookups. The latter
* is saved in the anon_map structure and is used for
* calls to the vm layer.
*/
sp->shm_ismattch = 0;
/*
* Check limits one last time, push id into global
* visibility, and update resource usage counts.
*/
(kipc_perm_t *)sp)) {
goto top;
return (error);
}
if ((rctl_test(rc_project_shmmax,
return (EINVAL);
}
}
#ifdef C2_AUDIT
/* Record the new id for C2 auditing when auditing is enabled. */
if (audit_active)
#endif
return (0);
}
/*
* shmids system call.
*/
/*
 * shmids syscall: copy out the list of active shm ids -- presumably a
 * single ipc_ids(shm_svc, ...) delegation. Body elided in this extract.
 */
static int
{
}
/*
* System entry point for shmat, shmctl, shmdt, and shmget system calls.
*/
/*
 * Common entry point for the shm syscalls: dispatches on opcode to the
 * shmat/shmctl/shmdt/shmget/shmids handlers, converts an error into
 * set_errno() (elided), and returns the result value.
 */
static uintptr_t
{
int error;
switch (opcode) {
case SHMAT:
break;
case SHMCTL:
break;
case SHMDT:
break;
case SHMGET:
break;
case SHMIDS:
break;
default:
/* Unknown opcode: presumably sets error = EINVAL -- elided. */
break;
}
if (error)
return (r_val);
}
/*
* segacct_t comparator
* This works as expected, with one minor change: the first of two real
* segments with equal addresses is considered to be 'greater than' the
* second. We only return equal when searching using a template, in
* which case we explicitly set the template segment's length to 0
* (which is invalid for a real segment).
*/
/*
 * AVL comparator for segacct_t nodes, keyed on attach address (see the
 * block comment above: equal addresses only compare equal for the
 * zero-length template used in lookups). Conditions elided here.
 */
static int
shm_sacompar(const void *x, const void *y)
{
return (-1);
return (1);
return (1);
} else {
return (0);
}
return (1);
} else {
return (0);
}
}
/*
* add this record to the segacct list.
*/
/*
 * Insert a segacct record into the process's p_segacct AVL tree,
 * creating the tree on first use. Body elided in this extract.
 */
static void
{
} else if (tree) {
}
/*
* We can ignore the result of avl_find, as the comparator will
* never return equal for segments with non-zero length. This
* is a necessary hack to get around the fact that we do, in
* fact, have duplicate keys.
*/
}
/*
* Duplicate parent's segacct records in child.
*/
/*
 * fork() hook: duplicate the parent's segacct records into the child
 * and bump each segment's ISM attach count. Traversal details elided.
 */
void
{
/*
* We are the only lwp running in the parent so nobody can
* mess with our p_segacct list. Thus it is safe to traverse
* the list without holding p_lock. This is essential because
* we can't hold p_lock during a KM_SLEEP allocation.
*/
sp->shm_ismattch++;
mutex_exit(mp);
}
}
/*
* Detach shared memory segments from exiting process.
*/
/*
 * exit() hook: detach all remaining shared memory segments from the
 * exiting process. Body elided -- presumably walks and frees the
 * p_segacct tree, calling shm_detach() for each record.
 */
void
{
}
/*
* At this time pages should be in memory, so just lock them.
*/
/*
 * Lock every page of the segment in memory via page_pp_lock().
 * Pages are expected to already be resident (see comment above);
 * a missing page is a fatal inconsistency, hence the panic.
 */
static void
{
panic("lock_again: page not in the system");
/*NOTREACHED*/
}
(void) page_pp_lock(pp, 0, 0);
}
}
/* check if this segment is already locked. */
/*
 * Report the segment's lock state by scanning its vpage protections:
 * 0 = unlocked, 1 = partially locked (mixed VPP_ISPPLOCK bits),
 * 2 = fully locked.
 * NOTE(review): function name elided in this extract -- confirm.
 */
/*ARGSUSED*/
static int
{
size_t i;
return (0); /* unlocked */
if (VPP_ISPPLOCK(vpp) == 0) {
return (1); /* partially locked */
}
}
return (2); /* locked */
}
/*
* Attach the shared memory segment to the process
* address space and lock the pages.
*/
/*
 * SHM_LOCK implementation: attach the segment into an address space
 * (segvn mapping, ENOMEM if no range available) if not already fully
 * locked, then fault the pages in and lock them. Returns 0 when the
 * segment is already fully locked. Body elided in this extract.
 */
static int
{
struct segvn_crargs crargs;
struct segvn_data *svd;
/* check if shared memory is already attached */
case 0: /* unlocked */
case 1: /* partially locked */
return (error);
case 2: /* locked */
return (0);
default:
break;
}
}
}
/* attach shm segment to our address space */
return (ENOMEM);
}
/* Initialize the create arguments and map the segment */
if (!error) {
NULL, 0)) == 0) {
}
}
return (error);
}
/*
* Unlock shared memory
*/
/*
 * SHM_UNLOCK implementation: walk the segment's anon pages and drop
 * the page locks taken by shmem_lock()/lock_again(). When `lck` is
 * set (the segment is supposed to be locked), a missing anon slot or
 * page is a fatal inconsistency; otherwise such holes are skipped.
 */
static void
{
if (lck) {
panic("shmem_unlock: null app");
/*NOTREACHED*/
}
continue;
}
if (lck) {
panic("shmem_unlock: page not in the system");
/*NOTREACHED*/
}
continue;
}
page_pp_unlock(pp, 0, 0);
}
}
}
/*
* We call this routine when we have removed all references to this
* amp. This means all shmdt()s and the IPC_RMID have been done.
*/
/*
 * Final teardown of the segment's anon_map once every shmdt() and the
 * IPC_RMID have completed: performs any outstanding SHM_UNLOCK, then
 * frees the anon_map. Conditions elided in this extract.
 */
static void
{
/*
* If we are finally deleting the
* shared memory, and if no one did
* the SHM_UNLOCK, we must do it now.
*/
/*
* Free up the anon_map.
*/
} else {
}
}
/*
* Return the shared memory id for the process's virtual address.
* Return SHMID_NONE if addr is not within a SysV shared memory segment.
* Return SHMID_FREE if addr's SysV shared memory segment's id has been freed.
*
* shmgetid() is called from code in /proc with the process locked but
* with pp->p_lock not held. The address space lock is held, so we
* cannot grab pp->p_lock here due to lock-ordering constraints.
* Because of all this, modifications to the p_segacct list must only
* be made after calling prbarrier() to ensure the process is not locked.
* See shmdt() and sa_add(), above. shmgetid() may also be called on a
* thread's own process without the process locked.
*/
/*
 * Map a process virtual address to its SysV shm id (see the locking
 * discussion in the comment above): SHMID_NONE if addr is not inside a
 * shm attachment, SHMID_FREE if the attachment's id was already
 * removed. Lookup and success path elided in this extract.
 */
int
{
return (SHMID_NONE);
return (SHMID_NONE);
return (SHMID_FREE);
}