/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#include <sys/sysmacros.h>
#include <sys/tuneable.h>
#include <sys/shm_impl.h>
#include <sys/tnf_probe.h>
/*
* # pages used for spt
*/
/*
* segspt_minfree is the memory left for system after ISM
* locked its pages; it is set up to 5% of availrmem in
* sptcreate when ISM is created. ISM should not use more
* than ~90% of availrmem; if it does, then the performance
* of the system may decrease. Machines with large memories may
* be able to use up more memory for ISM so we set the default
* segspt_minfree to 5% (which gives ISM max 95% of availrmem).
* If somebody wants even more memory for ISM (risking hanging
* the system) they can patch segspt_minfree to a smaller number.
*/
static void
{
panic("segspt_badop called");
/*NOTREACHED*/
}
SEGSPT_BADOP(int), /* dup */
SEGSPT_BADOP(int), /* fault */
SEGSPT_BADOP(int), /* setprot */
SEGSPT_BADOP(int), /* checkprot */
SEGSPT_BADOP(int), /* kluster */
SEGSPT_BADOP(int), /* sync */
SEGSPT_BADOP(int), /* lockop */
SEGSPT_BADOP(int), /* getprot */
SEGSPT_BADOP(int), /* gettype */
SEGSPT_BADOP(int), /* getvp */
SEGSPT_BADOP(int), /* advise */
SEGSPT_BADOP(void), /* dump */
SEGSPT_BADOP(int), /* pagelock */
SEGSPT_BADOP(int), /* setpgsz */
SEGSPT_BADOP(int), /* getmemid */
segspt_getpolicy, /* getpolicy */
SEGSPT_BADOP(int), /* capable */
seg_inherit_notsup /* inherit */
};
register char *vec);
segspt_shmadvise, /* advise */
};
enum seg_rw, int);
/*ARGSUSED*/
int
{
int err;
#ifdef DEBUG
#endif
if (segspt_minfree == 0) /* leave min 5% of availrmem for */
if (!hat_supported(HAT_SHARED_PT, (void *)0))
return (EINVAL);
/*
* get a new as for this shared memory segment
*/
/*
* create a shared page table (spt) segment
*/
return (err);
}
return (0);
}
void
{
#ifdef DEBUG
#endif
}
/*
* called from seg_free().
* free (i.e., unlock, unmap, return to free list)
* all the pages in the given seg.
*/
void
{
if (sptd->spt_realsize)
if (sptd->spt_ppa_lckcnt)
sizeof (*sptd->spt_ppa_lckcnt)
}
}
/*ARGSUSED*/
static int
{
return (0);
}
/*ARGSUSED*/
static size_t
{
#ifdef lint
#endif
/* page exists, and it's locked. */
}
return (len);
} else {
ulong_t i;
int ret;
return (EINVAL);
}
for (i = 0; i < npages; i++, anon_index++) {
ret = 0;
}
} else {
}
ret |= SEG_PAGE_LOCKED;
}
}
return (len);
}
}
static int
{
/*
* seg.s_size may have been rounded up to the largest page size
* in shmat().
* XXX This should be cleaned up. sptdestroy should take a length
* argument which should be the same as sptcreate. Then
* this rounding would not be needed (or is done in shm.c)
* Only the check for full segment will be needed.
*
* XXX -- shouldn't raddr == 0 always? These tests don't seem
* to be useful at all.
*/
return (0);
} else
return (EINVAL);
}
int
{
int err;
caddr_t a;
/*
* We are holding the a_lock on the underlying dummy as,
* so we can make calls to the HAT layer.
*/
#ifdef DEBUG
#endif
return (err);
}
goto out1;
KM_NOSLEEP)) == NULL)
goto out2;
}
goto out3;
}
/*
* Set policy to affect initial allocation of pages in
* anon_map_createpages()
*/
/*
* We are rounding up the size of the anon array
* on 4 M boundary because we always create 4 M
* of page(s) when locking, faulting pages and we
* don't have to check for all corner cases e.g.
* if there is enough space to allocate 4 M
* page.
*/
/*
* The zone will never be NULL, as a fully created
* shm always has an owning zone.
*/
goto out4;
}
}
sptd->spt_pcachecnt = 0;
return (0);
}
/*
* get array of pages for each anon slot in amp
*/
goto out4;
/* May be partially locked, so, count bytes to charge for locking */
for (i = 0; i < npages; i++)
lockedbytes += PAGESIZE;
if (lockedbytes > 0) {
for (i = 0; i < npages; i++)
page_unlock(ppa[i]);
goto out4;
}
}
/*
* addr is initial address corresponding to the first page on ppa list
*/
for (i = 0; i < npages; i++) {
/* attempt to lock all pages */
/*
* if unable to lock any page, unlock all
* of them and return error
*/
for (j = 0; j < i; j++)
for (i = 0; i < npages; i++)
page_unlock(ppa[i]);
goto out4;
}
}
/*
* Some platforms assume that ISM mappings are HAT_LOAD_LOCK
* for the entire life of the segment. For example platforms
* that do not support Dynamic Reconfiguration.
*/
/*
* Load translations one large page at a time
* to make sure we don't create mappings bigger than
* segment's size code in case underlying pages
* are shared with segvn's segment that uses bigger
* size code than we do.
*/
}
/*
* On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
* we will leave the pages locked SE_SHARED for the life
* of the ISM segment. This will prevent any calls to
* hat_pageunload() on this ISM segment for those platforms.
*/
if (!(hat_flags & HAT_LOAD_LOCK)) {
/*
* On platforms that support HAT_DYNAMIC_ISM_UNMAP,
* we no longer need to hold the SE_SHARED lock on the pages,
* since L_PAGELOCK and F_SOFTLOCK calls will grab the
* SE_SHARED lock on the pages as necessary.
*/
for (i = 0; i < npages; i++)
page_unlock(ppa[i]);
}
sptd->spt_pcachecnt = 0;
return (0);
out4:
out3:
out2:
out1:
return (err);
}
/*ARGSUSED*/
void
{
int root = 0;
if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
}
}
panic("segspt_free_pages: null app");
/*NOTREACHED*/
}
} else {
== NULL)
continue;
}
/*
* If this platform supports HAT_DYNAMIC_ISM_UNMAP,
* the pages won't be having SE_SHARED lock at this
* point.
*
* On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
* the pages are still held SE_SHARED locked from the
* original segspt_create()
*
* Our goal is to get SE_EXCL lock on each page, remove
* permanent lock on it and invalidate the page.
*/
if (hat_flags == HAT_UNLOAD_UNMAP)
else {
panic("segspt_free_pages: "
"page not locked");
/*NOTREACHED*/
}
if (!page_tryupgrade(pp)) {
}
}
panic("segspt_free_pages: "
"page not in the system");
/*NOTREACHED*/
}
} else {
continue;
}
/*
* It's logical to invalidate the pages here as in most cases
* these were created by segspt.
*/
if (root == 0) {
root = 1;
curnpgs--;
root = 0;
curnpgs = 0;
} else {
curnpgs--;
}
} else {
panic("segspt_free_pages: bad large page");
/*NOTREACHED*/
}
/*
* Before destroying the pages, we need to take care
* of the rctl locked memory accounting. For that
* we need to calculate the unlocked_bytes.
*/
/*LINTED: constant in conditional context */
}
}
if (unlocked_bytes > 0)
}
panic("segspt_free_pages: bad large page");
/*NOTREACHED*/
}
/*
* mark that pages have been released
*/
sptd->spt_realsize = 0;
}
}
/*
* Get memory allocation policy info for specified address in given segment
*/
static lgrp_mem_policy_info_t *
{
/*
* Get anon_map from segspt
*
* Assume that no lock needs to be held on anon_map, since
* it should be protected by its reference count which must be
* nonzero for an existing segment
* Need to grab readers lock on policy tree though
*/
return (NULL);
/*
* Get policy info
*
* Assume starting anon index of 0
*/
return (policy_info);
}
/*
* DISM only.
* Return locked pages over a given range.
*
* We will cache all DISM locked pages and save the pplist for the
* entire segment in the ppa field of the underlying DISM segment structure.
* Later, during a call to segspt_reclaim() we will use this ppa array
* to page_unlock() all of the pages and then we will free this ppa list.
*/
/*ARGSUSED*/
static int
{
/*
* we will be using the underlying sptseg and its base address
* and length for the caching arguments.
*/
/*
* check if the request is larger than number of pages covered
* by amp
*/
return (ENOTSUP);
}
if (type == L_PAGEUNLOCK) {
/*
* If someone is blocked while unmapping, we purge
* segment page cache and thus reclaim pplist synchronously
* without waiting for seg_pasync_thread. This speeds up
* unmapping in cases where munmap(2) is called, while
* raw async i/o is still in progress or where a thread
* exits on data fault in a multithreaded application.
*/
shmd->shm_softlockcnt > 0)) {
}
return (0);
}
/* The L_PAGELOCK case ... */
/*
* for DISM ppa needs to be rebuilt since
* number of locked pages could be changed
*/
return (ENOTSUP);
}
/*
* First try to find pages in segment page cache, without
* holding the segment lock.
*/
return (ENOTSUP);
}
} else {
an_idx++;
}
}
/*
* Since we cache the entire DISM segment, we want to
* set ppp to point to the first slot that corresponds
* to the requested addr, i.e. pg_idx.
*/
return (0);
}
/*
* try to find pages in segment page cache with mutex
*/
return (ENOTSUP);
}
} else {
an_idx++;
}
}
/*
* Since we cache the entire DISM segment, we want to
* set ppp to point to the first slot that corresponds
* to the requested addr, i.e. pg_idx.
*/
return (0);
}
SEGP_FORCE_WIRED) == SEGP_FAIL) {
return (ENOTSUP);
}
/*
* No need to worry about protections because DISM pages are always rw.
*/
/*
* Do we need to build the ppa array?
*/
pl_built = 1;
/*
* Cache only mlocked pages. For large pages
* if one (constituent) page is mlocked
* all pages for that large page
* are cached also. This is for quick
* lookups of ppa array;
*/
if (lpg_cnt == 0) {
lpg_cnt++;
/*
* For a small page, we are done --
* lpg_cnt is reset to 0 below.
*
* For a large page, we are guaranteed
* to find the anon structures of all
* constituent pages and a non-zero
* lpg_cnt ensures that we don't test
* for mlock for these. We are done
* when lpg_cnt reaches (npgs + 1).
* If we are not the first constituent
* page, restart at the first one.
*/
continue;
}
}
lpg_cnt = 0;
/*
* availrmem is decremented only
* for unlocked pages
*/
}
an_idx++;
}
if (claim_availrmem) {
claim_availrmem = 0;
goto insert_fail;
} else {
}
}
} else {
/*
* We already have a valid ppa[].
*/
}
/*
* seg_pinsert failed. We return
* ENOTSUP, so that the as_pagelock() code will
* then try the slower F_SOFTLOCK path.
*/
if (pl_built) {
/*
* No one else has referenced the ppa[].
* We created it and we need to destroy it.
*/
}
goto insert_fail;
}
/*
* In either case, we increment softlockcnt on the 'real' segment.
*/
sptd->spt_pcachecnt++;
return (ENOTSUP);
}
} else {
an_idx++;
}
}
/*
* We can now drop the sptd->spt_lock since the ppa[]
* exists and we have incremented pcachecnt.
*/
/*
* Since we cache the entire segment, we want to
* set ppp to point to the first slot that corresponds
* to the requested addr, i.e. pg_idx.
*/
return (0);
/*
* We will only reach this code if we tried and failed.
*
* And we can drop the lock on the dummy seg, once we've failed
* to set up a new ppa[].
*/
if (pl_built) {
if (claim_availrmem) {
}
/*
* We created pl and we need to destroy it.
*/
}
}
if (shmd->shm_softlockcnt <= 0) {
}
}
}
return (ret);
}
/*
* return locked pages over a given range.
*
* We will cache the entire ISM segment and save the pplist for the
* entire segment in the ppa field of the underlying ISM segment structure.
* Later, during a call to segspt_reclaim() we will use this ppa array
* to page_unlock() all of the pages and then we will free this ppa list.
*/
/*ARGSUSED*/
static int
{
/*
* we will be using the underlying sptseg and its base address
* and length for the caching arguments.
*/
}
/*
* check if the request is larger than number of pages covered
* by amp
*/
return (ENOTSUP);
}
if (type == L_PAGEUNLOCK) {
/*
* If someone is blocked while unmapping, we purge
* segment page cache and thus reclaim pplist synchronously
* without waiting for seg_pasync_thread. This speeds up
* unmapping in cases where munmap(2) is called, while
* raw async i/o is still in progress or where a thread
* exits on data fault in a multithreaded application.
*/
}
return (0);
}
/* The L_PAGELOCK case... */
/*
* First try to find pages in segment page cache, without
* holding the segment lock.
*/
/*
* Since we cache the entire ISM segment, we want to
* set ppp to point to the first slot that corresponds
* to the requested addr, i.e. page_index.
*/
return (0);
}
/*
* try to find pages in segment page cache
*/
/*
* Since we cache the entire segment, we want to
* set ppp to point to the first slot that corresponds
* to the requested addr, i.e. page_index.
*/
return (0);
}
SEGP_FORCE_WIRED) == SEGP_FAIL) {
return (ENOTSUP);
}
/*
* No need to worry about protections because ISM pages
* are always rw.
*/
/*
* Do we need to build the ppa array?
*/
pl_built = 1;
/*
* availrmem is decremented once during anon_swap_adjust()
* and is incremented during the anon_unresv(), which is
* called from shm_rm_amp() when the segment is destroyed.
*/
/* pcachecnt is protected by sptd->spt_lock */
}
goto insert_fail;
}
} else {
/*
* We already have a valid ppa[].
*/
}
/*
* seg_pinsert failed. We return
* ENOTSUP, so that the as_pagelock() code will
* then try the slower F_SOFTLOCK path.
*/
if (pl_built) {
/*
* No one else has referenced the ppa[].
* We created it and we need to destroy it.
*/
}
goto insert_fail;
}
/*
* In either case, we increment softlockcnt on the 'real' segment.
*/
sptd->spt_pcachecnt++;
/*
* We can now drop the sptd->spt_lock since the ppa[]
* exists and we have incremented pcachecnt.
*/
/*
* Since we cache the entire segment, we want to
* set ppp to point to the first slot that corresponds
* to the requested addr, i.e. page_index.
*/
return (0);
/*
* We will only reach this code if we tried and failed.
*
* And we can drop the lock on the dummy seg, once we've failed
* to set up a new ppa[].
*/
if (pl_built) {
/*
* We created pl and we need to destroy it.
*/
while (np) {
np--;
pplist++;
}
}
if (shmd->shm_softlockcnt <= 0) {
}
}
}
return (ret);
}
/*
* purge any cached pages in the I/O page cache
*/
static void
{
}
static int
{
int done = 0;
#ifdef lint
#endif
/*
* Acquire the lock on the dummy seg and destroy the
* ppa array IF this is the last pcachecnt.
*/
if (--sptd->spt_pcachecnt == 0) {
for (i = 0; i < npages; i++) {
continue;
}
hat_setrefmod(pplist[i]);
} else {
hat_setref(pplist[i]);
}
(sptd->spt_ppa_lckcnt[i] == 0))
page_unlock(pplist[i]);
}
}
/*
* we will track the pplist in a segspt specific field
* ppa, that is initialized at the time we add an entry to
* the cache.
*/
done = 1;
}
/*
* If we are pcache async thread or called via seg_ppurge_wiredpp() we
* may not hold AS lock (in this case async argument is not 0). This
* means if softlockcnt drops to 0 after the decrement below address
* space may get freed. We can't allow it since after softlock
* decrement to 0 we still need to access as structure for possible
* wakeup of unmap waiters. To prevent the disappearance of as we take
* this segment's shm_segfree_syncmtx. segspt_shmfree() also takes
* this mutex as a barrier to make sure this routine completes before
* segment is freed.
*
* The second complication we have to deal with in async case is a
* possibility of missed wake up of unmap wait thread. When we don't
* hold as lock here we may take a_contents lock before unmap wait
* thread that was first to see softlockcnt was still not 0. As a
* result we'll fail to wake up an unmap wait thread. To avoid this
* race we set nounmapwait flag in as structure if we drop softlockcnt
* to 0 if async is not 0. unmapwait thread
* will not block if this flag is set.
*/
if (async)
/*
* Now decrement softlockcnt.
*/
if (shmd->shm_softlockcnt <= 0) {
if (async)
}
}
}
if (async)
return (done);
}
/*
* Do a F_SOFTUNLOCK call over the range requested.
* The range must have already been F_SOFTLOCK'ed.
*
* The calls to acquire and release the anon map lock mutex were
* removed in order to avoid a deadly embrace during a DR
* memory delete operation. (E.g. DR blocks while waiting for an
* exclusive lock on a page that is being used for kaio; the
* thread that will complete the kaio and call segspt_softunlock
* blocks on the anon map lock; another thread holding the anon
* map lock blocks on another page lock via the segspt_shmfault
* -> page_lookup -> page_lookup_create -> page_lock_es code flow.)
*
* The appropriateness of the removal is based upon the following:
* 1. If we are holding a segment's reader lock and the page is held
* shared, then the corresponding element in anonmap which points to
* anon struct cannot change and there is no need to acquire the
* anonymous map lock.
* 2. Threads in segspt_softunlock have a reader lock on the segment
* and already have the shared page lock, so we are guaranteed that
* the anon map slot cannot change and therefore can call anon_get_ptr()
* without grabbing the anonymous map lock.
* 3. Threads that softlock a shared page break copy-on-write, even if
* it's a read. Thus cow faults can be ignored with respect to soft
* unlocking, since the breaking of cow means that the anon slot(s) will
* not be shared.
*/
static void
{
/*
* Some platforms assume that ISM mappings are HAT_LOAD_LOCK
* and therefore their pages are SE_SHARED locked
* for the entire life of the segment.
*/
if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
goto softlock_decrement;
}
/*
* Any thread is free to do a page_find and
* page_unlock() on the pages within this seg.
*
* We are already holding the as->a_lock on the user's
* real segment, but we need to hold the a_lock on the
* underlying dummy as. This is mostly to satisfy the
* underlying HAT layer.
*/
/*
* Use page_find() instead of page_lookup() to
* find the page since we know that it has a
* "shared" lock.
*/
panic("segspt_softunlock: "
"addr %p, ap %p, vp %p, off %llx",
/*NOTREACHED*/
}
hat_setref(pp);
}
}
if (shmd->shm_softlockcnt == 0) {
/*
* All SOFTLOCKS are gone. Wakeup any waiting
* unmappers so they can try again to unmap.
* Check for waiters first without the mutex
* held so we don't always grab the mutex on
* softunlocks.
*/
}
}
}
}
int
{
int error = 0;
return (ENOMEM);
KM_NOSLEEP)) == NULL) {
return (ENOMEM);
}
shmd->shm_lckpgs = 0;
if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
}
}
} else {
}
if (error) {
} else {
}
return (error);
}
int
{
if (shmd->shm_softlockcnt > 0) {
if (reclaim == 1) {
reclaim = 0;
goto retry;
}
return (EAGAIN);
}
#ifdef DEBUG
#endif
return (EINVAL);
}
NULL, 0);
return (0);
}
void
{
/*
* Need to increment refcnt when attaching
* and decrement when detaching because of dup().
*/
}
/*
* Take shm_segfree_syncmtx lock to let segspt_reclaim() finish if it's
* still working with this segment without holding as lock.
*/
}
/*ARGSUSED*/
int
{
/*
* Shared page table is more than shared mapping.
* Individual process sharing page tables can't change prot
* because there is only one set of page tables.
* This will be allowed after private page table is
* supported.
*/
/* need to return correct status error? */
return (0);
}
{
int i;
int err = 0;
caddr_t a;
#ifdef lint
#endif
/*
* Because of the way spt is implemented
* the realsize of the segment does not have to be
* equal to the segment size itself. The segment size is
* often in multiples of a page size larger than PAGESIZE.
* The realsize is rounded up to the nearest PAGESIZE
* based on what the user requested. This is a bit of
* ugliness that is historical but not easily fixed
* without re-designing the higher levels of ISM.
*/
return (FC_NOMAP);
/*
* For all of the following cases except F_PROT, we need to
* make any necessary adjustments to addr and len
* and get all of the necessary page_t's into an array called ppa[].
*
* The code in shmat() forces base addr and len of ISM segment
* to be aligned to largest page size supported. Therefore,
* we are able to handle F_SOFTLOCK and F_INVAL calls in "large
* pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
* in large pagesize chunks, or else we will screw up the HAT
* layer by calling hat_memload_array() with differing page sizes
* over a given virtual range.
*/
/*
* Now we need to convert from addr in segshm to addr in segspt.
*/
switch (type) {
case F_SOFTLOCK:
/*
* Fall through to the F_INVAL case to load up the hat layer
* entries with the HAT_LOAD_LOCK flag.
*/
/* FALLTHRU */
case F_INVAL:
return (FC_NOMAP);
if (err != 0) {
if (type == F_SOFTLOCK) {
atomic_add_long((ulong_t *)(
}
goto dism_err;
}
a = segspt_addr;
pidx = 0;
if (type == F_SOFTLOCK) {
/*
* Load up the translation keeping it
* locked and don't unlock the page.
*/
}
} else {
/*
* Migrate pages marked for migration
*/
if (lgrp_optimizations())
}
/*
* And now drop the SE_SHARED lock(s).
*/
if (dyn_ism_unmap) {
for (i = 0; i < npages; i++) {
page_unlock(ppa[i]);
}
}
}
if (!dyn_ism_unmap) {
panic("hat_share err in DISM fault");
/* NOTREACHED */
}
for (i = 0; i < npages; i++) {
page_unlock(ppa[i]);
}
}
}
return (err);
case F_SOFTUNLOCK:
/*
* This is a bit ugly, we pass in the real seg pointer,
* but the segspt_addr is the virtual address within the
* dummy seg.
*/
return (0);
case F_PROT:
/*
* This takes care of the unusual case where a user
* allocates a stack in shared memory and a register
* window overflow is written to that stack page before
* it is otherwise modified.
*
* We can get away with this because ISM segments are
* always rw. Other than this unusual case, there
* should be no instances of protection violations.
*/
return (0);
default:
#ifdef DEBUG
panic("segspt_dismfault default type?");
#else
return (FC_NOMAP);
#endif
}
}
{
int i;
caddr_t a;
#ifdef lint
#endif
}
/*
* Because of the way spt is implemented
* the realsize of the segment does not have to be
* equal to the segment size itself. The segment size is
* often in multiples of a page size larger than PAGESIZE.
* The realsize is rounded up to the nearest PAGESIZE
* based on what the user requested. This is a bit of
* ugliness that is historical but not easily fixed
* without re-designing the higher levels of ISM.
*/
return (FC_NOMAP);
/*
* For all of the following cases except F_PROT, we need to
* make any necessary adjustments to addr and len
* and get all of the necessary page_t's into an array called ppa[].
*
* The code in shmat() forces base addr and len of ISM segment
* to be aligned to largest page size supported. Therefore,
* we are able to handle F_SOFTLOCK and F_INVAL calls in "large
* pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
* in large pagesize chunks, or else we will screw up the HAT
* layer by calling hat_memload_array() with differing page sizes
* over a given virtual range.
*/
/*
* Now we need to convert from addr in segshm to addr in segspt.
*/
/*
* And now we may have to adjust npages downward if we have
* exceeded the realsize of the segment or initial anon
* allocations.
*/
switch (type) {
case F_SOFTLOCK:
/*
* availrmem is decremented once during anon_swap_adjust()
* and is incremented during the anon_unresv(), which is
* called from shm_rm_amp() when the segment is destroyed.
*/
/*
* Some platforms assume that ISM pages are SE_SHARED
* locked for the entire life of the segment.
*/
if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0))
return (0);
/*
* Fall through to the F_INVAL case to load up the hat layer
* entries with the HAT_LOAD_LOCK flag.
*/
/* FALLTHRU */
case F_INVAL:
return (FC_NOMAP);
/*
* Some platforms that do NOT support DYNAMIC_ISM_UNMAP
* may still rely on this call to hat_share(). That
* would imply that those hat's can fault on a
* HAT_LOAD_LOCK translation, which would seem
* contradictory.
*/
if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
panic("hat_share error in ISM fault");
/*NOTREACHED*/
}
return (0);
}
/*
* I see no need to lock the real seg,
* here, because all of our work will be on the underlying
* dummy seg.
*
* sptseg_addr and npages now account for large pages.
*/
for (i = 0; i < npages; i++) {
}
/*
* We are already holding the as->a_lock on the user's
* real segment, but we need to hold the a_lock on the
* underlying dummy as. This is mostly to satisfy the
* underlying HAT layer.
*/
a = sptseg_addr;
pidx = 0;
if (type == F_SOFTLOCK) {
/*
* Load up the translation keeping it
* locked and don't unlock the page.
*/
}
} else {
/*
* Migrate pages marked for migration.
*/
if (lgrp_optimizations())
}
/*
* And now drop the SE_SHARED lock(s).
*/
for (i = 0; i < npages; i++)
page_unlock(ppa[i]);
}
return (0);
case F_SOFTUNLOCK:
/*
* This is a bit ugly, we pass in the real seg pointer,
* but the sptseg_addr is the virtual address within the
* dummy seg.
*/
return (0);
case F_PROT:
/*
* This takes care of the unusual case where a user
* allocates a stack in shared memory and a register
* window overflow is written to that stack page before
* it is otherwise modified.
*
* We can get away with this because ISM segments are
* always rw. Other than this unusual case, there
* should be no instances of protection violations.
*/
return (0);
default:
#ifdef DEBUG
#endif
return (FC_NOMAP);
}
}
/*ARGSUSED*/
static faultcode_t
{
return (0);
}
/*ARGSUSED*/
static int
{
return (0);
}
/*ARGSUSED*/
static size_t
{
return (0);
}
/*
* duplicate the shared page tables
*/
int
{
int error = 0;
shmd_new->shm_lckpgs = 0;
if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
}
}
return (error);
} else {
}
}
/*ARGSUSED*/
int
{
/*
* ISM segment is always rw.
*/
}
/*
* Return an array of locked large pages, for empty slots allocate
* private zero-filled anon pages.
*/
static int
{
int anon_locked = 0;
ppa_idx = 0;
/*CONSTCOND*/
while (1) {
/*
* If we're currently locked, and we get to a new
* page, unlock our current anon chunk.
*/
anon_locked = 0;
}
if (!anon_locked) {
anon_locked = 1;
}
if (ierr != 0) {
if (ierr > 0) {
goto lpgs_err;
}
break;
}
}
break;
}
/*
* ierr == -1 means we failed to allocate a large page,
* so do a size down operation.
*
* ierr == -2 means some other process that privately shares
* pages with this process has allocated a larger page and we
* need to retry with larger pages. So do a size up
* operation. This relies on the fact that large pages are
* never partially shared i.e. if we share any constituent
* page of a large page with another process we must share the
* entire large page. Note this cannot happen for SOFTLOCK
* case, unless current address (lpaddr) is at the beginning
* of the next page size boundary because the other process
* couldn't have relocated locked pages.
*/
if (segvn_anypgsz) {
} else {
/*
* For faults and segvn_anypgsz == 0
* we need to be careful not to loop forever
* if existing page is found with szc other
* than 0 or seg->s_szc. This could be due
* to page relocations on behalf of DR or
* more likely large page creation. For this
* case simply re-size to existing page's szc
* if returned by anon_map_getpages().
*/
} else {
}
}
}
if (anon_locked) {
}
return (0);
if (anon_locked) {
}
for (j = 0; j < ppa_idx; j++)
page_unlock(ppa[j]);
return (err);
}
/*
* count the number of bytes in a set of spt pages that are currently not
* locked
*/
static rctl_qty_t
{
ulong_t i;
for (i = 0; i < npages; i++) {
}
return (unlocked);
}
extern u_longlong_t randtick(void);
/* Random number with a range [0, n-1], n must be power of two */
#define RAND_P2(n) \
int
{
ulong_t i;
int kernel;
int rv = 0;
/* return the number of bytes actually locked */
*locked = 0;
/*
* To avoid contention on freemem_lock, availrmem and pages_locked
* global counters are updated only every nlck locked pages instead of
* every time. Reserve nlck locks up front and deduct from this
* reservation for each page that requires a lock. When the reservation
* is consumed, reserve again. nlck is randomized, so the competing
* threads do not fall into a cyclic lock contention pattern. When
* memory is low, the lock ahead is disabled, and instead page_pp_lock()
* is used to lock pages.
*/
/* if fewer loops left, decrease nlck */
/*
* Reserve nlck locks up front and deduct from this
* reservation for each page that requires a lock. When
* the reservation is consumed, reserve again.
*/
/* Do not do advance memory reserves */
use_reserved = 0;
} else {
pages_locked += nlck;
}
}
"DISM page lock limit "
"reached on DISM offset 0x%lx\n",
anon_index << PAGESHIFT);
}
use_reserved)) {
break;
}
/* if this is a newly locked page, count it */
nlck--;
}
shmd->shm_lckpgs++;
}
}
}
/* Return unused lock reservation */
pages_locked -= nlck;
}
return (rv);
}
int
{
int kernel;
ulong_t i;
for (i = 0; i < npages; i++, anon_index++) {
/*
* availrmem is decremented only for pages which are not
* in seg pcache, for pages in seg pcache availrmem was
* decremented in _dismpagelock()
*/
/*
* lock page but do not change availrmem, we do it
* ourselves every nlck loops.
*/
if (kernel == 0)
nlck++;
}
shmd->shm_lckpgs--;
}
/*
* To reduce freemem_lock contention, do not update availrmem
* until at least NLCK pages have been unlocked.
* 1. No need to update if nlck is zero
* 2. Always update if the last iteration
*/
pages_locked -= nlck;
nlck = 0;
}
}
return (0);
}
/*ARGSUSED*/
static int
{
ulong_t i;
int sts = 0;
return (0);
}
return (ENOMEM);
}
/*
* A shm's project never changes, so no lock needed.
* The shm has a hold on the project, so it will not go away.
* Since we have a mapping to shm within this zone, we know
* that the zone will not go away.
*/
/*
* Need to align addr and size request if they are not
* aligned so we can always allocate large page(s) however
* we only lock what was requested in initial request.
*/
share_sz);
KM_NOSLEEP)) == NULL) {
return (ENOMEM);
}
/*
* Don't cache any new pages for IO and
* flush any cached pages.
*/
if (sts != 0) {
return (sts);
}
/* enforce locked memory rctl */
mutex_enter(&p->p_lock);
mutex_exit(&p->p_lock);
} else {
mutex_exit(&p->p_lock);
/*
* correct locked count if not all pages could be
* locked
*/
}
}
/*
* unlock pages
*/
for (i = 0; i < a_npages; i++)
page_unlock(ppa[i]);
if (shmd->shm_lckpgs == 0) {
return (0);
}
/*
* Don't cache new IO pages.
*/
}
return (sts);
}
/*ARGSUSED*/
int
{
/*
* ISM segment is always rw.
*/
while (--pgno >= 0)
return (0);
}
/*ARGSUSED*/
{
/* Offset does not matter in ISM memory */
return ((u_offset_t)0);
}
/* ARGSUSED */
int
{
/*
* The shared memory mapping is always MAP_SHARED, SWAP is only
* reserved for DISM
*/
return (MAP_SHARED |
}
/*ARGSUSED*/
int
{
return (0);
}
/*
* We need to wait for pending IO to complete to a DISM segment in order for
* pages to get kicked out of the seg_pcache. 120 seconds should be more
* than enough time to wait.
*/
/*ARGSUSED*/
static int
{
int writer;
return (0);
return (0);
}
/*
* Purge all DISM cached pages
*/
/*
* Drop the AS_LOCK so that other threads can grab it
* in the as_pageunlock path and hopefully get the segment
* kicked out of the seg_pcache. We bump the shm_softlockcnt
* to keep this segment resident.
*/
/*
* Try to wait for pages to get kicked out of the seg_pcache.
*/
ddi_get_lbolt() < end_lbolt) {
break;
}
}
/* Regrab the AS_LOCK and release our hold on the segment */
if (shmd->shm_softlockcnt <= 0) {
}
}
}
int already_set;
/*
* Align address and length to page size of underlying segment
*/
/*
* And now we may have to adjust size downward if we have
* exceeded the realsize of the segment or initial anon
* allocations.
*/
if ((sptseg_addr + size) >
/*
* Set memory allocation policy for this segment
*/
/*
* If random memory allocation policy set already,
* don't bother reapplying it.
*/
return (0);
/*
* Mark any existing pages in the given range for
* migration, flushing the I/O page cache, and using
* underlying segment to calculate anon index and get
* anonmap and vnode pointer from
*/
if (shmd->shm_softlockcnt > 0)
}
return (0);
}
/*ARGSUSED*/
void
{
/* no-op for ISM segment */
}
/*ARGSUSED*/
static faultcode_t
{
return (ENOTSUP);
}
/*
* get a memory ID for an addr in a given segment
*/
static int
{
return (EFAULT);
}
return (ENOMEM);
}
}
return (0);
}
/*
* Get memory allocation policy info for specified address in given segment
*/
static lgrp_mem_policy_info_t *
{
/*
* Get anon_map from segshm
*
* Assume that no lock needs to be held on anon_map, since
* it should be protected by its reference count which must be
* nonzero for an existing segment
* Need to grab readers lock on policy tree though
*/
return (NULL);
/*
* Get policy info
*
* Assume starting anon index of 0
*/
return (policy_info);
}
/*ARGSUSED*/
static int
{
return (0);
}