vm_as.c revision 406882169e00272f14067d948324d690893e6fe3
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
/*
* VM - address spaces.
*/
#include <sys/sysmacros.h>
#include <sys/tnf_probe.h>
#include <vm/seg_kmem.h>
static struct kmem_cache *as_cache;
/*
* Verifying the segment lists is very time-consuming; it may not always
* be desirable to define VERIFY_SEGLIST when DEBUG is set.
*/
#ifdef DEBUG
#define VERIFY_SEGLIST
int do_as_verify = 0;
#endif
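/*
 * Illustrative note (not part of the original source): on a DEBUG kernel
 * built with VERIFY_SEGLIST, do_as_verify can be enabled at run time with
 * the usual kernel-variable idiom, e.g.:
 *
 *    # echo 'do_as_verify/W 1' | mdb -kw
 */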
/*
* Allocate a new callback data structure entry and fill in the events of
* interest, the address range of interest, and the callback argument.
* Link the entry on the as->a_callbacks list. A callback entry for the
* entire address space may be specified with vaddr = 0 and size = -1.
*
* CALLER'S RESPONSIBILITY: If not calling from within the process context for
* the specified as, the caller must guarantee persistence of the specified as
* for the duration of this function (e.g., pages being locked within the as
* will guarantee persistence).
*/
int
{
/* callback function and an event are mandatory */
return (EINVAL);
/* Adding a callback after as_free has been called is not allowed */
return (ENOMEM);
/*
* vaddr = 0 and size = -1 is used to indicate that the callback range
* is the entire address space so no rounding is done in that case.
*/
if (size != -1) {
/* check for wraparound */
return (ENOMEM);
} else {
if (vaddr != 0)
return (EINVAL);
}
/* Allocate and initialize a callback entry */
return (EAGAIN);
/* Add the entry to the list */
/*
* The call to this function may lose in a race with
* a pertinent event - e.g., a thread does long-term memory locking,
* but before the callback is added another thread executes as_unmap.
* A broadcast here resolves that.
*/
}
return (0);
}
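/*
 * Illustrative sketch (not part of the original source): a driver that has
 * pages locked long-term in `as' might register for unmap and setprot
 * notification over its range, assuming a hypothetical handler my_as_cb()
 * and the usual prototype from vm/as.h:
 *
 *    error = as_add_callback(as, my_as_cb, my_arg,
 *        AS_UNMAP_EVENT | AS_SETPROT_EVENT, vaddr, size, KM_SLEEP);
 *
 * Passing vaddr = 0 and size = (size_t)-1 registers for the entire
 * address space, as described above.
 */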
/*
* Search the callback list for an entry which pertains to arg.
*
* This is called from within the client upon completion of the callback.
* RETURN VALUES:
* AS_CALLBACK_DELETED (callback entry found and deleted)
* AS_CALLBACK_NOTFOUND (no callback entry found - this is ok)
* AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this
* entry will be made in as_do_callbacks)
*
* If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED
* set, it indicates that as_do_callbacks is processing this entry. The
* AS_ALL_EVENT events are cleared in the entry, and a broadcast is made
* to unblock as_do_callbacks, in case it is blocked.
*
* CALLER'S RESPONSIBILITY: If not calling from within the process context for
* the specified as, the caller must guarantee persistence of the specified as
* for the duration of this function (e.g., pages being locked within the as
* will guarantee persistence).
*/
{
struct as_callback *cb;
continue;
/*
* If the events indicate AS_CALLBACK_CALLED, just clear
* AS_ALL_EVENT in the events field and wakeup the thread
* that may be waiting in as_do_callbacks. as_do_callbacks
* will take care of removing this entry from the list. In
* that case, return AS_CALLBACK_DELETE_DEFERRED. Otherwise
* (AS_CALLBACK_CALLED not set), just remove it from the
* list, return the memory and return AS_CALLBACK_DELETED.
*/
/* leave AS_CALLBACK_CALLED */
} else {
}
break;
}
return (rc);
}
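/*
 * Illustrative sketch (not part of the original source): a client finishing
 * with its registration typically dispatches on the return value, e.g.:
 *
 *    rc = as_delete_callback(as, my_arg);
 *    if (rc == AS_CALLBACK_DELETE_DEFERRED) {
 *        ... the entry is currently being run by as_do_callbacks(), which
 *        will remove and free it; nothing more for the client to do ...
 *    }
 *
 * AS_CALLBACK_DELETED and AS_CALLBACK_NOTFOUND both mean the entry is gone.
 */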
/*
* Searches the as callback list for a matching entry.
* Returns a pointer to the first matching callback, or NULL if
* nothing is found.
* This function never sleeps so it is OK to call it with locks held
* in addition to the (required) a_contents mutex.
*
* See also comment on as_do_callbacks below.
*/
static struct as_callback *
{
struct as_callback *cb;
/*
* If the callback has not already been called, then
* check if events or address range pertains. An event_len
* of zero means do an unconditional callback.
*/
continue;
}
break;
}
return (cb);
}
/*
* Executes a given callback and removes it from the callback list for
* this address space.
* This function may sleep so the caller must drop all locks except
* a_contents before calling this func.
*
* See also comments on as_do_callbacks below.
*/
static void
{
struct as_callback **prevcb;
void *cb_arg;
/*
* The callback function is required to delete the callback
* when it determines it is OK for
* this thread to continue. as_delete_callback will clear
* the AS_ALL_EVENT in the events field when it is deleted.
* If the callback function called as_delete_callback,
* events will already be cleared and there will be no blocking.
*/
}
/*
* This entry needs to be taken off the list. Normally, the
* callback func itself does that, but unfortunately the list
* may have changed while the callback was running because the
* a_contents mutex was dropped and someone else other than the
* callback func itself could have called as_delete_callback,
* so we have to search to find this entry again. The entry
* must have AS_CALLBACK_CALLED, and have the same 'arg'.
*/
continue;
}
break;
}
}
/*
* Check the callback list for a matching event and intersection of
* address range. If there is a match invoke the callback. Skip an entry if:
* - a callback is already in progress for this entry (AS_CALLBACK_CALLED)
* - the event is not of interest
* - the address range is not of interest
*
* An event_len of zero indicates a request for an unconditional callback
* (regardless of event), only the AS_CALLBACK_CALLED is checked. The
* a_contents lock must be dropped before a callback, so only one callback
* can be done before returning. Return -1 (true) if a callback was
* executed and removed from the list, else return 0 (false).
*
* The logically separate parts, i.e. finding a matching callback and
* executing a given callback have been separated into two functions
* so that they can be called with different sets of locks held beyond
* the always-required a_contents. as_find_callback does not sleep so
* it is ok to call it if more locks than a_contents (i.e. the a_lock
* rwlock) are held. as_execute_callback on the other hand may sleep
* so all locks beyond a_contents must be dropped by the caller if one
* does not want to end up comatose.
*/
static int
{
struct as_callback *cb;
return (-1);
}
return (0);
}
/*
* Search for the segment containing addr. If a segment containing addr
* exists, that segment is returned. If no such segment exists, and
* the list spans addresses greater than addr, then the first segment
* whose base is greater than addr is returned; otherwise, NULL is
* returned unless tail is true, in which case the last element of the
* list is returned.
*
* a_seglast is used to cache the last found segment for repeated
* searches to the same addr (which happens frequently).
*/
struct seg *
{
return (seg);
}
#ifdef VERIFY_SEGLIST
/*
* verify that the linked list is coherent
*/
static void
{
if (do_as_verify == 0)
return;
nsegs++;
}
}
#endif /* VERIFY_SEGLIST */
/*
* Add a new segment to the address space. The avl_find()
* may be expensive so we attempt to use the last segment accessed
* in as_gap() as an insertion point.
*/
int
{
} else {
}
return (0);
}
}
/*
* If top of seg is below the requested address, then
* the insertion point is at the end of the linked list,
* and seg points to the tail of the list. Otherwise,
* the insertion point is immediately before seg.
*/
#ifdef __sparc
/*
* no-fault segs must disappear if overlaid.
* XXX need new segment type so
* we don't have to check s_ops
*/
goto again;
}
#endif
return (-1); /* overlapping segment */
}
}
}
#ifdef VERIFY_SEGLIST
#endif
return (0);
}
struct seg *
{
avl_tree_t *t;
return (NULL);
/*
* if this segment is at an address higher than
* a_lastgap, set a_lastgap to the next segment (NULL if last segment)
*/
/*
* remove the segment from the seg tree
*/
avl_remove(t, seg);
#ifdef VERIFY_SEGLIST
#endif
return (seg);
}
/*
* Find a segment containing addr.
*/
struct seg *
{
return (seg);
return (seg);
}
/*
* Serialize all searches for holes in an address space to
* prevent two or more threads from allocating the same virtual
* address range. The address space must not be "read/write"
* locked by the caller since we may block.
*/
void
{
while (AS_ISCLAIMGAP(as))
}
/*
* Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads.
*/
void
{
}
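/*
 * Illustrative sketch (not part of the original source) of the usual
 * mmap()-style caller, which brackets the hole search and the mapping with
 * the range lock so that no other thread can claim the same hole:
 *
 *    as_rangelock(as);
 *    map_addr(&addr, len, off, vacalign, flags);
 *    if (addr == NULL) {
 *        as_rangeunlock(as);
 *        return (ENOMEM);
 *    }
 *    error = as_map(as, addr, len, segvn_create, &crargs);
 *    as_rangeunlock(as);
 */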
/*
* Compare segments (or just an address) by segment address range
*/
static int
as_segcompar(const void *x, const void *y)
{
struct seg *a = (struct seg *)x;
struct seg *b = (struct seg *)y;
if (a->s_base < b->s_base)
return (-1);
if (a->s_base >= b->s_base + b->s_size)
return (1);
return (0);
}
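/*
 * Sketch (not part of the original source) of how the comparator is wired
 * up: as_avlinit() below hands it to avl_create(), roughly as
 *
 *    avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg),
 *        offsetof(struct seg, s_tree));
 *
 * so that an avl_find() on an address returns the containing segment.
 */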
void
{
}
/*ARGSUSED*/
static int
{
as_avlinit(as);
return (0);
}
/*ARGSUSED1*/
static void
{
}
void
as_init(void)
{
}
/*
* Allocate and initialize an address space data structure.
* We call hat_alloc to allow any machine-dependent
* information in the hat structure to be initialized.
*/
struct as *
as_alloc(void)
{
as->a_resvsize = 0;
as->a_updatedir = 0;
return (as);
}
/*
* Free an address space data structure.
* We need to free the hat first, then
* all the segments on this as, and finally
* the space for the as struct itself.
*/
void
{
int called = 0;
top:
/*
* Invoke ALL callbacks. as_do_callbacks will do one callback
* per call, and not return (-1) until the callback has completed.
* When as_do_callbacks returns zero, all callbacks have completed.
*/
;
/* This will prevent new XHATs from attaching to as */
if (!called)
AS_SETBUSY(as);
if (!called) {
called = 1;
}
int err;
if (as->a_callbacks) {
} else if (!AS_ISNOUNMAPWAIT(as)) {
/*
* Memory is currently locked. Wait for a
* cv_signal that it has been unlocked, then
* try the operation again.
*/
if (AS_ISUNMAPWAIT(as) == 0)
while (AS_ISUNMAPWAIT(as))
} else {
/*
* We may have raced with
* segvn_reclaim()/segspt_reclaim(). In this
* case clean nounmapwait flag and retry since
* softlockcnt in this segment may be already
* 0. We don't drop as writer lock so our
* number of retries without sleeping should
* be very small. See segvn_reclaim() for
* more comments.
*/
goto retry;
}
goto top;
} else {
/*
* We do not expect any other error return at this
* time. This is similar to an ASSERT in seg_unmap()
*/
}
}
/* /proc stuff */
if (as->a_objectdir) {
}
/*
* Free the struct as back to kmem. Assert it has no segments.
*/
}
int
{
int error;
/* This will prevent new XHATs from attaching */
AS_SETBUSY(as);
continue;
}
AS_CLRBUSY(as);
return (-1);
}
/*
* We call seg_free() on the new seg
* because the segment is not set up
* completely; i.e. it has no ops.
*/
AS_CLRBUSY(as);
return (error);
}
}
AS_CLRBUSY(as);
if (error != 0) {
return (error);
}
return (0);
}
/*
* Handle a ``fault'' at addr for size bytes.
*/
{
faultcode_t res = 0;
int as_lock_held;
int is_xhat = 0;
int holding_wpage = 0;
extern struct seg_ops segdev_ops;
/* This must be an XHAT then */
is_xhat = 1;
return (FC_NOSUPPORT);
}
if (!is_xhat) {
/*
* Indicate that the lwp is not to be stopped while waiting
* for a pagefault. This is to avoid deadlock while debugging
* a process via /proc over NFS (in particular).
*/
lwp->lwp_nostop++;
/*
* The same length must be used when we softlock and softunlock.
* We don't support softunlocking lengths less than
* the original length when there is largepage support.
* See seg_dev.c for more comments.
*/
switch (type) {
case F_SOFTLOCK:
break;
case F_SOFTUNLOCK:
break;
case F_PROT:
break;
case F_INVAL:
break;
}
}
/* Kernel probe */
/*
* XXX -- Don't grab the as lock for segkmap. We should grab it for
* correctness, but then we could be stuck holding this lock for
* a LONG time if the fault needs to be resolved on a slow
* filesystem, and then no one will be able to exec new commands,
* as exec'ing requires the write lock on the as.
*/
/*
* if (as==&kas), this can't be XHAT: we've already returned
* FC_NOSUPPORT.
*/
as_lock_held = 0;
} else {
/*
* Grab and hold the writers' lock on the as
* if the fault is to a watched page.
* This will keep CPUs from "peeking" at the
* address range while we're temporarily boosting
* the permissions for the XHAT device to
* resolve the fault in the segment layer.
*
* We could check whether faulted address
* is within a watched page and only then grab
* the writer lock, but this is simpler.
*/
}
lwp->lwp_nostop--;
return (FC_NOMAP);
}
as_lock_held = 1;
}
break;
}
}
else
/*
* Handle watch pages. If we're faulting on a
* watched page from an X-hat, we have to
* restore the original permissions while we
* handle the fault.
*/
holding_wpage = 1;
}
/* Restore watchpoints */
if (holding_wpage) {
holding_wpage = 0;
}
if (res != 0)
break;
} else {
/* XHAT does not support seg_dev */
res = FC_NOSUPPORT;
break;
}
}
/*
* If we were SOFTLOCKing and encountered a failure,
* we must SOFTUNLOCK the range we already did. (Maybe we
* should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
* right here...)
*/
/*
* Now call the fault routine again to perform the
* unlock using S_OTHER instead of the rw variable
* since we never got a chance to touch the pages.
*/
else
}
}
if (as_lock_held)
lwp->lwp_nostop--;
/*
* If the lower levels returned EDEADLK for a fault,
* it means that we should retry the fault. Let's wait
* a bit also to let the deadlock-causing condition clear.
* This is part of a gross hack to work around a design flaw
* in the ufs/sds logging code and should go away when the
* logging code is re-designed to fix the problem. See bug
* 4125102 for details of the problem.
*/
res = 0;
goto retry;
}
return (res);
}
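/*
 * Illustrative sketch (not part of the original source): physio-style
 * callers pin a user range with F_SOFTLOCK, do their I/O, and then must
 * release exactly the same range with F_SOFTUNLOCK:
 *
 *    res = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, S_WRITE);
 *    if (res != 0)
 *        return (fc_decode(res));
 *    ... do the transfer ...
 *    (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, S_WRITE);
 */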
/*
* Asynchronous ``fault'' at addr for size bytes.
*/
{
faultcode_t res = 0;
/*
* Indicate that the lwp is not to be stopped while waiting
* for a pagefault. This is to avoid deadlock while debugging
* a process via /proc over NFS (in particular).
*/
lwp->lwp_nostop++;
lwp->lwp_nostop--;
return (FC_NOMAP);
}
break;
}
}
if (res != 0)
break;
}
lwp->lwp_nostop--;
/*
* If the lower levels returned EDEADLK for a fault,
* it means that we should retry the fault. Let's wait
* a bit also to let the deadlock-causing condition clear.
* This is part of a gross hack to work around a design flaw
* in the ufs/sds logging code and should go away when the
* logging code is re-designed to fix the problem. See bug
* 4125102 for details of the problem.
*/
res = 0;
goto retry;
}
return (res);
}
/*
* Set the virtual mapping for the interval from [addr : addr + size)
* in address space `as' to have the specified protection.
* It is ok for the range to cross over several segments,
* as long as they are contiguous.
*/
int
{
struct as_callback *cb;
return (ENOMEM);
/*
* Normally we only lock the as as a reader. But
* if due to setprot the segment driver needs to split
* a segment it will return IE_RETRY. Therefore we re-acquire
* the as lock as a writer so the segment driver can change
* the seg list. Also the segment driver will return IE_RETRY
* after it has changed the segment list so we therefore keep
* locking as a writer. Since these operations should be rare we
* want to only lock as a writer when necessary.
*/
} else {
}
return (ENOMEM);
}
break;
}
}
else
break;
}
writer = 1;
goto setprot_top;
}
/*
* Make sure we have a_lock as writer.
*/
if (writer == 0) {
writer = 1;
goto setprot_top;
}
/*
* Memory is currently locked. It must be unlocked
* before this operation can succeed through a retry.
* The possible reasons for locked memory and
* corresponding strategies for unlocking are:
* (1) Normal I/O
* wait for a signal that the I/O operation
* has completed and the memory is unlocked.
* (2) Asynchronous I/O
* The aio subsystem does not unlock pages when
* the I/O is completed. Those pages are unlocked
* when the application calls aiowait/aioerror.
* So, to prevent blocking forever, cv_broadcast()
* is done to wake up aio_cleanup_thread.
* Subsequently, segvn_reclaim will be called, and
* that will do AS_CLRUNMAPWAIT() and wake us up.
* (3) Long term page locking:
* Drivers intending to have pages locked for a
* period considerably longer than for normal I/O
* (essentially forever) may have registered for a
* callback so they may unlock these pages on
* request. This is needed to allow this operation
* to succeed. Each entry on the callback list is
* examined. If the event or address range pertains
* the callback is invoked (unless it already is in
* progress). The a_contents lock must be dropped
* before the callback, so only one callback can
* be done at a time. Go to the top and do more
* until zero is returned. If zero is returned,
* either there were no callbacks for this event
* or they were already in progress.
*/
if (as->a_callbacks &&
} else if (!AS_ISNOUNMAPWAIT(as)) {
if (AS_ISUNMAPWAIT(as) == 0)
while (AS_ISUNMAPWAIT(as))
} else {
/*
* We may have raced with
* segvn_reclaim()/segspt_reclaim(). In this
* case clean nounmapwait flag and retry since
* softlockcnt in this segment may be already
* 0. We don't drop as writer lock so our
* number of retries without sleeping should
* be very small. See segvn_reclaim() for
* more comments.
*/
goto retry;
}
goto setprot_top;
} else if (error != 0)
break;
}
if (error != 0) {
} else {
}
return (error);
}
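/*
 * Illustrative sketch (not part of the original source): the mprotect()
 * path reduces to a call of this form, where `uprot' stands for the
 * requested user protections already or'd with PROT_USER:
 *
 *    error = as_setprot(curproc->p_as, (caddr_t)addr, len, uprot);
 */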
/*
* Check to make sure that the interval [addr, addr + size)
* in address space `as' has at least the specified protection.
* It is ok for the range to cross over several segments, as long
* as they are contiguous.
*/
int
{
int error = 0;
return (ENOMEM);
/*
* This is ugly as sin...
* Normally, we only acquire the address space readers lock.
* However, if the address space has watchpoints present,
* we must acquire the writer lock on the address space for
* the benefit of as_clearwatchprot() and as_setwatchprot().
*/
else
return (ENOMEM);
}
break;
}
}
else
if (error != 0)
break;
}
return (error);
}
int
{
struct as_callback *cb;
int err;
top:
/*
* Use as_findseg to find the first segment in the range, then
* step through the segments in order, following s_next.
*/
break; /* eaddr was in a gap; all done */
/* this is implied by the test above */
else
/*
* Save next segment pointer since seg can be
* destroyed during the segment unmap operation.
*/
/*
* We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
* we have to do this check here while we have seg.)
*/
if (!SEG_IS_DEVNULL_MAPPING(seg) &&
/*
* Memory is currently locked. It must be unlocked
* before this operation can succeed through a retry.
* The possible reasons for locked memory and
* corresponding strategies for unlocking are:
* (1) Normal I/O
* wait for a signal that the I/O operation
* has completed and the memory is unlocked.
* (2) Asynchronous I/O
* The aio subsystem does not unlock pages when
* the I/O is completed. Those pages are unlocked
* when the application calls aiowait/aioerror.
* So, to prevent blocking forever, cv_broadcast()
* is done to wake up aio_cleanup_thread.
* Subsequently, segvn_reclaim will be called, and
* that will do AS_CLRUNMAPWAIT() and wake us up.
* (3) Long term page locking:
* Drivers intending to have pages locked for a
* period considerably longer than for normal I/O
* (essentially forever) may have registered for a
* callback so they may unlock these pages on
* request. This is needed to allow this operation
* to succeed. Each entry on the callback list is
* examined. If the event or address range pertains
* the callback is invoked (unless it already is in
* progress). The a_contents lock must be dropped
* before the callback, so only one callback can
* be done at a time. Go to the top and do more
* until zero is returned. If zero is returned,
* either there were no callbacks for this event
* or they were already in progress.
*/
if (as->a_callbacks &&
} else if (!AS_ISNOUNMAPWAIT(as)) {
if (AS_ISUNMAPWAIT(as) == 0)
while (AS_ISUNMAPWAIT(as))
} else {
/*
* We may have raced with
* segvn_reclaim()/segspt_reclaim(). In this
* case clean nounmapwait flag and retry since
* softlockcnt in this segment may be already
* 0. We don't drop as writer lock so our
* number of retries without sleeping should
* be very small. See segvn_reclaim() for
* more comments.
*/
goto retry;
}
goto top;
goto top;
} else if (err) {
return (-1);
}
}
return (0);
}
static int
{
int error;
caddr_t a;
if (!do_off) {
}
if (szcvec <= 1) {
return (ENOMEM);
}
if (error != 0) {
} else {
/*
* We'll count MAP_NORESERVE mappings as we fault
* pages in.
*/
if (!SEG_IS_PARTIAL_RESV(seg))
}
return (error);
}
szcvec >>= 1;
szc = 0;
nszc = 0;
while (szcvec) {
if ((szcvec & 0x1) == 0) {
nszc++;
szcvec >>= 1;
continue;
}
nszc++;
if (a != addr) {
return (ENOMEM);
}
if (error != 0) {
return (error);
}
/*
* We'll count MAP_NORESERVE mappings as we fault
* pages in.
*/
if (!SEG_IS_DEVNULL_MAPPING(seg) &&
*segcreated = 1;
if (do_off) {
}
addr = a;
}
szcvec >>= 1;
}
while (szcvec) {
if (a != addr) {
return (ENOMEM);
}
if (error != 0) {
return (error);
}
/*
* We'll count MAP_NORESERVE mappings as we fault
* pages in.
*/
if (!SEG_IS_DEVNULL_MAPPING(seg) &&
*segcreated = 1;
if (do_off) {
}
addr = a;
}
if (szcvec) {
}
}
return (0);
}
static int
{
type, 0);
int error;
extern size_t textrepl_size_thresh;
if (szcvec <= 1) {
return (ENOMEM);
}
if (error != 0) {
} else {
/*
* We'll count MAP_NORESERVE mappings as we fault
* pages in.
*/
if (!SEG_IS_PARTIAL_RESV(seg))
}
return (error);
}
szcvec = 0;
goto again;
}
szcvec = 0;
goto again;
}
type, 0);
if (szcvec <= 1) {
goto again;
}
}
if (size > textrepl_size_thresh) {
}
if (error != 0) {
return (error);
}
if (save_size) {
szcvec = 0;
goto again;
}
return (0);
}
/*
* as_map_ansegs: shared or private anonymous memory. Note that the flags
* passed to map_pgszcvec cannot be MAP_INITDATA, for anon.
*/
static int
{
type = MAPPGSZC_SHM;
} else {
}
}
}
int
{
}
int
void *argsp)
{
int error;
int unmap = 0;
struct segvn_crargs crargs;
/*
* check for wrap around
*/
return (ENOMEM);
}
return (ENOMEM);
}
if (error != 0) {
if (unmap) {
}
return (error);
}
if (error != 0) {
if (unmap) {
}
return (error);
}
} else {
return (ENOMEM);
}
if (error != 0) {
return (error);
}
/*
* Add size now so as_unmap will work if as_ctl fails.
*/
/*
* We'll count MAP_NORESERVE mappings as we fault
* pages in.
*/
if (!SEG_IS_DEVNULL_MAPPING(seg) &&
}
/*
* If the address space is locked,
* establish memory locks for the new segment.
*/
if (AS_ISPGLCK(as)) {
if (error != 0)
} else {
}
return (error);
}
/*
* Delete all segments in the address space marked with S_PURGE.
* This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
* These segments are deleted as a first step before calls to as_gap(), so
* that they don't affect mmap() or shmat().
*/
void
{
/*
* the setting of NEEDSPURGE is protected by as_rangelock(), so
* no need to grab a_contents mutex for this check
*/
return;
}
}
/*
* Find a hole within [*basep, *basep + *lenp), which contains a mappable
* range of addresses at least "minlen" long, where the base of the range is
* at "off" phase from an "align" boundary and there is space for a
* "redzone"-sized redzone on either side of the range. Thus,
* if align was 4M and off was 16k, the user wants a hole which will start
* 16k into a 4M page.
*
* If flags specifies AH_HI, the hole will have the highest possible address
* in the range. We use the as->a_lastgap field to figure out where to
* start looking for a gap.
*
* Otherwise, the gap will have the lowest possible address.
*
* If flags specifies AH_CONTAIN, the hole will contain the address addr.
*
* If an adequate hole is found, *basep and *lenp are set to reflect the part of
* the hole that is within range, and 0 is returned. On failure, -1 is returned.
*
* NOTE: This routine is not correct when base+len overflows caddr_t.
*/
int
{
int forward;
int fast_path = 1;
/*
* For the first pass (fast_path), just add align and redzone into
* minlen since if we get an allocation, we can guarantee that it
* will fit the alignment and redzone requested.
* This increases the chance that hibound will be adjusted to
* a_lastgap->s_base which will likely allow us to find an
* acceptable hole in the address space quicker.
* If we can't find a hole with this fast_path, then we look for
* smaller holes in which the alignment and offset may allow
* the allocation to fit.
*/
redzone = 0;
return (0);
} else {
return (-1);
}
}
/*
* Set up to iterate over all the inter-segment holes in the given
* direction. lseg is NULL for the lowest-addressed hole and hseg is
* NULL for the highest-addressed hole. If moving backwards, we reset
* sseg to denote the highest-addressed segment.
*/
if (forward) {
} else {
/*
* If allocating at least as much as the last allocation,
* use a_lastgap's base as a better estimate of hibound.
*/
} else {
}
}
for (;;) {
/*
* Set lo and hi to the hole's boundaries. (We should really
* use MAXADDR in place of hibound in the expression below,
* but can't express it easily; using hibound in its place is
* harmless.)
*/
/*
* If the iteration has moved past the interval from lobound
* to hibound it's pointless to continue.
*/
break;
goto cont;
/*
* Candidate hole lies at least partially within the allowable
* range. Restrict it to fall completely within that range,
* i.e., to [max(lo, lobound), min(hi, hibound)].
*/
/*
* Verify that the candidate hole is big enough and meets
* hardware constraints. If the hole is too small, no need
* to do the further checks since they will fail.
*/
((flags & AH_CONTAIN) == 0 ||
if (!forward)
else
return (0);
}
cont:
/*
* Move to the next hole.
*/
if (forward) {
break;
} else {
break;
}
}
fast_path = 0;
goto retry;
}
return (-1);
}
/*
* Find a hole of at least size minlen within [*basep, *basep + *lenp).
*
* If flags specifies AH_HI, the hole will have the highest possible address
* in the range. We use the as->a_lastgap field to figure out where to
* start looking for a gap.
*
* Otherwise, the gap will have the lowest possible address.
*
* If flags specifies AH_CONTAIN, the hole will contain the address addr.
*
* If an adequate hole is found, base and len are set to reflect the part of
* the hole that is within range, and 0 is returned, otherwise,
* -1 is returned.
*
* NOTE: This routine is not correct when base+len overflows caddr_t.
*/
int
{
}
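/*
 * Illustrative sketch (not part of the original source): a caller that
 * wants the lowest hole of at least `minlen' bytes within [base, base + len)
 * might do:
 *
 *    if (as_gap(as, minlen, &base, &len, AH_LO, NULL) == 0) {
 *        ... hole found; it starts at `base' and is `len' bytes long ...
 *    } else {
 *        error = ENOMEM;
 *    }
 *
 * AH_HI asks for the highest such hole instead.
 */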
/*
* Return the next range within [base, base + len) that is backed
* with "real memory". Skip holes and non-seg_vn segments.
* We're lazy and only return one segment at a time.
*/
int
{
for (;;) {
return (EINVAL);
}
break;
}
/*
* We do ISM by looking into the private data
* to determine the real size of the segment.
*/
break;
}
}
else
return (0);
}
/*
* Swap the pages associated with the address space as out to
* secondary storage, returning the number of bytes actually
* swapped.
*
* The value returned is intended to correlate well with the process's
* memory requirements. Its usefulness for this purpose depends on
* how well the segment-level routines do at returning accurate
* information.
*/
{
/*
* Kernel-only processes have given up their address
* spaces. Of course, we shouldn't be attempting to
* swap out such processes in the first place...
*/
return (0);
/* Prevent XHATs from attaching */
AS_SETBUSY(as);
/*
* Free all mapping resources associated with the address
* space. The segment-level swapout routines capitalize
* on this unmapping by scavenging pages that have become
* unmapped here.
*/
AS_CLRBUSY(as);
/*
* Call the swapout routines of all segments in the address
* space to do the actual work, accumulating the amount of
* space reclaimed.
*/
/*
* We have to check to see if the seg has
* an ops vector because the seg may have
* been in the middle of being set up when
* the process was picked for swapout.
*/
}
return (swpcnt);
}
/*
* Determine whether data from the mappings in interval [addr, addr + size)
* are in the primary memory (core) cache.
*/
int
{
int error = 0; /* result, assume success */
*sizep = 0;
return (ENOMEM);
return (-1);
}
error = -1;
break;
}
}
else
error = -1;
break;
}
}
return (error);
}
static void
{
}
}
static void
{
while (rsize != 0) {
else
}
}
/*
* Cache control operations over the interval [addr, addr + size) in
* address space "as".
*/
/*ARGSUSED*/
int
{
int error = 0; /* result */
/* to represent the locked */
/* pages. */
else
/*
* If these are address space lock/unlock operations, loop over
* all segments in the address space, as appropriate.
*/
if (arg & MCL_FUTURE) {
}
if ((arg & MCL_CURRENT) == 0) {
return (0);
}
return (0);
}
do {
return (EAGAIN);
}
if (error != 0)
break;
}
if (error) {
}
}
goto lockerr;
} else if (func == MC_UNLOCKAS) {
if (error != 0)
break;
}
goto lockerr;
}
/*
* Normalize addresses and sizes.
*/
return (ENOMEM);
}
/*
* Get initial segment.
*/
return (ENOMEM);
}
return (EAGAIN);
}
}
/*
* Loop over all segments. If a hole in the address range is
* discovered, then fail. For each segment, perform the appropriate
* control operation.
*/
while (rsize != 0) {
/*
* Make sure there's no hole, calculate the portion
* of the next segment to be operated over.
*/
mlock_size * sizeof (ulong_t));
}
return (ENOMEM);
}
}
else
/*
* Dispatch on specific function.
*/
switch (func) {
/*
* Synchronize cached data from mappings with backing
* objects.
*/
case MC_SYNC:
return (error);
}
break;
/*
* Lock pages in memory.
*/
case MC_LOCK:
sizeof (ulong_t));
goto lockerr;
}
break;
/*
* Unlock mapped pages.
*/
case MC_UNLOCK:
break;
/*
* Store VM advise for mapped pages in segment layer.
*/
case MC_ADVISE:
/*
* Check for regular errors and special retry error
*/
if (error) {
/*
* Need to acquire writers lock, so
* have to drop readers lock and start
* all over again
*/
goto retry;
} else if (error == IE_REATTACH) {
/*
* Find segment for current address
* because current segment just got
* split or concatenated
*/
return (ENOMEM);
}
} else {
/*
* Regular error
*/
return (error);
}
}
break;
/*
* Can't happen.
*/
default:
/*NOTREACHED*/
}
}
return (0);
/*
* If the lower levels returned EDEADLK for a segment lockop,
* it means that we should retry the operation. Let's wait
* a bit also to let the deadlock-causing condition clear.
* This is part of a gross hack to work around a design flaw
* in the ufs/sds logging code and should go away when the
* logging code is re-designed to fix the problem. See bug
* 4125102 for details of the problem.
*/
error = 0;
goto retry;
}
return (error);
}
int
fc_decode(faultcode_t fault_err)
{
int error = 0;
switch (FC_CODE(fault_err)) {
case FC_OBJERR:
error = FC_ERRNO(fault_err);
break;
case FC_PROT:
error = EACCES;
break;
default:
error = EFAULT;
break;
}
return (error);
}
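/*
 * Sketch of the common calling idiom (not part of the original source):
 *
 *    if ((fc = as_fault(as->a_hat, as, addr, size, F_INVAL, rw)) != 0)
 *        return (fc_decode(fc));
 *
 * FC_OBJERR carries the mapped object's own errno, FC_PROT becomes EACCES,
 * and anything else is reported as EFAULT.
 */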
/*
* Pagelock pages from a range that spans more than 1 segment. Obtain shadow
* lists from each segment and copy them to one contiguous shadow list (plist)
* as expected by the caller. Save pointers to per segment shadow lists at
* the tail of plist so that they can be used during as_pageunlock().
*/
static int
{
int error;
faultcode_t fault_err = 0;
extern struct seg_ops segspt_shmops;
/*
* Count the number of segments covered by the range we are about to
* lock. The segment count is used to size the shadow list we return
* back to the caller.
*/
return (EFAULT);
}
/*
* Do a quick check if subsequent segments
* will most likely support pagelock.
*/
goto slow;
}
goto slow;
}
segcnt++;
}
} else {
}
}
cnt++;
}
} else {
}
L_PAGELOCK, rw);
if (error) {
break;
}
}
if (size == 0) {
return (0);
}
/*
* One of the pagelock calls failed. The error type is in the error variable.
* Unlock what we've locked so far and retry with F_SOFTLOCK if error
* type is either EFAULT or ENOTSUP. Otherwise just return the error
* back to the caller.
*/
cnt++;
}
} else {
}
L_PAGEUNLOCK, rw);
}
return (error);
}
slow:
/*
* If we are here because pagelock failed due to the need to cow-fault
* in the pages we want to lock, F_SOFTLOCK will do this job and in the
* next as_pagelock() call for this address range pagelock will
* hopefully succeed.
*/
if (fault_err != 0) {
}
return (0);
}
/*
* lock pages in a given address space. Return shadow list. If
* the list is NULL, the MMU mapping is also locked.
*/
int
{
int err;
/*
* if the request crosses two segments let
* as_fault handle it.
*/
return (EFAULT);
}
}
return (EFAULT);
}
/*
* try to lock pages and pass back shadow list
*/
return (err);
}
/*
* Use F_SOFTLOCK to lock the pages because pagelock failed either due
* to no pagelock support for this segment or pages needing to be cow
* faulted in. If a fault is needed, F_SOFTLOCK will do this job for
* this as_pagelock() call, and in the next as_pagelock() call for the
* same address range the pagelock call will hopefully succeed.
*/
if (fault_err != 0) {
}
return (0);
}
/*
* unlock pages locked by as_pagelock_segs(). Retrieve per segment shadow
* lists from the end of plist and call pageunlock interface for each segment.
* Drop as lock and free plist.
*/
static void
{
cnt++;
}
} else {
}
L_PAGEUNLOCK, rw);
}
cnt++;
}
/*
* unlock pages in a given address range
*/
void
{
/*
* if the shadow list is NULL, as_pagelock was
* falling back to as_fault
*/
return;
}
} else {
return;
}
}
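/*
 * Illustrative sketch (not part of the original source): the usual
 * pagelock/pageunlock bracket around a data transfer. A NULL shadow list
 * simply means as_pagelock() fell back to F_SOFTLOCK; the unlock call is
 * the same either way:
 *
 *    struct page **pplist;
 *
 *    error = as_pagelock(as, &pplist, addr, size, S_WRITE);
 *    if (error != 0)
 *        return (error);
 *    ... do the transfer ...
 *    as_pageunlock(as, pplist, addr, size, S_WRITE);
 */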
int
{
int error = 0;
return (EINVAL);
}
return (ENOMEM);
return (ENOMEM);
}
break;
}
}
} else {
}
break;
}
goto setpgsz_top;
}
break;
}
/*
* Memory is currently locked. It must be unlocked
* before this operation can succeed through a retry.
* The possible reasons for locked memory and
* corresponding strategies for unlocking are:
* (1) Normal I/O
* wait for a signal that the I/O operation
* has completed and the memory is unlocked.
* (2) Asynchronous I/O
* The aio subsystem does not unlock pages when
* the I/O is completed. Those pages are unlocked
* when the application calls aiowait/aioerror.
* So, to prevent blocking forever, cv_broadcast()
* is done to wake up aio_cleanup_thread.
* Subsequently, segvn_reclaim will be called, and
* that will do AS_CLRUNMAPWAIT() and wake us up.
* (3) Long term page locking:
* This is not relevant for as_setpagesize()
* because we cannot change the page size for
* driver memory. The attempt to do so will
* fail with a different error than EAGAIN so
* there's no need to trigger as callbacks like
* as_unmap, as_setprot or as_free would do.
*/
if (!AS_ISNOUNMAPWAIT(as)) {
if (AS_ISUNMAPWAIT(as) == 0) {
}
while (AS_ISUNMAPWAIT(as)) {
}
} else {
/*
* We may have raced with
* segvn_reclaim()/segspt_reclaim(). In this
* case clean nounmapwait flag and retry since
* softlockcnt in this segment may be already
* 0. We don't drop as writer lock so our
* number of retries without sleeping should
* be very small. See segvn_reclaim() for
* more comments.
*/
goto retry;
}
goto setpgsz_top;
} else if (error != 0) {
break;
}
}
return (error);
}
/*
* as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments
* in its chunk where s_szc is less than the szc we want to set.
*/
static int
int *retry)
{
int error;
panic("as_iset3_default_lpsize: no seg");
}
panic("as_iset3_default_lpsize: as changed");
}
}
} else {
}
/* Only retry on EINVAL segments that have no vnode. */
*retry = 1;
} else {
*retry = 0;
}
}
if (error) {
return (error);
}
}
}
return (0);
}
/*
* as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
* pagesize on each segment in its range, but if any fails with EINVAL,
* then it reduces the pagesizes to the next size in the bitmap and
* retries as_iset3_default_lpsize(). The reason why the code retries
* smaller allowed sizes on EINVAL is because (a) the anon offset may not
* match the bigger sizes, and (b) it's hard to get this offset (to begin
* with) to pass to map_pgszcvec().
*/
static int
{
int error;
int retry;
for (;;) {
if (szcvec <= 1) {
return (EINVAL);
}
} else {
return (error);
}
}
}
/*
* as_iset1_default_lpsize() breaks its chunk into areas where existing
* segments have a smaller szc than we want to set. For each such area,
* it calls as_iset2_default_lpsize().
*/
static int
{
int set;
int error;
panic("as_iset1_default_lpsize: no seg");
}
set = 1;
} else {
set = 0;
}
panic("as_iset1_default_lpsize: as changed");
}
if (error) {
return (error);
}
set = 0;
setsize = 0;
set = 1;
}
}
} else {
}
}
error = 0;
if (set) {
}
return (error);
}
/*
* as_iset_default_lpsize() breaks its chunk according to the size code bitmap
* returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each
* chunk to as_iset1_default_lpsize().
*/
static int
int type)
{
int error;
caddr_t a;
szcvec &= ~1;
return (0);
}
/* Get the pagesize of the first larger page size. */
while (szcvec) {
if ((szcvec & 0x1) == 0) {
nszc++;
szcvec >>= 1;
continue;
}
nszc++;
if (a != addr) {
if (error) {
return (error);
}
addr = a;
}
szcvec >>= 1;
}
while (szcvec) {
if (a != addr) {
if (error) {
return (error);
}
addr = a;
}
if (szcvec) {
}
}
return (0);
}
/*
* Set the default large page size for the range. Called via memcntl with
* page size set to 0. as_set_default_lpsize breaks the range down into
* chunks with the same type/flags, ignores non-segvn segments, and passes
* each chunk to as_iset_default_lpsize().
*/
int
{
int error;
int segvn;
if (size == 0)
return (0);
error = 0;
return (ENOMEM);
}
return (ENOMEM);
}
segvn = 1;
} else {
segvn = 0;
}
setsize = 0;
break;
}
/*
* The next segment is also segvn but
* has different flags and/or type.
*/
if (error) {
break;
}
setsize = 0;
} else if (!segvn) {
setsize = 0;
segvn = 1;
}
} else if (segvn) {
/* The next segment is not segvn. */
if (error) {
break;
}
segvn = 0;
}
}
} else {
}
}
/* The last chunk when rsize == 0. */
}
goto again;
if (!AS_ISNOUNMAPWAIT(as)) {
if (AS_ISUNMAPWAIT(as) == 0) {
}
while (AS_ISUNMAPWAIT(as)) {
}
} else {
/*
* We may have raced with
* segvn_reclaim()/segspt_reclaim(). In this case
* clean nounmapwait flag and retry since softlockcnt
* in this segment may be already 0. We don't drop as
* writer lock so our number of retries without
* sleeping should be very small. See segvn_reclaim()
* for more comments.
*/
}
goto again;
}
return (error);
}
/*
* Set up all of the uninitialized watched pages that we can.
*/
void
{
struct watched_page *pwp;
return;
retrycnt = 0;
continue;
prot &= ~PROT_WRITE;
retrycnt++;
goto retry;
}
}
}
}
/*
* Clear all of the watched pages in the address space.
*/
void
{
struct watched_page *pwp;
return;
retrycnt = 0;
continue;
retrycnt++;
goto retry;
}
}
}
}
/*
* Force a new setup for all the watched pages in the range.
*/
static void
{
struct watched_page *pwp;
struct watched_page tpw;
return;
retrycnt = 0;
wprot &= ~PROT_WRITE;
panic("as_setwatchprot: no seg");
/*NOTREACHED*/
}
retrycnt++;
goto retry;
}
}
}
}
/*
* Clear all of the watched pages in the range.
*/
static void
{
struct watched_page *pwp;
struct watched_page tpw;
return;
retrycnt = 0;
continue;
retrycnt++;
goto retry;
}
}
}
}
}
void
{
struct proc *p;
mutex_enter(&p->p_lock);
mutex_exit(&p->p_lock);
}
}
}
/*
* return memory object ID
*/
int
{
int sts;
return (EFAULT);
}
/*
* catch old drivers which may not support getmemid
*/
return (ENODEV);
}
return (sts);
}