vm_seg.c revision 0209230bf1261579beab4f55226bb509e6b850cb
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* VM - segment management.
*/
#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/vmsystm.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/mem_config.h>
#include <sys/mman.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_spt.h>
#include <vm/seg_vn.h>
/*
* kstats for segment advise
*/
segadvstat_t segadvstat = {
{ "MADV_FREE_hit", KSTAT_DATA_ULONG },
{ "MADV_FREE_miss", KSTAT_DATA_ULONG },
};
kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);
/* #define PDEBUG */
#if defined(PDEBUG) || defined(lint) || defined(__lint)
int pdebug = 0;
#else
#define pdebug 0
#endif /* PDEBUG */
#define PPRINTF if (pdebug) printf
#define PPRINT(x) PPRINTF(x)
#define PPRINT1(x, a) PPRINTF(x, a)
#define PPRINT2(x, a, b) PPRINTF(x, a, b)
#define PPRINT3(x, a, b, c) PPRINTF(x, a, b, c)
#define PPRINT4(x, a, b, c, d) PPRINTF(x, a, b, c, d)
#define PPRINT5(x, a, b, c, d, e) PPRINTF(x, a, b, c, d, e)
#define P_HASHMASK (p_hashsize - 1)
#define P_BASESHIFT 6
/*
* entry in the segment page cache
*/
struct seg_pcache {
struct seg_pcache *p_hnext; /* list for hashed blocks */
struct seg_pcache *p_hprev;
int p_active; /* active count */
int p_ref; /* ref bit */
size_t p_len; /* segment length */
caddr_t p_addr; /* base address */
struct seg *p_seg; /* segment */
struct page **p_pp; /* pp shadow list */
enum seg_rw p_rw; /* rw */
uint_t p_flags; /* bit flags */
int (*p_callback)(struct seg *, caddr_t, size_t,
struct page **, enum seg_rw);
};
struct seg_phash {
struct seg_pcache *p_hnext; /* list for hashed blocks */
struct seg_pcache *p_hprev;
int p_qlen; /* Q length */
kmutex_t p_hmutex; /* protects hash bucket */
};
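/*
 * Each hash bucket heads a circular, doubly linked list of seg_pcache
 * entries. The bucket itself acts as the list sentinel: because p_hnext
 * and p_hprev are the first members of both seg_phash and seg_pcache,
 * the bucket header can be cast to (struct seg_pcache *), and traversal
 * terminates when the walk wraps back around to the bucket header.
 */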
static int seg_preap_time = 20; /* reclaim every 20 secs */
static int seg_pmaxqlen = 5; /* max Q length in hash list */
static int seg_ppcount = 5; /* max # of purges per reclaim interval */
static int seg_plazy = 1; /* if 1, pages are cached after pageunlock */
static pgcnt_t seg_pwindow; /* max # of pages that can be cached */
static pgcnt_t seg_plocked; /* # of pages which are cached by pagelock */
static pgcnt_t seg_plocked_window; /* # pages from window */
int seg_preapahead;
static uint_t seg_pdisable = 0; /* if not 0, caching temporarily disabled */
static int seg_pupdate_active = 1; /* background reclaim thread */
static clock_t seg_preap_interval; /* reap interval in ticks */
static kmutex_t seg_pcache; /* protects the whole pagelock cache */
static kmutex_t seg_pmem; /* protects window counter */
static ksema_t seg_psaync_sem; /* sema for reclaim thread */
static struct seg_phash *p_hashtab;
static int p_hashsize = 0;
#define p_hash(seg) \
(P_HASHMASK & \
((uintptr_t)(seg) >> P_BASESHIFT))
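/*
 * p_hash() indexes the cache by segment pointer alone: the pointer is
 * shifted right by P_BASESHIFT (6, i.e. a 64-byte grain) and masked with
 * P_HASHMASK. As an illustrative example, with p_hashsize of 1024 a
 * segment at address 0x3000c0 hashes to ((0x3000c0 >> 6) & 0x3ff) == 3.
 * Because the hash depends only on the seg pointer, every cached range
 * of a given segment lands in the same bucket.
 */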
#define p_match(pcp, seg, addr, len, rw) \
(((pcp)->p_seg == (seg) && \
(pcp)->p_addr == (addr) && \
(pcp)->p_rw == (rw) && \
(pcp)->p_len == (len)) ? 1 : 0)
#define p_match_pp(pcp, seg, addr, len, pp, rw) \
(((pcp)->p_seg == (seg) && \
(pcp)->p_addr == (addr) && \
(pcp)->p_pp == (pp) && \
(pcp)->p_rw == (rw) && \
(pcp)->p_len == (len)) ? 1 : 0)
/*
 * look up an address range in the pagelock cache. On a hit, return the
 * shadow list and bump the active count.
*/
struct page **
seg_plookup(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
struct seg_pcache *pcp;
struct seg_phash *hp;
/*
 * Skip the pagelock cache while DR is in progress or
 * seg_pcache is off.
*/
if (seg_pdisable || seg_plazy == 0) {
return (NULL);
}
hp = &p_hashtab[p_hash(seg)];
mutex_enter(&hp->p_hmutex);
for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
pcp = pcp->p_hnext) {
if (p_match(pcp, seg, addr, len, rw)) {
pcp->p_active++;
mutex_exit(&hp->p_hmutex);
PPRINT5("seg_plookup hit: seg %p, addr %p, "
"len %lx, count %d, pplist %p \n",
(void *)seg, (void *)addr, len, pcp->p_active,
(void *)pcp->p_pp);
return (pcp->p_pp);
}
}
mutex_exit(&hp->p_hmutex);
PPRINT("seg_plookup miss:\n");
return (NULL);
}
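/*
 * A simplified sketch of how a segment driver's pagelock routine is
 * expected to use this interface (illustrative only; building and
 * locking the shadow list, and the reclaim_callback, are driver specific):
 *
 *	if ((pplist = seg_plookup(seg, addr, len, rw)) != NULL)
 *		return (pplist);	<- cache hit, active count bumped
 *	if (seg_pinsert_check(seg, len, flags) == SEGP_SUCCESS) {
 *		... build and lock the shadow list into pplist ...
 *		(void) seg_pinsert(seg, addr, len, pplist, rw, flags,
 *		    reclaim_callback);
 *	}
 *	... later, when the caller is done with the pages ...
 *	seg_pinactive(seg, addr, len, pplist, rw, reclaim_callback);
 */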
/*
 * mark an address range inactive. If the cache is off or the address
 * range is not in the cache, we call the segment driver to reclaim
 * the pages. Otherwise just decrement the active count and set the ref bit.
*/
void
seg_pinactive(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
enum seg_rw rw, int (*callback)(struct seg *, caddr_t, size_t,
struct page **, enum seg_rw))
{
struct seg_pcache *pcp;
struct seg_phash *hp;
if (seg_plazy == 0) {
(void) (*callback)(seg, addr, len, pp, rw);
return;
}
hp = &p_hashtab[p_hash(seg)];
mutex_enter(&hp->p_hmutex);
for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
pcp = pcp->p_hnext) {
if (p_match_pp(pcp, seg, addr, len, pp, rw)) {
pcp->p_active--;
ASSERT(pcp->p_active >= 0);
if (pcp->p_active == 0 && seg_pdisable) {
int npages;
ASSERT(callback == pcp->p_callback);
/* free the entry */
hp->p_qlen--;
pcp->p_hprev->p_hnext = pcp->p_hnext;
pcp->p_hnext->p_hprev = pcp->p_hprev;
mutex_exit(&hp->p_hmutex);
npages = pcp->p_len >> PAGESHIFT;
mutex_enter(&seg_pmem);
seg_plocked -= npages;
if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
seg_plocked_window -= npages;
}
mutex_exit(&seg_pmem);
kmem_free(pcp, sizeof (struct seg_pcache));
goto out;
}
pcp->p_ref = 1;
mutex_exit(&hp->p_hmutex);
return;
}
}
mutex_exit(&hp->p_hmutex);
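	/*
	 * No matching cache entry was found; fall through to 'out' and let
	 * the segment driver's callback reclaim the pages. The 'out' label
	 * is also reached via the goto above when a forced free (caching
	 * disabled) removed the entry.
	 */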
out:
(void) (*callback)(seg, addr, len, pp, rw);
}
/*
 * seg_pinsert_check() is used by segment drivers to predict whether
* a call to seg_pinsert will fail and thereby avoid wasteful pre-processing.
*/
int
seg_pinsert_check(struct seg *seg, size_t len, uint_t flags)
{
struct seg_phash *hp;
if (seg_plazy == 0) {
return (SEGP_FAIL);
}
if (seg_pdisable != 0) {
return (SEGP_FAIL);
}
ASSERT((len & PAGEOFFSET) == 0);
hp = &p_hashtab[p_hash(seg)];
if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
return (SEGP_FAIL);
}
/*
* If the SEGP_FORCE_WIRED flag is set,
* we skip the check for seg_pwindow.
*/
if ((flags & SEGP_FORCE_WIRED) == 0) {
pgcnt_t npages;
npages = len >> PAGESHIFT;
if ((seg_plocked_window + npages) > seg_pwindow) {
return (SEGP_FAIL);
}
}
return (SEGP_SUCCESS);
}
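/*
 * Note that seg_pinsert_check() reads p_qlen and seg_plocked_window
 * without holding any locks, so its answer is only advisory: a
 * subsequent seg_pinsert() repeats the same checks (the window check
 * under seg_pmem) and may still fail.
 */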
/*
 * insert an address range with its shadow list into the pagelock cache.
 * If the cache is off, caching is temporarily disabled, the hash bucket
 * queue is too long, or the allowed 'window' would be exceeded, return
 * SEGP_FAIL. Otherwise return SEGP_SUCCESS.
*/
int
seg_pinsert(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
enum seg_rw rw, uint_t flags, int (*callback)(struct seg *, caddr_t,
size_t, struct page **, enum seg_rw))
{
struct seg_pcache *pcp;
struct seg_phash *hp;
pgcnt_t npages;
if (seg_plazy == 0) {
return (SEGP_FAIL);
}
if (seg_pdisable != 0) {
return (SEGP_FAIL);
}
ASSERT((len & PAGEOFFSET) == 0);
hp = &p_hashtab[p_hash(seg)];
if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
return (SEGP_FAIL);
}
npages = len >> PAGESHIFT;
mutex_enter(&seg_pmem);
/*
* If the SEGP_FORCE_WIRED flag is set,
* we skip the check for seg_pwindow.
*/
if ((flags & SEGP_FORCE_WIRED) == 0) {
seg_plocked_window += npages;
if (seg_plocked_window > seg_pwindow) {
seg_plocked_window -= npages;
mutex_exit(&seg_pmem);
return (SEGP_FAIL);
}
}
seg_plocked += npages;
mutex_exit(&seg_pmem);
pcp = kmem_alloc(sizeof (struct seg_pcache), KM_SLEEP);
pcp->p_seg = seg;
pcp->p_addr = addr;
pcp->p_len = len;
pcp->p_pp = pp;
pcp->p_rw = rw;
pcp->p_callback = callback;
pcp->p_active = 1;
pcp->p_flags = flags;
PPRINT4("seg_pinsert: seg %p, addr %p, len %lx, pplist %p\n",
(void *)seg, (void *)addr, len, (void *)pp);
hp = &p_hashtab[p_hash(seg)];
mutex_enter(&hp->p_hmutex);
hp->p_qlen++;
pcp->p_hnext = hp->p_hnext;
pcp->p_hprev = (struct seg_pcache *)hp;
hp->p_hnext->p_hprev = pcp;
hp->p_hnext = pcp;
mutex_exit(&hp->p_hmutex);
return (SEGP_SUCCESS);
}
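/*
 * The new entry is created with p_active == 1, so it cannot be reclaimed
 * by the async thread or a purge until the caller releases it with
 * seg_pinactive(). It is linked at the head of its hash bucket's list.
 */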
/*
 * purge all entries from the pagelock cache that are not active
* and not recently used. Drop all locks and call through
* the address space into the segment driver to reclaim
* the pages. This makes sure we get the address space
* and segment driver locking right.
*/
static void
seg_ppurge_all(int force)
{
struct seg_pcache *delcallb_list = NULL;
struct seg_pcache *pcp;
struct seg_phash *hp;
int purge_count = 0;
pgcnt_t npages = 0;
pgcnt_t npages_window = 0;
/*
 * if the cache is off or empty, return
*/
if (seg_plazy == 0 || seg_plocked == 0) {
return;
}
for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {
mutex_enter(&hp->p_hmutex);
pcp = hp->p_hnext;
/*
* While 'force' is set, seg_pasync_thread is not
 * throttled. This is to speed up flushing of seg_pcache
 * in preparation for DR.
 *
 * In the normal case, when 'force' is not set, we throttle
 * seg_pasync_thread so that we don't spend all the
 * time purging the cache.
*/
while ((pcp != (struct seg_pcache *)hp) &&
(force || (purge_count <= seg_ppcount))) {
/*
* purge entries which are not active and
* have not been used recently and
* have the SEGP_ASYNC_FLUSH flag.
*
* In the 'force' case, we ignore the
* SEGP_ASYNC_FLUSH flag.
*/
if (!(pcp->p_flags & SEGP_ASYNC_FLUSH))
pcp->p_ref = 1;
if (force)
pcp->p_ref = 0;
if (!pcp->p_ref && !pcp->p_active) {
struct as *as = pcp->p_seg->s_as;
/*
* try to get the readers lock on the address
* space before taking out the cache element.
* This ensures as_pagereclaim() can actually
* call through the address space and free
* the pages. If we don't get the lock, just
* skip this entry. The pages will be reclaimed
* by the segment driver at unmap time.
*/
if (AS_LOCK_TRYENTER(as, &as->a_lock,
RW_READER)) {
hp->p_qlen--;
pcp->p_hprev->p_hnext = pcp->p_hnext;
pcp->p_hnext->p_hprev = pcp->p_hprev;
pcp->p_hprev = delcallb_list;
delcallb_list = pcp;
purge_count++;
}
} else {
pcp->p_ref = 0;
}
pcp = pcp->p_hnext;
}
mutex_exit(&hp->p_hmutex);
if (!force && purge_count > seg_ppcount)
break;
}
/*
* run the delayed callback list. We don't want to hold the
* cache lock during a call through the address space.
*/
while (delcallb_list != NULL) {
struct as *as;
pcp = delcallb_list;
delcallb_list = pcp->p_hprev;
as = pcp->p_seg->s_as;
PPRINT4("seg_ppurge_all: purge seg %p, addr %p, len %lx, "
"pplist %p\n", (void *)pcp->p_seg, (void *)pcp->p_addr,
pcp->p_len, (void *)pcp->p_pp);
as_pagereclaim(as, pcp->p_pp, pcp->p_addr,
pcp->p_len, pcp->p_rw);
AS_LOCK_EXIT(as, &as->a_lock);
npages += pcp->p_len >> PAGESHIFT;
if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
npages_window += pcp->p_len >> PAGESHIFT;
}
kmem_free(pcp, sizeof (struct seg_pcache));
}
mutex_enter(&seg_pmem);
seg_plocked -= npages;
seg_plocked_window -= npages_window;
mutex_exit(&seg_pmem);
}
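/*
 * Note the two-phase structure above: candidate entries are unlinked from
 * their buckets while holding the bucket mutex (reusing p_hprev to chain
 * them onto delcallb_list), and only after the bucket locks are dropped
 * do we call as_pagereclaim() through the address space. seg_ppurge()
 * below uses the same pattern with the driver callback.
 */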
/*
 * Remove cached pages for a segment's (or segments') entries from the
 * hashtable. The segments are identified by a given client's callback
 * function.
 * This is useful when multiple segs are cached on behalf of a
 * dummy segment (ISM/DISM) with a common callback function.
 * The client's callback function may return status indicating
 * that the last seg's entry has been purged. In such a case
 * seg_ppurge_seg() stops searching the hashtable and exits.
 * Otherwise all hashtable entries are scanned.
*/
void
seg_ppurge_seg(int (*callback)(struct seg *, caddr_t, size_t,
struct page **, enum seg_rw))
{
struct seg_pcache *pcp, *npcp;
struct seg_phash *hp;
pgcnt_t npages = 0;
pgcnt_t npages_window = 0;
int done = 0;
/*
 * if the cache is off or empty, return
*/
if (seg_plazy == 0 || seg_plocked == 0) {
return;
}
mutex_enter(&seg_pcache);
seg_pdisable++;
mutex_exit(&seg_pcache);
for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {
mutex_enter(&hp->p_hmutex);
pcp = hp->p_hnext;
while (pcp != (struct seg_pcache *)hp) {
/*
* purge entries which are not active
*/
npcp = pcp->p_hnext;
if (!pcp->p_active && pcp->p_callback == callback) {
hp->p_qlen--;
pcp->p_hprev->p_hnext = pcp->p_hnext;
pcp->p_hnext->p_hprev = pcp->p_hprev;
if ((*pcp->p_callback)(pcp->p_seg, pcp->p_addr,
pcp->p_len, pcp->p_pp, pcp->p_rw)) {
done = 1;
}
npages += pcp->p_len >> PAGESHIFT;
if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
npages_window +=
pcp->p_len >> PAGESHIFT;
}
kmem_free(pcp, sizeof (struct seg_pcache));
}
pcp = npcp;
if (done)
break;
}
mutex_exit(&hp->p_hmutex);
if (done)
break;
}
mutex_enter(&seg_pcache);
seg_pdisable--;
mutex_exit(&seg_pcache);
mutex_enter(&seg_pmem);
seg_plocked -= npages;
seg_plocked_window -= npages_window;
mutex_exit(&seg_pmem);
}
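/*
 * seg_pdisable is raised for the duration of the scan above so that
 * seg_plookup() and seg_pinsert() stay out of the cache while entries
 * belonging to the given callback are being torn down.
 */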
/*
 * purge all entries for a given segment. Since we
 * call back into the segment driver directly for page
 * reclaim, the caller needs to hold the right locks.
*/
void
seg_ppurge(struct seg *seg)
{
struct seg_pcache *delcallb_list = NULL;
struct seg_pcache *pcp;
struct seg_phash *hp;
pgcnt_t npages = 0;
pgcnt_t npages_window = 0;
if (seg_plazy == 0) {
return;
}
hp = &p_hashtab[p_hash(seg)];
mutex_enter(&hp->p_hmutex);
pcp = hp->p_hnext;
while (pcp != (struct seg_pcache *)hp) {
if (pcp->p_seg == seg) {
if (pcp->p_active) {
break;
}
hp->p_qlen--;
pcp->p_hprev->p_hnext = pcp->p_hnext;
pcp->p_hnext->p_hprev = pcp->p_hprev;
pcp->p_hprev = delcallb_list;
delcallb_list = pcp;
}
pcp = pcp->p_hnext;
}
mutex_exit(&hp->p_hmutex);
while (delcallb_list != NULL) {
pcp = delcallb_list;
delcallb_list = pcp->p_hprev;
PPRINT4("seg_ppurge: purge seg %p, addr %p, len %lx, "
"pplist %p\n", (void *)seg, (void *)pcp->p_addr,
pcp->p_len, (void *)pcp->p_pp);
ASSERT(seg == pcp->p_seg);
(void) (*pcp->p_callback)(seg, pcp->p_addr,
pcp->p_len, pcp->p_pp, pcp->p_rw);
npages += pcp->p_len >> PAGESHIFT;
if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
npages_window += pcp->p_len >> PAGESHIFT;
}
kmem_free(pcp, sizeof (struct seg_pcache));
}
mutex_enter(&seg_pmem);
seg_plocked -= npages;
seg_plocked_window -= npages_window;
mutex_exit(&seg_pmem);
}
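/*
 * Because p_hash() depends only on the segment pointer, all cached
 * entries for a segment live in a single bucket, which is why
 * seg_ppurge() scans just one hash chain. The scan also stops early if
 * it finds an entry for this segment that is still active.
 */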
static void seg_pinit_mem_config(void);
/*
 * set up the pagelock cache
*/
static void
seg_pinit(void)
{
struct seg_phash *hp;
int i;
uint_t physmegs;
sema_init(&seg_psaync_sem, 0, NULL, SEMA_DEFAULT, NULL);
mutex_enter(&seg_pcache);
if (p_hashtab == NULL) {
physmegs = physmem >> (20 - PAGESHIFT);
/* If p_hashsize was not set in /etc/system ... */
if (p_hashsize == 0) {
/*
* Choose p_hashsize based on physmem.
*/
if (physmegs < 64) {
p_hashsize = 64;
} else if (physmegs < 1024) {
p_hashsize = 1024;
} else if (physmegs < 10 * 1024) {
p_hashsize = 8192;
} else if (physmegs < 20 * 1024) {
p_hashsize = 2 * 8192;
seg_pmaxqlen = 16;
} else {
p_hashsize = 128 * 1024;
seg_pmaxqlen = 128;
}
}
p_hashtab = kmem_zalloc(
p_hashsize * sizeof (struct seg_phash), KM_SLEEP);
for (i = 0; i < p_hashsize; i++) {
hp = (struct seg_phash *)&p_hashtab[i];
hp->p_hnext = (struct seg_pcache *)hp;
hp->p_hprev = (struct seg_pcache *)hp;
mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
}
if (seg_pwindow == 0) {
if (physmegs < 24) {
/* don't use cache */
seg_plazy = 0;
} else if (physmegs < 64) {
seg_pwindow = physmem >> 5; /* 3% of memory */
} else if (physmegs < 10 * 1024) {
seg_pwindow = physmem >> 3; /* 12% of memory */
} else {
seg_pwindow = physmem >> 1;
}
}
}
mutex_exit(&seg_pcache);
seg_pinit_mem_config();
}
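/*
 * An illustrative sizing example: on a machine with 8 GB of physical
 * memory, physmegs is 8192, so the table above selects a p_hashsize of
 * 8192 buckets and, if seg_pwindow was not already set, a window of
 * physmem >> 3, i.e. roughly 12% of memory may be cached by pagelock.
 */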
/*
* called by pageout if memory is low
*/
void
seg_preap(void)
{
/*
 * if the cache is off or empty, return
*/
if (seg_plocked == 0 || seg_plazy == 0) {
return;
}
sema_v(&seg_psaync_sem);
}
static void seg_pupdate(void *);
/*
 * run as a background thread and reclaim pagelock
* pages which have not been used recently
*/
void
seg_pasync_thread(void)
{
callb_cpr_t cpr_info;
kmutex_t pasync_lock; /* just for CPR stuff */
mutex_init(&pasync_lock, NULL, MUTEX_DEFAULT, NULL);
CALLB_CPR_INIT(&cpr_info, &pasync_lock,
callb_generic_cpr, "seg_pasync");
if (seg_preap_interval == 0) {
seg_preap_interval = seg_preap_time * hz;
} else {
seg_preap_interval *= hz;
}
if (seg_plazy && seg_pupdate_active) {
(void) timeout(seg_pupdate, NULL, seg_preap_interval);
}
for (;;) {
mutex_enter(&pasync_lock);
CALLB_CPR_SAFE_BEGIN(&cpr_info);
mutex_exit(&pasync_lock);
sema_p(&seg_psaync_sem);
mutex_enter(&pasync_lock);
CALLB_CPR_SAFE_END(&cpr_info, &pasync_lock);
mutex_exit(&pasync_lock);
seg_ppurge_all(0);
}
}
static void
seg_pupdate(void *dummy)
{
sema_v(&seg_psaync_sem);
if (seg_plazy && seg_pupdate_active) {
(void) timeout(seg_pupdate, dummy, seg_preap_interval);
}
}
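/*
 * The reclaim pipeline: seg_pupdate() runs from a timeout every
 * seg_preap_interval ticks and posts seg_psaync_sem; seg_preap() posts
 * the same semaphore when pageout finds memory low. Either way,
 * seg_pasync_thread() wakes up and runs seg_ppurge_all(0), the
 * throttled (non-forced) purge.
 */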
static struct kmem_cache *seg_cache;
/*
* Initialize segment management data structures.
*/
void
seg_init(void)
{
kstat_t *ksp;
seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
0, NULL, NULL, NULL, NULL, NULL, 0);
ksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
if (ksp) {
ksp->ks_data = (void *)segadvstat_ptr;
kstat_install(ksp);
}
seg_pinit();
}
/*
 * Allocate a segment to cover [base, base + size) (rounded out to
 * page boundaries) and attach it to the specified address space.
*/
struct seg *
seg_alloc(struct as *as, caddr_t base, size_t size)
{
struct seg *new;
caddr_t segbase;
size_t segsize;
segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
(uintptr_t)segbase;
if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
return ((struct seg *)NULL); /* bad virtual addr range */
if (as != &kas &&
valid_usr_range(segbase, segsize, 0, as,
as->a_userlimit) != RANGE_OKAY)
return ((struct seg *)NULL); /* bad virtual addr range */
new = kmem_cache_alloc(seg_cache, KM_SLEEP);
new->s_ops = NULL;
new->s_data = NULL;
new->s_szc = 0;
new->s_flags = 0;
if (seg_attach(as, segbase, segsize, new) < 0) {
kmem_cache_free(seg_cache, new);
return ((struct seg *)NULL);
}
/* caller must fill in ops, data */
return (new);
}
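/*
 * An illustrative example of the rounding above, assuming 8K pages
 * (PAGESIZE 0x2000): a request with base 0x10010 and size 0x100 yields
 * segbase 0x10000 and segsize 0x2000, i.e. the smallest page-aligned
 * range covering [base, base + size).
 */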
/*
* Attach a segment to the address space. Used by seg_alloc()
* and for kernel startup to attach to static segments.
*/
int
seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
{
seg->s_as = as;
seg->s_base = base;
seg->s_size = size;
/*
 * as_addseg() will add the segment at the appropriate point
* in the list. It will return -1 if there is overlap with
* an already existing segment.
*/
return (as_addseg(as, seg));
}
/*
* Unmap a segment and free it from its associated address space.
* This should be called by anybody who's finished with a whole segment's
 * mapping. Just calls SEGOP_UNMAP() on the whole mapping. It is the
 * responsibility of the segment driver to unlink the segment
* from the address space, and to free public and private data structures
* associated with the segment. (This is typically done by a call to
* seg_free()).
*/
void
seg_unmap(struct seg *seg)
{
#ifdef DEBUG
int ret;
#endif /* DEBUG */
ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
/* Shouldn't have called seg_unmap if mapping isn't yet established */
ASSERT(seg->s_data != NULL);
/* Unmap the whole mapping */
#ifdef DEBUG
ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
ASSERT(ret == 0);
#else
SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
#endif /* DEBUG */
}
/*
* Free the segment from its associated as. This should only be called
* if a mapping to the segment has not yet been established (e.g., if
* an error occurs in the middle of doing an as_map when the segment
* has already been partially set up) or if it has already been deleted
* (e.g., from a segment driver unmap routine if the unmap applies to the
* entire segment). If the mapping is currently set up then seg_unmap() should
* be called instead.
*/
void
seg_free(struct seg *seg)
{
register struct as *as = seg->s_as;
struct seg *tseg = as_removeseg(as, seg);
ASSERT(tseg == seg);
/*
* If the segment private data field is NULL,
* then segment driver is not attached yet.
*/
if (seg->s_data != NULL)
SEGOP_FREE(seg);
kmem_cache_free(seg_cache, seg);
}
/*ARGSUSED*/
static void
seg_p_mem_config_post_add(
void *arg,
pgcnt_t delta_pages)
{
/* Nothing to do. */
}
/*
* Attempt to purge seg_pcache. May need to return before this has
* completed to allow other pre_del callbacks to unlock pages. This is
* ok because:
* 1) The seg_pdisable flag has been set so at least we won't
 * cache any more locks and the locks we couldn't purge
* will not be held if they do get released by a subsequent
* pre-delete callback.
*
* 2) The rest of the memory delete thread processing does not
* depend on the changes made in this pre-delete callback. No
* panics will result, the worst that will happen is that the
* DR code will timeout and cancel the delete.
*/
/*ARGSUSED*/
static int
seg_p_mem_config_pre_del(
void *arg,
pgcnt_t delta_pages)
{
pgcnt_t old_plocked;
int stall_count = 0;
mutex_enter(&seg_pcache);
seg_pdisable++;
ASSERT(seg_pdisable != 0);
mutex_exit(&seg_pcache);
/*
* Attempt to empty the cache. Terminate if seg_plocked does not
* diminish with SEGP_STALL_THRESHOLD consecutive attempts.
*/
while (seg_plocked != 0) {
old_plocked = seg_plocked;
seg_ppurge_all(1);
if (seg_plocked == old_plocked) {
if (stall_count++ > SEGP_STALL_THRESHOLD) {
cmn_err(CE_NOTE, "!Pre-delete couldn't purge"
" pagelock cache - continuing");
break;
}
} else
stall_count = 0;
if (seg_plocked != 0)
delay(hz/SEGP_PREDEL_DELAY_FACTOR);
}
return (0);
}
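/*
 * The loop above keeps forcing full purges, delaying
 * hz/SEGP_PREDEL_DELAY_FACTOR ticks between attempts, and gives up once
 * SEGP_STALL_THRESHOLD consecutive attempts make no progress on
 * seg_plocked. seg_pdisable stays raised until the post-delete (or
 * cancel) callback drops it again.
 */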
/*ARGSUSED*/
static void
seg_p_mem_config_post_del(
void *arg,
pgcnt_t delta_pages,
int cancelled)
{
mutex_enter(&seg_pcache);
ASSERT(seg_pdisable != 0);
seg_pdisable--;
mutex_exit(&seg_pcache);
}
static kphysm_setup_vector_t seg_p_mem_config_vec = {
KPHYSM_SETUP_VECTOR_VERSION,
seg_p_mem_config_post_add,
seg_p_mem_config_pre_del,
seg_p_mem_config_post_del,
};
static void
seg_pinit_mem_config(void)
{
int ret;
ret = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
/*
* Want to catch this in the debug kernel. At run time, if the
 * callbacks don't get run, all will be OK, as the disable just makes
* it more likely that the pages can be collected.
*/
ASSERT(ret == 0);
}
extern struct seg_ops segvn_ops;
extern struct seg_ops segspt_shmops;
/*
* Verify that segment is not a shared anonymous segment which reserves
 * swap. zone.max-swap accounting (zone->zone_max_swap) cannot be transferred
* from one zone to another if any segments are shared. This is because the
* last process to exit will credit the swap reservation. This could lead
* to the swap being reserved by one zone, and credited to another.
*/
boolean_t
seg_can_change_zones(struct seg *seg)
{
struct segvn_data *svd;
if (seg->s_ops == &segspt_shmops)
return (B_FALSE);
if (seg->s_ops == &segvn_ops) {
svd = (struct segvn_data *)seg->s_data;
if (svd->type == MAP_SHARED &&
svd->amp != NULL &&
svd->amp->swresv > 0)
return (B_FALSE);
}
return (B_TRUE);
}
/*
* Return swap reserved by a segment backing a private mapping.
*/
size_t
seg_swresv(struct seg *seg)
{
struct segvn_data *svd;
size_t swap = 0;
if (seg->s_ops == &segvn_ops) {
svd = (struct segvn_data *)seg->s_data;
if (svd->type == MAP_PRIVATE && svd->swresv > 0)
swap = svd->swresv;
}
return (swap);
}