/* seg_kp.c, revision 1bd5c35fd400f7f19eee9efd795c32cedb602b06 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Portions of this source code were derived from Berkeley 4.3 BSD
* under license from the Regents of the University of California.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* segkp is a segment driver that administers the allocation and deallocation
* of pageable variable size chunks of kernel virtual address space. Each
* allocated resource is page-aligned.
*
* The user may specify whether the resource should be initialized to 0,
 * include a redzone, or be locked in memory.
*/
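/*
 * Illustrative usage sketch (not part of the original file): this assumes
 * the usual segkp_get()/segkp_release() entry points, the KPD_* flags from
 * <vm/seg_kp.h>, and the global segkp segment pointer; the size and flag
 * choices are examples only.
 *
 *	// Allocate a zeroed, locked, pageable chunk with a redzone,
 *	// e.g. for a kernel thread stack.
 *	caddr_t va = segkp_get(segkp, roundup(stksize, PAGESIZE),
 *	    KPD_ZERO | KPD_LOCKED | KPD_HASREDZONE);
 *	if (va == NULL)
 *		return (ENOMEM);	// out of vm resources (pages, swap)
 *	...
 *	segkp_release(segkp, va);	// give the resource back when done
 */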
#include <sys/sysmacros.h>
#include <sys/tuneable.h>
#include <sys/archsystm.h>
#include <vm/seg_kmem.h>
/*
* Private seg op routines
*/
static void segkp_badop(void);
/*
* Lock used to protect the hash table(s) and caches.
*/
static kmutex_t segkp_lock;
/*
 * The segkp caches
 */
struct segkp_cache segkp_cache[SEGKP_MAX_CACHE];

#define SEGKP_BADOP(t) (t(*)())segkp_badop
/*
* When there are fewer than red_minavail bytes left on the stack,
* segkp_map_red() will map in the redzone (if called). 5000 seems
* to work reasonably well...
*/
long red_minavail = 5000;
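/*
 * A minimal sketch of the headroom test described above, assuming a
 * downward-growing stack and the usual curthread->t_stkbase field; the
 * getfp() call and variable names are illustrative assumptions:
 *
 *	uintptr_t fp = (uintptr_t)getfp();
 *	if (fp - (uintptr_t)curthread->t_stkbase >= (uintptr_t)red_minavail)
 *		return (0);	// plenty of stack left; nothing to map
 *	// otherwise, map in the redzone page and return 1
 */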
/*
 * Will be set to 1 for 32-bit x86 systems only, in startup.c.
*/
int segkp_fromheap = 0;
/*
* If segkp_map_red() is called with the redzone already mapped and
* with less than RED_DEEP_THRESHOLD bytes available on the stack,
* then the stack situation has become quite serious; if much more stack
 * is consumed, we risk scribbling over the next thread/LWP
 * structure. To help debug the "can't happen" panics which may
* result from this condition, we record lbolt and the calling thread
* in red_deep_lbolt and red_deep_thread respectively.
*/
#define RED_DEEP_THRESHOLD 2000
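/*
 * The recording variables named above are not visible in this fragment;
 * the declarations below are a hedged sketch (types assumed).
 */
clock_t		red_deep_lbolt;		/* lbolt when the deep redzone was hit */
kthread_t	*red_deep_thread;	/* thread that was running that deep */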
static struct seg_ops segkp_ops = {
	SEGKP_BADOP(int),		/* dup */
	SEGKP_BADOP(int),		/* unmap */
	SEGKP_BADOP(void),		/* free */
	SEGKP_BADOP(int),		/* setprot */
	SEGKP_BADOP(int),		/* sync */
	SEGKP_BADOP(int),		/* lockop */
	SEGKP_BADOP(int),		/* getprot */
	SEGKP_BADOP(int),		/* gettype */
	SEGKP_BADOP(int),		/* getvp */
	SEGKP_BADOP(int),		/* advise */
	segkp_dump,			/* dump */
	segkp_pagelock,			/* pagelock */
	SEGKP_BADOP(int),		/* setpgsz */
	segkp_getmemid,			/* getmemid */
	segkp_getpolicy,		/* getpolicy */
	segkp_capable,			/* capable */
};
static void
segkp_badop(void)
{
panic("segkp_badop");
/*NOTREACHED*/
}
static void segkpinit_mem_config(struct seg *);
static uint32_t segkp_indel;
/*
* Allocate the segment specific private data struct and fill it in
* with the per kp segment mutex, anon ptr. array and hash table.
*/
int
segkp_create(struct seg *seg)
{
struct segkp_segdata *kpsd;
panic("Bad segkp size");
/*NOTREACHED*/
}
/*
* Allocate the virtual memory for segkp and initialize it
*/
if (segkp_fromheap) {
} else {
segkp_bitmap = NULL;
VM_SLEEP);
}
KM_SLEEP);
return (0);
}
/*
* Find a free 'freelist' and initialize it with the appropriate attributes
*/
void *
segkp_cache_init(struct seg *seg, int maxsize, size_t len, uint_t flags)
{
int i;
return ((void *)-1);
for (i = 0; i < SEGKP_MAX_CACHE; i++) {
if (segkp_cache[i].kpf_inuse)
continue;
return ((void *)(uintptr_t)i);
}
return ((void *)-1);
}
/*
* Free all the cache resources.
*/
void
segkp_cache_free(void)
{
struct segkp_data *kpd;
int i;
for (i = 0; i < SEGKP_MAX_CACHE; i++) {
if (!segkp_cache[i].kpf_inuse)
continue;
/*
* Disconnect the freelist and process each element
*/
segkp_cache[i].kpf_count = 0;
struct segkp_data *next;
}
}
}
/*
 * There are two entry points into segkp_get_internal. The first includes a
 * cookie used to access a pool of cached segkp resources; the second does
 * not use the cache. (A usage sketch of both paths follows below.)
*/
{
}
return (NULL);
}
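/*
 * Usage sketch of the two entry paths noted above (cached vs. uncached);
 * the segkp_cache_init()/segkp_get() names, the global segkp pointer, and
 * the sizes/flags shown are assumptions for illustration only:
 *
 *	// Cached path: set up a freelist once, then draw from it.
 *	void *cookie = segkp_cache_init(segkp, 24, stksize,
 *	    KPD_HASREDZONE | KPD_LOCKED);
 *	caddr_t va = segkp_cache_get(cookie);
 *
 *	// Uncached path: allocate directly.
 *	caddr_t va2 = segkp_get(segkp, stksize,
 *	    KPD_HASREDZONE | KPD_LOCKED);
 *
 *	// Either way, segkp_release() returns the resource (to the cache,
 *	// if it came from one and the cache is not full).
 *	segkp_release(segkp, va);
 */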
/*
* Return a 'cached' segkp address
*/
caddr_t
segkp_cache_get(void *cookie)
{
return (NULL);
}
}
return (NULL);
}
{
flags |= KPD_HASAMP;
}
return (NULL);
}
/*
 * This does the real work of segkp allocation.
 * Returns the base addr to the client. len must be page-aligned. A null
 * value is returned if there are no more vm resources (e.g. pages, swap).
 * The len and base recorded in the private data structure include the
 * redzone and the redzone length (if applicable). If the user requests a
 * redzone, either the first or the last page is left unmapped, depending
 * on whether stacks grow toward low or high memory. (A short length
 * fix-up sketch follows this function.)
 *
 * The client may also specify a no-wait flag. If that is set then the
 * request will choose a non-blocking path when requesting resources.
 * The default is to make the client wait.
 */
static caddr_t
segkp_get_internal(
	struct seg *seg,
	size_t len,
	uint_t flags,
	struct segkp_data **tkpd,
	struct anon_map *amp)
{
struct segkp_data *kpd;
long i;
if (len & PAGEOFFSET) {
panic("segkp_get: len is not page-aligned");
/*NOTREACHED*/
}
/* Only allow KPD_NO_ANON if we are going to lock it down */
return (NULL);
return (NULL);
/*
* Fix up the len to reflect the REDZONE if applicable
*/
if (flags & KPD_HASREDZONE)
return (NULL);
}
/* If locking, reserve physical memory */
if (flags & KPD_LOCKED) {
return (NULL);
}
if ((flags & KPD_NO_ANON) == 0)
}
/*
* Reserve sufficient swap space for this vm resource. We'll
* actually allocate it in the loop below, but reserving it
* here allows us to back out more gracefully than if we
* had an allocation failure in the body of the loop.
*
* Note that we don't need swap space for the red zone page.
*/
/* The reserve has been done and the anon_hdr is separate. */
anon_idx = 0;
} else if ((flags & KPD_NO_ANON) == 0) {
if (flags & KPD_LOCKED) {
-pages);
}
return (NULL);
}
} else {
kpd->kp_anon_idx = 0;
}
/*
* Allocate page and anon resources for the virtual address range
* except the redzone
*/
if (segkp_fromheap)
int err;
/*
* Mark this page to be a segkp page in the bitmap.
*/
if (segkp_fromheap) {
segkpindex++;
}
/*
* If this page is the red zone page, we don't need swap
* space for it. Note that we skip over the code that
* establishes MMU mappings, so that the page remains
* invalid.
*/
continue;
== NULL);
/*
* Determine the "vp" and "off" of the anon slot.
*/
ap, ANON_SLEEP);
/*
* Create a page with the specified identity. The
* page is returned with the "shared" lock held.
*/
kcred);
if (err) {
/*
* XXX - This should not fail.
*/
panic("segkp_get: no pages");
/*NOTREACHED*/
}
} else {
/*
* Legitimize resource; then destroy it.
* Easier than trying to unwind here.
*/
return (NULL);
}
}
/*
* Load and lock an MMU translation for the page.
*/
/*
* Now, release lock on the page.
*/
if (flags & KPD_LOCKED)
else
}
}
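/*
 * Hedged sketch of the redzone length fix-up mentioned in the comment
 * above this function: with KPD_HASREDZONE the recorded length includes
 * one extra page that is never mapped in. The helper below is illustrative
 * only and not part of the original file.
 *
 *	size_t
 *	segkp_len_with_redzone(size_t len, uint_t flags)
 *	{
 *		// e.g. a 16K request becomes 20K with 4K pages; the extra
 *		// page stays unmapped and catches stack overruns.
 *		return ((flags & KPD_HASREDZONE) ? len + PAGESIZE : len);
 *	}
 */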
/*
 * Release the resource to the cache if the pool (designated by the cookie)
 * has less than the maximum allowable. If inserted in the cache,
 * segkp_delete ensures the element is taken off the active list.
 */
void
segkp_release(struct seg *seg, caddr_t vaddr)
{
struct segkp_cache *freelist;
panic("segkp_release: null kpd");
/*NOTREACHED*/
}
return;
} else {
}
}
}
/*
* Free the entire resource. segkp_unlock gets called with the start of the
* mapped portion of the resource. The length is the size of the mapped
 * portion.
*/
static void
{
long i;
long redzone;
/* Remove from active hash list */
}
/*
* Precompute redzone page index.
*/
redzone = -1;
/*
* Free up those anon resources that are quiescent.
*/
if (segkp_fromheap)
/*
* Clear the bit for this page from the bitmap.
*/
if (segkp_fromheap) {
segkpindex++;
}
if (i == redzone)
continue;
/*
* Free up anon resources and destroy the
* associated pages.
*
* Release the lock if there is one. Have to get the
* page to do this, unfortunately.
*/
kpd->kp_anon_idx + i);
/* Find the shared-locked page. */
panic("segkp_release: "
"kp_anon: no page to unlock ");
/*NOTREACHED*/
}
}
PAGESIZE);
}
TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
} else {
panic("segkp_release: "
"no page to unlock");
/*NOTREACHED*/
}
/*
* We should just upgrade the lock here
* but there is no upgrade that waits.
*/
}
SE_EXCL);
page_destroy(pp, 0);
}
}
/* If locked, release physical memory reservation */
}
}
/*
* segkp_map_red() will check the current frame pointer against the
* stack base. If the amount of stack remaining is questionable
* (less than red_minavail), then segkp_map_red() will map in the redzone
* and return 1. Otherwise, it will return 0. segkp_map_red() can
* _only_ be called when:
*
* - it is safe to sleep on page_create_va().
* - the caller is non-swappable.
*
* It is up to the caller to remember whether segkp_map_red() successfully
* mapped the redzone, and, if so, to call segkp_unmap_red() at a later
* time. Note that the caller must _remain_ non-swappable until after
* calling segkp_unmap_red().
*
* Currently, this routine is only called from pagefault() (which necessarily
* satisfies the above conditions).
*/
#if defined(STACK_GROWTH_DOWN)
int
segkp_map_red(void)
{
#ifndef _LP64
#endif
/*
* Optimize for the common case where we simply return.
*/
return (0);
#if defined(_LP64)
/*
* XXX We probably need something better than this.
*/
panic("kernel stack overflow");
/*NOTREACHED*/
#else /* _LP64 */
PAGESIZE);
NULL);
/*
* Allocate the physical for the red page.
*/
/*
* No PG_NORELOC here to avoid waits. Unlikely to get
* a relocate happening in the short time the page exists
* and it will be OK anyway.
*/
/*
* So we now have a page to jam into the redzone...
*/
/*
* The page is left SE_SHARED locked so we can hold on to
* the page_t pointer.
*/
}
return (1);
}
/*
* Oh boy. We're already deep within the mapped-in
* redzone page, and the caller is trying to prepare
* for a deep stack run. We're running without a
* redzone right now: if the caller plows off the
* end of the stack, it'll plow another thread or
* LWP structure. That situation could result in
* a very hard-to-debug panic, so, in the spirit of
* recording the name of one's killer in one's own
* blood, we're going to record lbolt and the calling
* thread.
*/
}
/*
* If this is a DEBUG kernel, and we've run too deep for comfort, toss.
*/
return (0);
#endif /* _LP64 */
}
void
segkp_unmap_red(void)
{
/*
* Because we locked the mapping down, we can't simply rely
* on page_destroy() to clean everything up; we need to call
* hat_unload() to explicitly unlock the mapping resources.
*/
/*
* Need to upgrade the SE_SHARED lock to SE_EXCL.
*/
if (!page_tryupgrade(pp)) {
/*
 * As there is no wait for upgrade, release the
* SE_SHARED lock and wait for SE_EXCL.
*/
/* pp may be NULL here, hence the test below */
}
/*
* Destroy the page, with dontfree set to zero (i.e. free it).
*/
page_destroy(pp, 0);
}
#else
#endif
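/*
 * Hedged sketch of the map/unmap pairing described above, roughly as a
 * caller such as pagefault() would honor it; the names are illustrative
 * and the swap-control details are assumptions:
 *
 *	int mapped_red;
 *
 *	// caller must already be non-swappable here
 *	mapped_red = segkp_map_red();
 *	...				// handle the fault, may run deep
 *	if (mapped_red)
 *		segkp_unmap_red();	// only if segkp_map_red() returned 1
 *	// caller may become swappable again only after this point
 */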
/*
* Handle a fault on an address corresponding to one of the
* resources in the segkp segment.
*/
faultcode_t
segkp_fault(struct hat *hat, struct seg *seg, caddr_t vaddr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
int err;
/*
* Sanity checks.
*/
panic("segkp_fault: unexpected F_PROT fault");
/*NOTREACHED*/
}
return (FC_NOMAP);
if (type == F_SOFTLOCK) {
/*
* The F_SOFTLOCK case has more stringent
* range requirements: the given range must exactly coincide
 * with the resource's mapped portion. Note that a reference to the
 * redzone is handled, since vaddr would not equal base.
*/
return (FC_MAKE_ERR(EFAULT));
}
return (FC_MAKE_ERR(err));
}
return (0);
}
/*
* Check if we touched the redzone. Somewhat optimistic
* here if we are touching the redzone of our own stack
* since we wouldn't have a stack to get this far...
*/
panic("segkp_fault: accessing redzone");
/*
* This fault may occur while the page is being F_SOFTLOCK'ed.
* Return since a 2nd segkp_load is unnecessary and also would
* result in the page being locked twice and eventually
* hang the thread_reaper thread.
*/
return (0);
}
}
if (type == F_SOFTUNLOCK) {
/*
* Make sure the addr is LOCKED and it has anon backing
* before unlocking
*/
panic("segkp_fault: bad unlock");
/*NOTREACHED*/
}
panic("segkp_fault: bad range");
/*NOTREACHED*/
}
else
}
/*NOTREACHED*/
}
/*
* Check that the given protections suffice over the range specified by
* vaddr and len. For this segment type, the only issue is whether or
* not the range lies completely within the mapped part of an allocated
 * resource. (A containment sketch follows the function below.)
*/
/* ARGSUSED */
static int
{
return (EACCES);
return (EACCES);
}
return (0);
}
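/*
 * Hedged sketch of the containment test described above; the stom() and
 * SEGKP_MAPLEN() helpers and the kpd field names are assumed from the
 * segkp private header and are not visible in this fragment:
 *
 *	mbase = stom(kpd->kp_base, kpd->kp_flags);
 *	mlen = SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags);
 *	if (vaddr < mbase || vaddr + len > mbase + mlen)
 *		return (EACCES);	// falls outside the mapped part
 *	return (0);
 */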
/*
 * Check whether it makes sense to kluster (read ahead) to
 * addr + delta relative to the mapping at addr. We assume here
* that delta is a signed PAGESIZE'd multiple (which can be negative).
*
* For seg_u we always "approve" of this action from our standpoint.
*/
/*ARGSUSED*/
static int
{
return (0);
}
/*
* Load and possibly lock intra-slot resources in the range given by
* vaddr and len.
*/
static int
struct segkp_data *kpd,
{
ulong_t i;
/* If locking, reserve physical memory */
if (flags & KPD_LOCKED) {
}
/*
* Loop through the pages in the given range.
*/
int err;
/*
* Summon the page. If it's not resident, arrange
* for synchronous i/o to pull it in.
*/
/*
* The returned page list will have exactly one entry,
* which is returned to us already kept.
*/
if (err) {
/*
* Back out of what we've done so far.
*/
return (err);
}
/*
* Load an MMU translation for the page.
*/
if (!lock) {
/*
* Now, release "shared" lock on the page.
*/
page_unlock(pl[0]);
}
}
return (0);
}
/*
 * At the very least, unload the MMU translations and unlock the range if it
 * is locked. Can be called with the flag value KPD_WRITEDIRTY, which
 * specifies that any dirty pages should be written to disk.
*/
static int
struct segkp_data *kpd,
{
ulong_t i;
#ifdef lint
#endif /* lint */
/*
 * Loop through the pages in the given range. It is assumed that
 * segkp_unlock is called with a page-aligned base.
*/
/*
* Find the page associated with this part of the
* slot, tracking it down through its associated swap
* space.
*/
if (flags & KPD_LOCKED) {
if (flags & KPD_LOCKED) {
panic("segkp_softunlock: missing page");
/*NOTREACHED*/
}
}
} else {
/*
* Nothing to do if the slot is not locked and the
* page doesn't exist.
*/
continue;
}
/*
* If the page doesn't have any translations, is
* dirty and not being shared, then push it out
* asynchronously and avoid waiting for the
* pageout daemon to do it for us.
*
* XXX - Do we really need to get the "exclusive"
* lock via an upgrade?
*/
/*
* Hold the vnode before releasing the page lock to
* prevent it from being freed and re-used by some
* other thread.
*/
/*
* Want most powerful credentials we can get so
* use kcred.
*/
} else {
}
}
/* If unlocking, release physical memory */
if (flags & KPD_LOCKED) {
}
return (0);
}
/*
* Insert the kpd in the hash table.
*/
static void
{
int index;
/*
* Insert the kpd based on the address that will be returned
* via segkp_release.
*/
}
/*
* Remove kpd from the hash table.
*/
static void
{
struct segkp_data **kpp;
int index;
return;
}
}
panic("segkp_delete: unable to find element to delete");
/*NOTREACHED*/
}
/*
* Find the kpd associated with a vaddr.
*
* Most of the callers of segkp_find will pass the vaddr that
* hashes to the desired index, but there are cases where
 * this is not true, in which case we have to (potentially) scan
 * the whole table looking for it. This should be very rare
 * (e.g. a segkp_fault(F_INVAL) on an address somewhere in the
 * middle of the segkp_data region). A probe sketch follows the
 * function below.
*/
static struct segkp_data *
segkp_find(struct seg *seg, caddr_t vaddr)
{
struct segkp_data *kpd;
int i;
int stop;
do {
return (kpd);
}
}
if (--i < 0)
} while (i != stop);
return (NULL); /* Not found */
}
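/*
 * Hedged sketch of the hash-then-scan probe outlined above; the SEGKP_HASH()
 * macro, the kpsd_hash bucket array and the kp_base/kp_len/kp_next fields
 * are assumptions, not visible in this fragment:
 *
 *	i = stop = SEGKP_HASH(vaddr);		// start at the likely bucket
 *	do {
 *		for (kpd = kpsd->kpsd_hash[i]; kpd != NULL;
 *		    kpd = kpd->kp_next) {
 *			if (vaddr >= kpd->kp_base &&
 *			    vaddr < kpd->kp_base + kpd->kp_len)
 *				return (kpd);	// found the owning slot
 *		}
 *		if (--i < 0)
 *			i = SEGKP_HASHSZ - 1;	// wrap and keep scanning
 *	} while (i != stop);
 *	return (NULL);
 */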
/*
 * Returns the size of the swappable area.
*/
{
struct segkp_data *kpd;
else
return (NULL);
}
/*
* Dump out all the active segkp pages
*/
static void
{
int i;
struct segkp_data *kpd;
for (i = 0; i < SEGKP_HASHSZ; i++) {
if (pfn != PFN_INVALID)
}
}
}
}
/*ARGSUSED*/
static int
{
return (ENOTSUP);
}
/*ARGSUSED*/
static int
{
return (ENODEV);
}
/*ARGSUSED*/
static lgrp_mem_policy_info_t *
{
return (NULL);
}
/*ARGSUSED*/
static int
{
return (0);
}
#include <sys/mem_config.h>
/*ARGSUSED*/
static void
{}
/*
* During memory delete, turn off caches so that pages are not held.
* A better solution may be to unlock the pages while they are
* in the cache so that they may be collected naturally.
*/
/*ARGSUSED*/
static int
{
return (0);
}
/*ARGSUSED*/
static void
{
}
static kphysm_setup_vector_t segkp_mem_config_vec = {
};
static void
{
int ret;
}