/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* VM - generic vnode page mapping interfaces.
*
* Mechanism to provide temporary mappings to vnode pages.
*/
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/cpuvar.h>
#include <sys/debug.h>

#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/faultcode.h>
#include <vm/vpm.h>
/*
* Needs to be enabled by each platform.
*/
int vpm_enable = 0;
#ifdef SEGKPM_SUPPORT
int vpm_cache_enable = 1;
long vpm_cache_percent = 12;
long vpm_cache_size;
int vpm_nfreelist = 0;
int vpmd_freemsk = 0;
#define	VPM_S_PAD	64
union vpm_cpu {
	struct {
		int	vcpu_free_ndx;
	} vcpu;
	char vpm_pad[VPM_S_PAD];	/* pad to a cache line; avoids false sharing */
};
static union vpm_cpu *vpmd_cpu;		/* per-cpu freelist rotor indexes */
#define	vfree_ndx	vcpu.vcpu_free_ndx
int vpm_cachemode = VPMCACHE_LRU;
#define	PPMTX(pp)	(&(pp)->p_ilock)
static struct vpmap *vpmd_vpmap;	/* the preallocated vpmap structs */
static struct vpmfree *vpmd_free;	/* the vpm freelists */
#define	VPMAPMTX(vpm)	(&vpm->vpm_mtx)
#define	VPMAP2VMF(vpm)	(&vpmd_free[(vpm - vpmd_vpmap) & vpmd_freemsk])
#define	VPMAP2VMF_NDX(vpm)	(ushort_t)((vpm - vpmd_vpmap) & vpmd_freemsk)
#define	VPMP(id)	(&vpmd_vpmap[(id) - 1])
#ifdef DEBUG
struct vpm_debug {
int vpmd_steals;
int vpmd_contend;
int vpmd_prevpagelocked;
int vpmd_getpagefailed;
int vpmd_zerostart;
int vpmd_emptyfreelist;
int vpmd_nofreevpms;
} vpm_debug;

#define	VPM_DEBUG(x)	((vpm_debug.x)++)
int steals;
int steals_mtbf = 7;
int contend;
int contend_mtbf = 127;
#define VPM_MTBF(v, f) (((++(v)) & (f)) != (f))
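/*
 * With f being a power-of-2-minus-1 mask, VPM_MTBF(v, f) evaluates false
 * once every f + 1 calls. The DEBUG-only callers pair it with the
 * steals/contend counters above to periodically simulate a failed page
 * steal or a contended vpmap, so the retry paths get exercised.
 */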
#else /* DEBUG */
#define VPM_MTBF(v, f) (1)
#define VPM_DEBUG(x) /* nothing */
#endif
/*
* The vpm cache.
*
* The main purpose of having a cache here is to speed up page_lookup()
* operations and also provide an LRU(default) behaviour of file pages. The
* page_lookup() operation tends to be expensive if a page has to be
* reclaimed from the system page cache("cachelist"). Once we speed up the
* page_lookup()->page_reclaim() path then there should be no need for
* this cache. The system page cache(cachelist) should effectively serve the
* purpose of caching file pages.
*
* This cache is very similar to segmap's smap cache. Each page in the
* cache is tracked by the structure vpmap_t. But unlike segmap, there is no
* hash table. The page_t has a reference to the vpmap_t when cached. For a
* given vnode and offset, the page is found by means of a page_lookup()
* operation. Any page which has a mapping (i.e., when cached) will not be in the
* system 'cachelist'. Hence the page_lookup() will not have to do a
* page_reclaim(). That is how the cache serves to speed up page_lookup()
* operations.
*
*/
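/*
 * A minimal sketch (not in the original source) of the lookup scheme the
 * comment above describes. It assumes the 1-based p_vpmref index into the
 * preallocated vpmd_vpmap[] array that the VPMP() macro encodes.
 */
#if 0
static struct vpmap *
vpm_lookup_sketch(page_t *pp)
{
	uint_t refid = pp->p_vpmref;

	if (refid == 0)
		return (NULL);	/* the page is not in the vpm cache */

	/*
	 * No hash table is needed: the page itself carries the index
	 * of its vpmap slot, and the page is located by vnode/offset
	 * through the regular page_lookup() hash.
	 */
	return (VPMP(refid));
}
#endif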
void
vpm_init()
{
	long npages;
	struct vpmap *vpm;
	struct vpmfree *vpmflp;
	int i, ndx;
	extern void prefetch_smap_w(void *);

	if (!vpm_cache_enable) {
		return;
	}

	/*
	 * Set the size of the cache.
	 */
	vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent) / 100);
	if (vpm_cache_size < VPMAP_MINCACHE) {
		vpm_cache_size = VPMAP_MINCACHE;
	}

	/*
	 * Number of freelists.
	 */
	if (vpm_nfreelist == 0) {
		vpm_nfreelist = max_ncpus;
	}

	/*
	 * Round it up to the next power of 2.
	 */
	if (vpm_nfreelist & (vpm_nfreelist - 1)) {
		vpm_nfreelist = 1 << highbit(vpm_nfreelist);
	}
	vpmd_freemsk = vpm_nfreelist - 1;

	/*
	 * Use a per cpu rotor index to spread the allocations evenly
	 * across the available vpm freelists.
	 */
	vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP);
	ndx = 0;
	for (i = 0; i < max_ncpus; i++) {
		vpmd_cpu[i].vfree_ndx = ndx;
		ndx = (ndx + 1) & vpmd_freemsk;
	}

	/*
	 * Allocate and initialize the freelist.
	 */
	vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree),
	    KM_SLEEP);
	for (i = 0; i < vpm_nfreelist; i++) {
		vpmflp = &vpmd_free[i];
		/*
		 * Set up initial queue pointers. They will get flipped
		 * back and forth.
		 */
		vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ];
		vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ];
	}

	npages = mmu_btop(vpm_cache_size);

	/*
	 * Allocate and initialize the vpmap structs.
	 */
	vpmd_vpmap = kmem_zalloc(sizeof (struct vpmap) * npages, KM_SLEEP);
	for (vpm = vpmd_vpmap; vpm < &vpmd_vpmap[npages]; vpm++) {
		struct vpmap *vpmapf;
		union vpm_freeq *releq;

		/*
		 * Use prefetch as we have to walk through a large number of
		 * these data structures. We just use the smap's prefetch
		 * routine as it does the same. This should work fine
		 * for x64 (this needs to be modified when enabled on sparc).
		 */
		prefetch_smap_w((void *)vpm);

		vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm);
		mutex_init(VPMAPMTX(vpm), NULL, MUTEX_DEFAULT, NULL);

		/*
		 * Link the vpmap into its release queue.
		 */
		releq = VPMAP2VMF(vpm)->vpm_releq;
		if ((vpmapf = releq->vpmq_free) == NULL) {
			releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		} else {
			vpm->vpm_next = vpmapf;
			vpm->vpm_prev = vpmapf->vpm_prev;
			vpmapf->vpm_prev = vpm;
			vpm->vpm_prev->vpm_next = vpm;
		}

		/*
		 * Indicate that the vpmap is on the releq at start.
		 */
		vpm->vpm_ndxflg = VPMRELEQ;
	}
}
/*
* unhooks vpm from the freelist if it is still on the freelist.
*/
#define	VPMAP_RMFREELIST(vpm) \
{ \
	if (vpm->vpm_next != NULL) { \
		union vpm_freeq *freeq; \
		ASSERT(vpm->vpm_prev != NULL); \
		freeq = &VPMAP2VMF(vpm)->vpm_freeq[vpm->vpm_ndxflg]; \
		mutex_enter(&freeq->vpmq_mtx); \
		if (freeq->vpmq_free == vpm) \
			freeq->vpmq_free = \
			    (vpm == vpm->vpm_next) ? NULL : vpm->vpm_next; \
		vpm->vpm_prev->vpm_next = vpm->vpm_next; \
		vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
		mutex_exit(&freeq->vpmq_mtx); \
		vpm->vpm_next = vpm->vpm_prev = NULL; \
	} \
}
static int
get_freelndx(int mode)
{
	int ndx;

	ndx = vpmd_cpu[CPU->cpu_seqid].vfree_ndx & vpmd_freemsk;
	switch (mode) {
	case VPMCACHE_LRU:
	default:
		/* advance this cpu's rotor to the next freelist */
		vpmd_cpu[CPU->cpu_seqid].vfree_ndx++;
		break;
	}

	return (ndx);
}
/*
* Find one vpmap structure from the free lists and use it for the newpage.
* The previous page it cached is dissociated and released. The page_t's
* p_vpmref is cleared only when the vpm it points to is locked (or, on
* AMD64, when the page is exclusively locked in page_unload(), because
* p_vpmref is treated as a mapping there).
*
* The page's p_vpmref is set when the page is
* locked (at least SHARED locked).
*/
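/*
 * Flow overview: get_freelndx() picks the freelist bin. Starting at
 * retry_queue, the allocq of that bin is searched; when it is empty
 * (skip_queue), the releq is checked and, if populated, the two queues
 * are flipped and the search retried. Otherwise the next freelist is
 * tried, and only after all of them turn up empty does the thread wait
 * for a vpmap to be freed. next_vpmap steps over slots whose mutex is
 * held by another thread. Once a vpmap is chosen, the previous page's
 * p_vpmref is cleared and its kpm mapping is torn down with
 * hat_kpm_mapout() before the vpmap is reassigned to the new page.
 */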
static struct vpmap *
get_free_vpmap(page_t *newpage)
{
int end_ndx, page_locked = 0;
int free_ndx;
/*
* get the freelist bin index.
*/
/*
* The alloc list is empty or this queue is being skipped;
* first see if the allocq toggled.
*/
/* queue changed */
goto retry_queue;
}
/* cannot get releq; a free vpmap may be there now */
/*
* This loop could spin forever if this thread has
* higher priority than the thread that is holding
* releq->vpmq_mtx. In order to force the other thread
* to run, we take and drop releq->vpmq_mtx with a
* blocking mutex_enter(), which is safe
* since we just unlocked the allocq mutex.
*/
goto retry_queue;
}
/*
* This freelist is empty.
* This should not happen unless clients
* are failing to release the vpmap after
* accessing the data. Before resorting
* to sleeping, try the next list of the same color.
*/
goto retry_queue;
}
/*
* Tried all freelists.
* wait on this list and hope something gets freed.
*/
goto retry_queue;
} else {
/*
* Something on the rele queue; flip the alloc
* and rele queues and retry.
*/
if (page_locked) {
page_locked = 0;
}
goto retry_queue;
}
} else {
int gotnewvpm;
/*
* Fastpath the case we get the vpmap mutex
* on the first try.
*/
if (!mutex_tryenter(vmtx)) {
/*
* Another thread is trying to reclaim this slot.
* Skip to the next queue or vpmap.
*/
goto skip_queue;
} else {
goto next_vpmap;
}
}
/*
* Assign this vpm to the newpage.
*/
gotnewvpm = 0;
/*
* Check if some other thread already assigned a vpm to
* this page.
*/
gotnewvpm = 1;
} else {
}
if (gotnewvpm) {
/*
* At this point, we've selected the vpm. Remove vpm
* from its freelist. If vpm is the first one in
* the freelist, update the head of the freelist.
*/
}
/*
* If the head of the freelist still points to vpm,
* then there are no more free vpmaps in that list.
*/
/*
* Took the last one
*/
else {
}
/*
* Disassociate the previous page. On x64 systems
* p_vpmref is used as a mapping reference to the page.
*/
/*
* Now verify that it is the correct
* page. If not, someone else stole it,
* so just unlock it and leave.
*/
} else {
/*
* Release the page.
*/
hat_kpm_mapout(pp, 0, hat_kpm_page2va(pp, 1));
}
} else {
/*
* If the page cannot be locked, just
* clear the p_vpmref and go.
*/
}
}
}
/*
* Set up vpm to point to the new page.
*/
} else {
/*
* Page already has a vpm assigned; just use that.
* Grab the vpm mutex and verify that it is still
* the correct one. The pp->p_vpmref should not change
* once we have the vpm mutex and the page lock.
*/
/*
* The vpm got stolen; clear the p_vpmref and retry.
*/
}
goto retry_queue;
} else if (vpm->vpm_refcnt == 0) {
/*
* Remove it from the free list if it
* exists there.
*/
}
}
return (vpm);
}
}
static void
free_vpmap(struct vpmap *vpm)
{
	struct vpmfree *vpmflp;
	struct vpmap *vpmfreelist;
	union vpm_freeq *releq;

	ASSERT(MUTEX_HELD(VPMAPMTX(vpm)));

	if (vpm->vpm_refcnt != 0) {
		panic("free_vpmap");
		/*NOTREACHED*/
	}

	vpmflp = &vpmd_free[vpm->vpm_free_ndx];
	/*
	 * Add to the tail of the release queue.
	 * Note that vpm_releq and vpm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = vpmflp->vpm_releq;
	if (releq == &vpmflp->vpm_freeq[0]) {
		vpm->vpm_ndxflg = 0;
	} else {
		vpm->vpm_ndxflg = 1;
	}
	mutex_enter(&releq->vpmq_mtx);
	vpmfreelist = releq->vpmq_free;
	if (vpmfreelist == 0) {
		int want;

		releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		/*
		 * Both queue mutexes are held to set vpm_want;
		 * snapshot the value before dropping the releq mutex.
		 * If vpm_want appears after the releq mutex is dropped,
		 * then the vpmap just freed is already gone.
		 */
		want = vpmflp->vpm_want;
		mutex_exit(&releq->vpmq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex,
		 * then recheck after obtaining the vpm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&vpmflp->vpm_freeq[0].vpmq_mtx);
			if (vpmflp->vpm_want)
				cv_signal(&vpmflp->vpm_free_cv);
			mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
		}
	} else {
		/*
		 * Insert at the tail of the circular freelist.
		 */
		vpm->vpm_next = vpmfreelist;
		vpm->vpm_prev = vpmfreelist->vpm_prev;
		vpmfreelist->vpm_prev = vpm;
		vpm->vpm_prev->vpm_next = vpm;
		mutex_exit(&releq->vpmq_mtx);
	}
}
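/*
 * A minimal sketch (not in the original source) of the two-queue scheme
 * used by the freelists: allocations pull from vpm_allocq while frees
 * append to vpm_releq, so the two sides rarely contend on one mutex and
 * the queues are flipped only when the allocq runs dry. Unlinking the
 * vpmap and taking its mutex are elided here.
 */
#if 0
static struct vpmap *
vpm_twoqueue_alloc_sketch(struct vpmfree *vpmflp)
{
	union vpm_freeq *allocq = vpmflp->vpm_allocq;
	union vpm_freeq *releq;
	struct vpmap *vpm;

	mutex_enter(&allocq->vpmq_mtx);
	if (allocq->vpmq_free == NULL) {
		/*
		 * The allocq is dry; swap the roles of the two queues
		 * and allocate from what used to be the releq.
		 */
		releq = vpmflp->vpm_releq;
		mutex_enter(&releq->vpmq_mtx);
		vpmflp->vpm_allocq = releq;
		vpmflp->vpm_releq = allocq;
		mutex_exit(&allocq->vpmq_mtx);
		allocq = releq;
	}
	vpm = allocq->vpmq_free;	/* may still be NULL; callers wait */
	mutex_exit(&allocq->vpmq_mtx);
	return (vpm);
}
#endif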
/*
 * Get the vpmap for the page.
 * The refcnt of this vpm is incremented.
 */
static struct vpmap *
get_vpmap(page_t *pp)
{
	struct vpmap *vpm = NULL;
	kmutex_t *vmtx;
	kmutex_t *pmtx;
	unsigned int refid;

	ASSERT((pp != NULL) && PAGE_LOCKED(pp));

	if (VPM_MTBF(contend, contend_mtbf) && (refid = pp->p_vpmref) != 0) {
		vpm = VPMP(refid);
		vmtx = VPMAPMTX(vpm);
		mutex_enter(vmtx);
		/*
		 * Since we have the page lock and the vpm mutex, the
		 * pp->p_vpmref cannot change.
		 */
		if (vpm->vpm_pp != pp) {
			pmtx = PPMTX(pp);
			/*
			 * Clear the p_vpmref as it is incorrect.
			 * This can happen if the page was stolen.
			 * On x64 this should not happen as p_vpmref
			 * is treated as a mapping on the page. So
			 * if the page is stolen, the mapping would have
			 * been cleared in page_unload().
			 */
			mutex_enter(pmtx);
			if (pp->p_vpmref == refid)
				pp->p_vpmref = 0;
			mutex_exit(pmtx);

			mutex_exit(vmtx);
			vpm = NULL;
		} else if (vpm->vpm_refcnt == 0) {
			/*
			 * Got the vpm, remove it from the free
			 * list if it exists there.
			 */
			VPMAP_RMFREELIST(vpm);
		}
	}
	if (vpm == NULL) {
		/*
		 * get_free_vpmap() returns with the vpmap mutex held.
		 */
		vpm = get_free_vpmap(pp);
		vmtx = VPMAPMTX(vpm);
	}

	vpm->vpm_refcnt++;
	mutex_exit(vmtx);

	return (vpm);
}
/* END --- vpm cache ---- */
/*
* The vnode page mapping(vpm) interface routines.
*/
/*
* Find or create the pages starting from 'baseoff' for the specified
* length 'len'.
*/
static int
vpm_pagecreate(
	struct vnode *vp,
	u_offset_t baseoff,
	size_t len,
	vmap_t vml[],
	int nseg,
	int *newpage)
{
int i;
/*
* the seg pointer passed in is just advisory. Just
* pass segkmap for now like segmap does with
* segmap_kpm enabled.
*/
panic("segmap_pagecreate_vpm: "
"page_create failed");
/*NOTREACHED*/
}
*newpage = 1;
}
/*
* Get the vpm for this page_t.
*/
if (vpm_cache_enable) {
} else {
}
}
return (0);
}
/*
* Returns vpm mappings of pages in the range [off, off+len], where
* len is rounded up to the PAGESIZE boundary. The list of pages and
* the page addresses are returned in the SGL vml (vmap_t) array passed in.
* The nseg is the number of vmap_t entries in the array.
*
* Currently the max len allowed is MAXBSIZE; a longer len fails with
* a "bad len" panic (see the length check in the function below).
*
* The segmap's SM_LOCKPROTO usage is not supported by these interfaces.
* For such cases, use the seg_map interfaces.
*/
int
vpm_map_pages(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int fetchpage,
	vmap_t vml[],
	int nseg,
	int *newpage,
	enum seg_rw rw)
{
extern struct vnode *common_specvp();
int i, error = 0;
/*
* For now, let's restrict it to MAXBSIZE. XXX - We can allow
* len longer than MAXBSIZE, but there should be a limit
* which should be determined by how many pages the VOP_GETPAGE()
* can fetch.
*/
panic("vpm_map_pages bad len");
/*NOTREACHED*/
}
/*
* If this is a block device we have to be sure to use the
* "common" block device vnode for the mapping.
*/
	if (!fetchpage)
		return (vpm_pagecreate(vp, baseoff, len, vml, nseg, newpage));
/*
* If we did not find the page or if this page was not
* in our cache, then let VOP_GETPAGE get all the pages.
* We need to call VOP_GETPAGE so that filesystems can do some
* (un)necessary tracking for sequential access.
*/
}
/*
* Pass a dummy address as it will be required
* by page_create_va(). We pass segkmap as the seg
* as some file systems(UFS) check it.
*/
if (error) {
}
break;
} else {
}
}
	if (error) {
		for (i = 0; i < npages; i++) {
			page_unlock(pplist[i]);
		}
		return (FC_MAKE_ERR(error));
	}
/*
* Get the vpm's for pages.
*/
if (vpm_cache_enable) {
} else {
}
}
return (0);
}
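/*
 * A minimal usage sketch (not in the original source) for the mapping
 * interfaces above: map a range, copy through the returned kpm address,
 * then release. The vml[] size, the NULL-terminated vs_addr list and the
 * single-page copy are simplifying assumptions of this sketch.
 */
#if 0
static int
vpm_copy_sketch(struct vnode *vp, u_offset_t off, struct uio *uio)
{
	vmap_t vml[3];	/* room for two pages plus a NULL terminator */
	size_t pon = (size_t)(off & PAGEOFFSET);
	size_t n = MIN((size_t)uio->uio_resid, PAGESIZE - pon);
	int newpage, err;

	/* fetchpage != 0: have VOP_GETPAGE bring the pages in */
	err = vpm_map_pages(vp, off, n, 1, vml, 3, &newpage, S_READ);
	if (err)
		return (err);

	/* copy out of the first mapped page only, to keep it short */
	err = uiomove(vml[0].vs_addr + pon, n, UIO_READ, uio);

	vpm_unmap_pages(vml, S_READ);
	return (err);
}
#endif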
/*
* Release the vpm mappings on the pages and unlock them.
*/
void
vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
{
int i;
if (vpm_cache_enable) {
} else {
}
		/*
		 * Mark the page as modified or referenced, because vpm
		 * pages would not cause faults where it would be set
		 * normally.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			hat_setref(pp);
		}
if (vpm_cache_enable) {
		if (--vpm->vpm_refcnt == 0) {
			free_vpmap(vpm);
		}
} else {
}
}
}
/*
* Given the vp, off and the uio structure, this routine will do the
* copy (uiomove). If the last page created is partially written,
* the rest of the page is zeroed out. It also zeros the beginning of
* the first page up to the start offset if requested (zerostart).
* If pages are to be fetched, it will call the filesystem's getpage
* function (VOP_GETPAGE) to get them, otherwise they will be created if
* not already present in the page cache.
*/
int
vpm_data_copy(struct vnode *vp,
	u_offset_t off,
	size_t len,
	struct uio *uio,
	int fetchpage,
	int *newpage,
	int zerostart,
	enum seg_rw rw)
{
int error;
int npages = 0;
/*
* 'off' will be the offset where the I/O starts.
* We get the pages starting at the (off & PAGEMASK)
* page boundary.
*/
if (!error) {
/*
* Clear from the beginning of the page to start offset
* if requested.
*/
}
pon = 0;
}
/*
* When new pages are created, zero out part of the
* page we did not copy to.
*/
int nzero;
}
}
return (error);
}
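/*
 * A minimal usage sketch (not in the original source): how a filesystem
 * write path might call vpm_data_copy() in place of the usual
 * segmap_getmapflt()/uiomove()/segmap_release() sequence when vpm_enable
 * is set. The variables (n, pon, base, pagecreate) are assumed from a
 * typical *_wrip() routine.
 */
#if 0
	if (vpm_enable) {
		/*
		 * Copy the data; fetch the pages through VOP_GETPAGE
		 * only when we are not overwriting whole pages.
		 */
		error = vpm_data_copy(vp, off, n, uio, !pagecreate,
		    NULL, 0, S_WRITE);
	} else {
		base = segmap_getmapflt(segkmap, vp, off, n, !pagecreate,
		    S_WRITE);
		error = uiomove(base + pon, n, UIO_WRITE, uio);
	}
#endif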
/*
* Called to flush out pages for the given vnode covering the
* range [off, off+len].
*/
int
vpm_sync_pages(struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t flags)
{
extern struct vnode *common_specvp();
int bflags = 0;
int error = 0;
/*
* If this is a block device we have to be sure to use the
* "common" block device vnode for the mapping.
*/
if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_DESTROY)
			bflags |= (B_INVAL|B_TRUNC);
if (flags & SM_DONTNEED)
bflags |= B_DONTNEED;
}
	error = VOP_PUTPAGE(vp, off, len, bflags, CRED());

	return (error);
}
#else /* SEGKPM_SUPPORT */
/* vpm stubs */
void
vpm_init()
{
}
/*ARGSUSED*/
int
vpm_pagecreate(
	struct vnode *vp,
	u_offset_t baseoff,
	size_t len,
	vmap_t vml[],
	int nseg,
	int *newpage)
{
return (0);
}
/*ARGSUSED*/
int
vpm_map_pages(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int fetchpage,
	vmap_t vml[],
	int nseg,
	int *newpage,
	enum seg_rw rw)
{
return (0);
}
/*ARGSUSED*/
int
vpm_data_copy(struct vnode *vp,
	u_offset_t off,
	size_t len,
	struct uio *uio,
	int fetchpage,
	int *newpage,
	int zerostart,
	enum seg_rw rw)
{
return (0);
}
/*ARGSUSED*/
void
vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
{
}
/*ARGSUSED*/
int
vpm_sync_pages(struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t flags)
{
return (0);
}
#endif /* SEGKPM_SUPPORT */