/* page_lock.c, revision 7c478bd95313f5f23a4c958a745db2134aa03244 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* VM - page locking primitives
*/
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/lockstat.h>
#include <sys/condvar_impl.h>
#include <vm/page.h>
#include <vm/seg_enum.h>
#include <vm/vm_dep.h>
/*
* This global mutex is for logical page locking.
* The following fields in the page structure are protected
* by this lock:
*
* p_lckcnt
* p_cowcnt
*/
kmutex_t page_llock;
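/*
 * Illustrative sketch (hypothetical, not part of this file's interfaces):
 * any update to p_lckcnt or p_cowcnt must be made while holding
 * page_llock. The PAGE_LOCK_EXAMPLE guard is never defined, so this is
 * not compiled; see page_pp_lock() for the real accounting, which also
 * guards against counter overflow.
 */
#ifdef PAGE_LOCK_EXAMPLE
static void
example_bump_lckcnt(page_t *pp)
{
	mutex_enter(&page_llock);
	pp->p_lckcnt++;		/* the real code also checks for overflow */
	mutex_exit(&page_llock);
}
#endif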
/*
* This is a global lock for the logical page free list. The
* logical free list, in this implementation, is maintained as two
* separate physical lists - the cache list and the free list.
*/
kmutex_t page_freelock;
/*
* The hash table, page_hash[], the p_selock fields, and the
* list of pages associated with vnodes are protected by arrays of mutexes.
*
* Unless the hashes are changed radically, the table sizes must be
* a power of two. Also, we typically need more mutexes for the
* vnodes since these locks are occasionally held for long periods.
* And since there seem to be two special vnodes (kvp and swapvp),
* we make room for private mutexes for them.
*
* The pse_mutex[] array holds the mutexes to protect the p_selock
* fields of all page_t structures.
*
* PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
* when given a pointer to a page_t.
*
* PSE_TABLE_SIZE must be a power of two. One could argue that we
* should go to the trouble of setting it up at run time and base it
* on memory size rather than the number of compile time CPUs.
*
* XX64 We should be using physmem size to calculate PSE_TABLE_SIZE,
* PSE_SHIFT, PIO_SHIFT.
*
 * These might break in a 64-bit world.
*/
#define PSE_SHIFT 7 /* log2(PSE_TABLE_SIZE) */
#define PSE_TABLE_SIZE 128 /* number of mutexes to have */
#define PIO_SHIFT PSE_SHIFT /* next power of 2 bigger than sizeof (page_t) */
#define PIO_TABLE_SIZE PSE_TABLE_SIZE /* number of io mutexes to have */
pad_mutex_t ph_mutex[PH_TABLE_SIZE];
pad_mutex_t pse_mutex[PSE_TABLE_SIZE];
kmutex_t pio_mutex[PIO_TABLE_SIZE];
#define PAGE_SE_MUTEX(pp) \
&pse_mutex[((((uintptr_t)(pp) >> PSE_SHIFT) ^ \
((uintptr_t)(pp) >> (PSE_SHIFT << 1))) & \
(PSE_TABLE_SIZE - 1))].pad_mutex
#define PAGE_IO_MUTEX(pp) \
	&pio_mutex[(((uintptr_t)(pp)) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]
#define PSZC_MTX_TABLE_SIZE 128
#define PSZC_MTX_TABLE_SHIFT 7
static pad_mutex_t pszc_mutex[PSZC_MTX_TABLE_SIZE];
#define PAGE_SZC_MUTEX(_pp) \
&pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
(PSZC_MTX_TABLE_SIZE - 1))].pad_mutex
/*
* The vph_mutex[] array holds the mutexes to protect the vnode chains,
* (i.e., the list of pages anchored by v_pages and connected via p_vpprev
* and p_vpnext).
*
* The page_vnode_mutex(vp) function returns the address of the appropriate
* mutex from this array given a pointer to a vnode. It is complicated
* by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
*
* The VP_HASH_FUNC returns the index into the vph_mutex array given
* an address of a vnode.
*/
/*
 * XX64 VPH_TABLE_SIZE and VP_HASH_FUNC might break in a 64-bit world.
* Need to review again.
*/
#define VPH_TABLE_SIZE (2 << VP_SHIFT)
#define VP_HASH_FUNC(vp) \
((((uintptr_t)(vp) >> 6) + \
((uintptr_t)(vp) >> 8) + \
((uintptr_t)(vp) >> 10) + \
((uintptr_t)(vp) >> 12)) \
& (VPH_TABLE_SIZE - 1))
extern struct vnode kvp;
kmutex_t vph_mutex[VPH_TABLE_SIZE + 2];
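/*
 * Usage sketch (hypothetical, never compiled): walking a vnode's page
 * list under its vph_mutex entry. The chain anchored at v_pages is
 * circular and doubly linked via p_vpnext/p_vpprev.
 */
#ifdef PAGE_LOCK_EXAMPLE
static void
example_walk_vnode_pages(vnode_t *vp)
{
	kmutex_t *vphm = page_vnode_mutex(vp);
	page_t *pp;

	mutex_enter(vphm);
	if ((pp = vp->v_pages) != NULL) {
		do {
			/* pp cannot leave the chain while vphm is held */
			pp = pp->p_vpnext;
		} while (pp != vp->v_pages);
	}
	mutex_exit(vphm);
}
#endif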
/*
* Initialize the locks used by the Virtual Memory Management system.
*/
void
page_lock_init()
{
}
/*
* At present we only use page ownership to aid debugging, so it's
* OK if the owner field isn't exact. In the 32-bit world two thread ids
* can map to the same owner because we just 'or' in 0x80000000 and
* then clear the second highest bit, so that (for example) 0x2faced00
* and 0xafaced00 both map to 0xafaced00.
* In the 64-bit world, p_selock may not be large enough to hold a full
* thread pointer. If we ever need precise ownership (e.g. if we implement
* priority inheritance for page locks) then p_selock should become a
* uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
*/
#define SE_WRITER (((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)
#define SE_READER 1
/*
* A page that is deleted must be marked as such using the
* page_lock_delete() function. The page must be exclusively locked.
* The SE_DELETED marker is put in p_selock when this function is called.
* SE_DELETED must be distinct from any SE_WRITER value.
*/
#define SE_DELETED (1 | INT_MIN)
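/*
 * Illustrative decoder (hypothetical, never compiled) for the p_selock
 * encoding above: after masking off SE_EWANTED, zero means unlocked, a
 * negative value is a writer (or the SE_DELETED marker), and a positive
 * value is the count of shared holders in SE_READER units.
 */
#ifdef PAGE_LOCK_EXAMPLE
static const char *
example_selock_state(selock_t sel)
{
	sel &= ~SE_EWANTED;
	if (sel == 0)
		return ("unlocked");
	if (sel == SE_DELETED)
		return ("deleted");
	if (sel < 0)
		return ("write-locked");
	return ("read-locked");
}
#endif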
#ifdef VM_STATS
uint_t vph_kvp_count;
uint_t vph_swapfsvp_count;
uint_t vph_other;
#endif /* VM_STATS */
#ifdef VM_STATS
uint_t page_lock_count;
uint_t page_lock_miss;
uint_t page_lock_miss_lock;
uint_t page_lock_reclaim;
uint_t page_lock_bad_reclaim;
uint_t page_lock_same_page;
uint_t page_lock_upgrade;
uint_t page_lock_upgrade_failed;
uint_t page_lock_deleted;
uint_t page_trylock_locked;
uint_t page_trylock_missed;
uint_t page_try_reclaim_upgrade;
#endif /* VM_STATS */
/*
* Acquire the "shared/exclusive" lock on a page.
*
 * Returns 1 on success and locks the page appropriately;
 * returns 0 on failure and leaves the page unlocked.
*
* If `lock' is non-NULL, it will be dropped and reacquired in the
* failure case. This routine can block, and if it does
* it will always return a failure since the page identity [vp, off]
* or state may have changed.
*/
int
page_lock(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim)
{
return (page_lock_es(pp, se, lock, reclaim, 0));
}
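/*
 * Usage sketch (hypothetical, never compiled): page_lock() may block,
 * and on failure it has dropped and reacquired `phm', so the caller
 * must assume the page's [vp, off] identity changed and redo its
 * lookup rather than retry on the same page_t. example_hash_search()
 * stands in for whatever lookup the caller performs under `phm'.
 */
#ifdef PAGE_LOCK_EXAMPLE
static page_t *
example_lookup_shared(vnode_t *vp, u_offset_t off, kmutex_t *phm)
{
	page_t *pp;

	for (;;) {
		mutex_enter(phm);
		if ((pp = example_hash_search(vp, off)) == NULL) {
			mutex_exit(phm);
			return (NULL);
		}
		if (page_lock(pp, SE_SHARED, phm, P_RECLAIM)) {
			mutex_exit(phm);
			return (pp);	/* identity now stable */
		}
		/* page_lock() blocked; identity may have changed */
		mutex_exit(phm);
	}
}
#endif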
/*
* With the addition of reader-writer lock semantics to page_lock_es,
* callers wanting an exclusive (writer) lock may prevent shared-lock
* (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
* In this case, when an exclusive lock cannot be acquired, p_selock's
* SE_EWANTED bit is set.
* This bit, along with the se and es parameters, are used to decide
* if the requested lock should be granted:
*
 * Lock wanted	SE_EXCL_WANTED	p_selock/SE_EWANTED	Action
 * -----------	--------------	-------------------	---------
 * SE_EXCL	no		don't care/1		deny lock
 * SE_EXCL	any (see note)	unlocked/any		grant lock, clear SE_EWANTED
 * SE_EXCL	yes		any lock/any		deny, set SE_EWANTED
 * SE_EXCL	no		any lock/any		deny
 * SE_SHARED	not applicable	shared/0		grant
 * SE_SHARED	not applicable	unlocked/0		grant
 * SE_SHARED	not applicable	shared/1		deny
 * SE_SHARED	not applicable	unlocked/1		deny
 * SE_SHARED	not applicable	excl/any		deny
*
* Note: the code grants an exclusive lock to the caller and clears
* SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
* bit's value. This was deemed acceptable as we are not concerned about
* exclusive-lock starvation. If this ever becomes an issue, a priority or
* fifo mechanism should also be implemented.
*/
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
int retval;
kmutex_t *pse = PAGE_SE_MUTEX(pp);
int upgraded;
int reclaim_it;
ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);
VM_STAT_ADD(page_lock_count);
upgraded = 0;
reclaim_it = 0;
mutex_enter(pse);
/*
* Current uses of 'es':
* es == 1 page_lookup_create will attempt page relocation
	 * es == SE_EXCL_WANTED caller wants SE_EWANTED set (e.g., the delete
* memory thread); this prevents reader-starvation of waiting
* writer thread(s).
*/
ASSERT(((es & SE_EXCL_WANTED) == 0) ||
((es == SE_EXCL_WANTED) && (se == SE_EXCL)));
if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
se = SE_EXCL;
}
if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {
reclaim_it = 1;
if (se == SE_SHARED) {
/*
* This is an interesting situation.
*
* Remember that p_free can only change if
* p_selock < 0.
* p_free does not depend on our holding `pse'.
* And, since we hold `pse', p_selock can not change.
* So, if p_free changes on us, the page is already
* exclusively held, and we would fail to get p_selock
* regardless.
*
* We want to avoid getting the share
* lock on a free page that needs to be reclaimed.
* It is possible that some other thread has the share
* lock and has left the free page on the cache list.
* pvn_vplist_dirty() does this for brief periods.
			 * If the lock is currently held SE_EXCL, we will
			 * fail to acquire p_selock anyway; blocking is
			 * the right thing to do.
			 * Since we need to reclaim this page, we must get
			 * exclusive access to it, so force the upgrade now.
			 * Again, if the page is not free, we will fail to
			 * acquire p_selock and block.
*/
upgraded = 1;
se = SE_EXCL;
VM_STAT_ADD(page_lock_upgrade);
}
}
if (se == SE_EXCL) {
if ((es != SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * The caller wants a writer lock but did not
			 * request SE_EXCL_WANTED, and a pending writer
			 * wants exclusive access; return failure.
			 */
retval = 0;
} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
/* no reader/writer lock held */
THREAD_KPRI_REQUEST();
/* this clears our setting of the SE_EWANTED bit */
pp->p_selock = SE_WRITER;
retval = 1;
} else {
/* page is locked */
if (es == SE_EXCL_WANTED) {
/* set the SE_EWANTED bit */
pp->p_selock |= SE_EWANTED;
}
retval = 0;
}
} else {
retval = 0;
if (pp->p_selock >= 0) {
/* readers are not allowed when excl wanted */
if (!(pp->p_selock & SE_EWANTED)) {
pp->p_selock += SE_READER;
retval = 1;
}
}
}
if (retval == 0) {
if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
VM_STAT_ADD(page_lock_deleted);
mutex_exit(pse);
return (retval);
}
#ifdef VM_STATS
VM_STAT_ADD(page_lock_miss);
if (upgraded) {
VM_STAT_ADD(page_lock_upgrade_failed);
}
#endif
if (lock) {
VM_STAT_ADD(page_lock_miss_lock);
mutex_exit(lock);
}
		/*
		 * Now wait for the page to be unlocked; cv_wait drops
		 * `pse' (the mutex protecting p_cv and p_selock) while
		 * we sleep and reacquires it on wakeup, after which we
		 * release it.
		 */
cv_wait(&pp->p_cv, pse);
mutex_exit(pse);
/*
* The page identity may have changed while we were
* blocked. If we are willing to depend on "pp"
* still pointing to a valid page structure (i.e.,
* assuming page structures are not dynamically allocated
* or freed), we could try to lock the page if its
* identity hasn't changed.
*
		 * This needs to be measured: since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation), we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
*/
if (lock) {
mutex_enter(lock);
}
} else {
/*
* We have the page lock.
* If we needed to reclaim the page, and the page
		 * needed reclaiming (i.e., it was free), then we
* have the page exclusively locked. We may need
* to downgrade the page.
*/
ASSERT((upgraded) ?
((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
mutex_exit(pse);
/*
* We now hold this page's lock, either shared or
* exclusive. This will prevent its identity from changing.
* The page, however, may or may not be free. If the caller
* requested, and it is free, go reclaim it from the
* free list. If the page can't be reclaimed, return failure
* so that the caller can start all over again.
*
		 * NOTE: page_reclaim() releases the page lock (p_selock)
* if it can't be reclaimed.
*/
if (reclaim_it) {
if (!page_reclaim(pp, lock)) {
VM_STAT_ADD(page_lock_bad_reclaim);
retval = 0;
} else {
VM_STAT_ADD(page_lock_reclaim);
if (upgraded) {
page_downgrade(pp);
}
}
}
}
return (retval);
}
/*
* Clear the SE_EWANTED bit from p_selock. This function allows
* callers of page_lock_es and page_try_reclaim_lock to clear
* their setting of this bit if they decide they no longer wish
* to gain exclusive access to the page. Currently only
* delete_memory_thread uses this when the delete memory
* operation is cancelled.
*/
void
page_lock_clr_exclwanted(page_t *pp)
{
kmutex_t *pse = PAGE_SE_MUTEX(pp);
mutex_enter(pse);
pp->p_selock &= ~SE_EWANTED;
if (CV_HAS_WAITERS(&pp->p_cv))
cv_broadcast(&pp->p_cv);
mutex_exit(pse);
}
/*
* Read the comments inside of page_lock_es() carefully.
*
* SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
* SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
 * This is used by threads subject to reader-starvation (e.g., memory delete).
*
* When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
* it is expected that it will retry at a later time. Threads that will
* not retry the lock *must* call page_lock_clr_exclwanted to clear the
* SE_EWANTED bit. (When a thread using SE_EXCL_WANTED obtains the lock,
* the bit is cleared.)
*/
int
page_try_reclaim_lock(page_t *pp, se_t se, int es)
{
kmutex_t *pse = PAGE_SE_MUTEX(pp);
selock_t old;
mutex_enter(pse);
old = pp->p_selock;
ASSERT(((es & SE_EXCL_WANTED) == 0) ||
((es == SE_EXCL_WANTED) && (se == SE_EXCL)));
if (se == SE_SHARED && es == 1 && old == 0) {
se = SE_EXCL;
}
if (se == SE_SHARED) {
if (!PP_ISFREE(pp)) {
if (old >= 0) {
/* readers are not allowed when excl wanted */
if (!(old & SE_EWANTED)) {
pp->p_selock = old + SE_READER;
mutex_exit(pse);
return (1);
}
}
mutex_exit(pse);
return (0);
}
/*
* The page is free, so we really want SE_EXCL (below)
*/
VM_STAT_ADD(page_try_reclaim_upgrade);
}
/*
* The caller wants a writer lock. We try for it only if
* SE_EWANTED is not set, or if the caller specified
* SE_EXCL_WANTED.
*/
if (!(old & SE_EWANTED) || (es == SE_EXCL_WANTED)) {
if ((old & ~SE_EWANTED) == 0) {
/* no reader/writer lock held */
THREAD_KPRI_REQUEST();
/* this clears out our setting of the SE_EWANTED bit */
pp->p_selock = SE_WRITER;
mutex_exit(pse);
return (1);
}
}
if (es == SE_EXCL_WANTED) {
/* page is locked, set the SE_EWANTED bit */
pp->p_selock |= SE_EWANTED;
}
mutex_exit(pse);
return (0);
}
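/*
 * Usage sketch (hypothetical, never compiled): a caller subject to
 * reader-starvation retries with SE_EXCL_WANTED and, if it gives up,
 * clears the SE_EWANTED bit it may have left behind (which would
 * otherwise keep blocking readers). The retry budget and the back-off
 * are illustrative.
 */
#ifdef PAGE_LOCK_EXAMPLE
#define	EXAMPLE_RETRIES	10	/* illustrative retry budget */
static int
example_excl_retry(page_t *pp)
{
	int i;

	for (i = 0; i < EXAMPLE_RETRIES; i++) {
		if (page_try_reclaim_lock(pp, SE_EXCL, SE_EXCL_WANTED))
			return (1);	/* granted; SE_EWANTED cleared */
		/* back off here (e.g., delay(1)) before retrying */
	}
	page_lock_clr_exclwanted(pp);	/* stop blocking readers */
	return (0);
}
#endif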
/*
* Acquire a page's "shared/exclusive" lock, but never block.
* Returns 1 on success, 0 on failure.
*/
int
page_trylock(page_t *pp, se_t se)
{
kmutex_t *pse = PAGE_SE_MUTEX(pp);
mutex_enter(pse);
if (pp->p_selock & SE_EWANTED) {
/* fail if a thread wants exclusive access */
mutex_exit(pse);
return (0);
}
if (se == SE_EXCL) {
if (pp->p_selock == 0) {
THREAD_KPRI_REQUEST();
pp->p_selock = SE_WRITER;
mutex_exit(pse);
return (1);
}
} else {
if (pp->p_selock >= 0) {
pp->p_selock += SE_READER;
mutex_exit(pse);
return (1);
}
}
mutex_exit(pse);
return (0);
}
/*
* Release the page's "shared/exclusive" lock and wake up anyone
* who might be waiting for it.
*/
void
page_unlock(page_t *pp)
{
kmutex_t *pse = PAGE_SE_MUTEX(pp);
selock_t old;
mutex_enter(pse);
old = pp->p_selock;
if ((old & ~SE_EWANTED) == SE_READER) {
pp->p_selock = old & ~SE_READER;
if (CV_HAS_WAITERS(&pp->p_cv))
cv_broadcast(&pp->p_cv);
} else if ((old & ~SE_EWANTED) == SE_DELETED) {
panic("page_unlock: page %p is deleted", pp);
} else if (old < 0) {
THREAD_KPRI_RELEASE();
pp->p_selock &= SE_EWANTED;
if (CV_HAS_WAITERS(&pp->p_cv))
cv_broadcast(&pp->p_cv);
} else if ((old & ~SE_EWANTED) > SE_READER) {
pp->p_selock = old - SE_READER;
} else {
panic("page_unlock: page %p is not locked", pp);
}
mutex_exit(pse);
}
/*
* Try to upgrade the lock on the page from a "shared" to an
* "exclusive" lock. Since this upgrade operation is done while
* holding the mutex protecting this page, no one else can acquire this page's
* lock and change the page. Thus, it is safe to drop the "shared"
* lock and attempt to acquire the "exclusive" lock.
*
* Returns 1 on success, 0 on failure.
*/
int
page_tryupgrade(page_t *pp)
{
kmutex_t *pse = PAGE_SE_MUTEX(pp);
mutex_enter(pse);
if (!(pp->p_selock & SE_EWANTED)) {
/* no threads want exclusive access, try upgrade */
if (pp->p_selock == SE_READER) {
THREAD_KPRI_REQUEST();
/* convert to exclusive lock */
pp->p_selock = SE_WRITER;
mutex_exit(pse);
return (1);
}
}
mutex_exit(pse);
return (0);
}
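/*
 * Usage sketch (hypothetical, never compiled): a shared-lock holder
 * that needs exclusive access tries the non-blocking upgrade first;
 * on failure it must drop the shared lock, block for the exclusive
 * one, and re-validate the page's identity afterwards.
 */
#ifdef PAGE_LOCK_EXAMPLE
static int
example_get_excl(page_t *pp)
{
	ASSERT(PAGE_SHARED(pp));
	if (page_tryupgrade(pp))
		return (1);
	page_unlock(pp);
	if (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_NO_RECLAIM))
		return (0);
	/* the caller re-checks p_vnode/p_offset here */
	return (1);
}
#endif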
/*
* Downgrade the "exclusive" lock on the page to a "shared" lock
* while holding the mutex protecting this page's p_selock field.
*/
void
page_downgrade(page_t *pp)
{
kmutex_t *pse = PAGE_SE_MUTEX(pp);
int excl_waiting;
ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
ASSERT(PAGE_EXCL(pp));
mutex_enter(pse);
excl_waiting = pp->p_selock & SE_EWANTED;
THREAD_KPRI_RELEASE();
pp->p_selock = SE_READER | excl_waiting;
if (CV_HAS_WAITERS(&pp->p_cv))
cv_broadcast(&pp->p_cv);
mutex_exit(pse);
}
void
page_lock_delete(page_t *pp)
{
kmutex_t *pse = PAGE_SE_MUTEX(pp);
ASSERT(PAGE_EXCL(pp));
ASSERT(pp->p_vnode == NULL);
ASSERT(pp->p_offset == (u_offset_t)-1);
ASSERT(!PP_ISFREE(pp));
mutex_enter(pse);
THREAD_KPRI_RELEASE();
pp->p_selock = SE_DELETED;
if (CV_HAS_WAITERS(&pp->p_cv))
cv_broadcast(&pp->p_cv);
mutex_exit(pse);
}
/*
 * Implement the i/o lock for pages
*/
void
page_iolock_init(page_t *pp)
{
pp->p_iolock_state = 0;
cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL);
}
/*
* Acquire the i/o lock on a page.
*/
void
page_io_lock(page_t *pp)
{
kmutex_t *pio;
pio = PAGE_IO_MUTEX(pp);
mutex_enter(pio);
while (pp->p_iolock_state & PAGE_IO_INUSE) {
cv_wait(&(pp->p_io_cv), pio);
}
pp->p_iolock_state |= PAGE_IO_INUSE;
mutex_exit(pio);
}
/*
* Release the i/o lock on a page.
*/
void
page_io_unlock(page_t *pp)
{
kmutex_t *pio;
pio = PAGE_IO_MUTEX(pp);
mutex_enter(pio);
cv_signal(&pp->p_io_cv);
pp->p_iolock_state &= ~PAGE_IO_INUSE;
mutex_exit(pio);
}
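/*
 * Usage sketch (hypothetical, never compiled): the i/o lock serializes
 * physical i/o to a page, while the caller's hold on p_selock keeps
 * the page's identity from changing while the i/o is in flight.
 */
#ifdef PAGE_LOCK_EXAMPLE
static void
example_page_io(page_t *pp)
{
	ASSERT(PAGE_LOCKED(pp));	/* shared or exclusive p_selock */
	page_io_lock(pp);
	/* ... issue the device operation and wait for it here ... */
	page_io_unlock(pp);
}
#endif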
/*
* Try to acquire the i/o lock on a page without blocking.
* Returns 1 on success, 0 on failure.
*/
int
page_io_trylock(page_t *pp)
{
kmutex_t *pio;
if (pp->p_iolock_state & PAGE_IO_INUSE)
return (0);
pio = PAGE_IO_MUTEX(pp);
mutex_enter(pio);
if (pp->p_iolock_state & PAGE_IO_INUSE) {
mutex_exit(pio);
return (0);
}
pp->p_iolock_state |= PAGE_IO_INUSE;
mutex_exit(pio);
return (1);
}
/*
 * Return non-zero if the i/o lock on the page is held, for use in
 * ASSERT()s; 0 otherwise.
 */
int
page_iolock_assert(page_t *pp)
{
return (pp->p_iolock_state & PAGE_IO_INUSE);
}
/*
* Wrapper exported to kernel routines that are built
* platform-independent (the macro is platform-dependent;
* the size of vph_mutex[] is based on NCPU).
*
* Note that you can do stress testing on this by setting the
* variable page_vnode_mutex_stress to something other than
* zero in a DEBUG kernel in a debugger after loading the kernel.
 * Setting it after the kernel is running may not work correctly,
 * since a thread could then enter one vph mutex for a vnode and
 * exit a different one.
*/
#ifdef DEBUG
static int page_vnode_mutex_stress = 0;
#endif
kmutex_t *
page_vnode_mutex(vnode_t *vp)
{
if (vp == &kvp)
return (&vph_mutex[VPH_TABLE_SIZE + 0]);
#ifdef DEBUG
if (page_vnode_mutex_stress != 0)
return (&vph_mutex[0]);
#endif
return (&vph_mutex[VP_HASH_FUNC(vp)]);
}
kmutex_t *
page_se_mutex(page_t *pp)
{
return (PAGE_SE_MUTEX(pp));
}
#ifdef VM_STATS
uint_t pszclck_stat[4];
#endif
/*
 * Find, take, and return the mutex used by hat_page_demote().
 * Called by page_demote_vp_pages() before the hat_page_demote() call and by
 * routines that want to block hat_page_demote() but can't do it
 * by locking all constituent pages.
 *
 * Returns NULL if p_szc is 0.
 *
 * It should only be used for pages that can be demoted by hat_page_demote(),
 * i.e., non-swapfs file system pages. The logic here is lifted from
 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increasing
 * since the page is locked and not free.
 *
 * The hash of the root page is used to find the lock.
 * To find the root in the presence of hat_page_demote() changing the location
 * of the root, this routine relies on the fact that hat_page_demote() changes
 * the root last.
 *
 * If NULL is returned, pp's p_szc is guaranteed to be 0. If non-NULL is
 * returned, pp's p_szc may be any value.
*/
kmutex_t *
page_szc_lock(page_t *pp)
{
kmutex_t *mtx;
page_t *rootpp;
uint_t szc;
uint_t rszc;
	uint_t pszc;

	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(!IS_SWAPFSVP(pp->p_vnode));
	ASSERT(pp->p_vnode != &kvp);

	pszc = pp->p_szc;	/* read only after validating pp */
again:
if (pszc == 0) {
VM_STAT_ADD(pszclck_stat[0]);
return (NULL);
}
/* The lock lives in the root page */
rootpp = PP_GROUPLEADER(pp, pszc);
mtx = PAGE_SZC_MUTEX(rootpp);
mutex_enter(mtx);
	/*
	 * Since p_szc can only decrease, if pp == rootpp then rootpp
	 * will always remain the root, i.e., we have the right root
	 * regardless of rootpp->p_szc.
	 * Likewise, if the location of pp's root didn't change after
	 * we took the lock, we have the right root; return the mutex
	 * hashed off it.
	 */
if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
VM_STAT_ADD(pszclck_stat[1]);
return (mtx);
}
	/*
	 * The root's location changed because the page was demoted;
	 * locate the new root.
	 */
if (rszc < pszc) {
szc = pp->p_szc;
ASSERT(szc < pszc);
mutex_exit(mtx);
pszc = szc;
VM_STAT_ADD(pszclck_stat[2]);
goto again;
}
VM_STAT_ADD(pszclck_stat[3]);
	/*
	 * The current hat_page_demote() is not done yet;
	 * wait for it to finish.
	 */
mutex_exit(mtx);
rootpp = PP_GROUPLEADER(rootpp, rszc);
mtx = PAGE_SZC_MUTEX(rootpp);
mutex_enter(mtx);
mutex_exit(mtx);
ASSERT(rootpp->p_szc < rszc);
goto again;
}
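/*
 * Usage sketch (hypothetical, never compiled): holding the returned
 * mutex excludes hat_page_demote() on pp's large page; a NULL return
 * guarantees pp->p_szc is 0, so there is nothing to demote.
 */
#ifdef PAGE_LOCK_EXAMPLE
static void
example_block_demote(page_t *pp)
{
	kmutex_t *mtx;

	if ((mtx = page_szc_lock(pp)) != NULL) {
		/* pp's large-page size is stable here */
		mutex_exit(mtx);
	}
}
#endif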
int
page_szc_lock_assert(page_t *pp)
{
page_t *rootpp = PP_PAGEROOT(pp);
kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);
return (MUTEX_HELD(mtx));
}