/* seg_map.c revision dc32d872cbeb56532bcea030255db9cd79bac7da */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Portions of this source code were derived from Berkeley 4.3 BSD
* under license from the Regents of the University of California.
*/
/*
* VM - generic vnode mapping segment.
*
* The segmap driver is used only by the kernel to get faster (than seg_vn)
* mappings [lower routine overhead; more persistent cache] to random
* vnode/offsets. Note that the kernel may (and does) use seg_vn as well.
*/
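/*
 * Illustration only (a hedged sketch, not code in this file): a file
 * system read path typically consumes segmap roughly as below, where
 * "mapon" is the byte offset within the MAXBSIZE window; real callers
 * (e.g. the UFS read path) wrap this in a loop and add error handling
 * that is omitted here:
 *
 *	mapon = uio->uio_loffset & (offset_t)MAXBOFFSET;
 *	n = MIN(MAXBSIZE - mapon, uio->uio_resid);
 *	base = segmap_getmapflt(segkmap, vp, uio->uio_loffset,
 *	    (uint_t)n, 1, S_READ);
 *	error = uiomove(base + mapon, (long)n, UIO_READ, uio);
 *	(void) segmap_release(segkmap, base, 0);
 */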
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>
#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>
/*
* Private seg op routines.
*/
static void segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
uint_t prot);
static int segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
uint_t *protv);
static u_offset_t segmap_getoffset(struct seg *seg, caddr_t addr);
static int segmap_gettype(struct seg *seg, caddr_t addr);
static int segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void segmap_dump(struct seg *seg);
static int segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
struct page ***ppp, enum lock_type type,
enum seg_rw rw);
static void segmap_badop(void);
static int segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t *segmap_getpolicy(struct seg *seg,
caddr_t addr);
static int segmap_capable(struct seg *seg, segcapability_t capability);
/* segkpm support */
static caddr_t segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
struct smap *, enum seg_rw);
struct smap *get_smap_kpm(caddr_t, page_t **);
#define SEGMAP_BADOP(t) (t(*)())segmap_badop
static struct seg_ops segmap_ops = {
SEGMAP_BADOP(int), /* dup */
SEGMAP_BADOP(int), /* unmap */
segmap_free,
segmap_fault,
segmap_faulta,
SEGMAP_BADOP(int), /* setprot */
segmap_checkprot,
segmap_kluster,
SEGMAP_BADOP(size_t), /* swapout */
SEGMAP_BADOP(int), /* sync */
SEGMAP_BADOP(size_t), /* incore */
SEGMAP_BADOP(int), /* lockop */
segmap_getprot,
segmap_getoffset,
segmap_gettype,
segmap_getvp,
SEGMAP_BADOP(int), /* advise */
segmap_dump,
segmap_pagelock, /* pagelock */
SEGMAP_BADOP(int), /* setpgsz */
segmap_getmemid, /* getmemid */
segmap_getpolicy, /* getpolicy */
segmap_capable, /* capable */
seg_inherit_notsup /* inherit */
};
/*
* Private segmap routines.
*/
static void segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
size_t len, enum seg_rw rw, struct smap *smp);
static void segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
u_offset_t off, int hashid);
static void segmap_hashout(struct smap *smp);
/*
* Statistics for segmap operations.
*
* No explicit locking to protect these stats.
*/
struct segmapcnt segmapcnt = {
{ "fault", KSTAT_DATA_ULONG },
{ "faulta", KSTAT_DATA_ULONG },
{ "getmap", KSTAT_DATA_ULONG },
{ "get_use", KSTAT_DATA_ULONG },
{ "get_reclaim", KSTAT_DATA_ULONG },
{ "get_reuse", KSTAT_DATA_ULONG },
{ "get_unused", KSTAT_DATA_ULONG },
{ "get_nofree", KSTAT_DATA_ULONG },
{ "rel_async", KSTAT_DATA_ULONG },
{ "rel_write", KSTAT_DATA_ULONG },
{ "rel_free", KSTAT_DATA_ULONG },
{ "rel_abort", KSTAT_DATA_ULONG },
{ "rel_dontneed", KSTAT_DATA_ULONG },
{ "release", KSTAT_DATA_ULONG },
{ "pagecreate", KSTAT_DATA_ULONG },
{ "free_notfree", KSTAT_DATA_ULONG },
{ "free_dirty", KSTAT_DATA_ULONG },
{ "free", KSTAT_DATA_ULONG },
{ "stolen", KSTAT_DATA_ULONG },
{ "get_nomtx", KSTAT_DATA_ULONG }
};
kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
/*
* Return number of map pages in segment.
*/
#define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT)
/*
* Translate addr into smap number within segment.
*/
#define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT)
/*
* Translate addr in seg into struct smap pointer.
*/
#define GET_SMAP(seg, addr) \
&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])
/*
* Bit in map (16 bit bitmap).
*/
#define SMAP_BIT_MASK(bitindex) (1 << ((bitindex) & 0xf))
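/*
 * Worked example (assuming the usual 8K MAXBSIZE, MAXBSHIFT == 13, and
 * a 4K PAGESIZE; both vary by platform): an address at
 * seg->s_base + 0x5000 gives MAP_PAGE() == 2, so GET_SMAP() returns
 * &smd_sm[2]; the byte offset within that 8K window is 0x1000, i.e.
 * page index 1, whose bit in sm_bitmap is SMAP_BIT_MASK(1) == 0x2.
 */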
static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;
#define SEGMAP_MAXCOLOR 2
#define SEGMAP_CACHE_PAD 64
union segmap_cpu {
struct {
uint32_t scpu_free_ndx[SEGMAP_MAXCOLOR];
struct smap *scpu_last_smap;
ulong_t scpu_getmap;
ulong_t scpu_release;
ulong_t scpu_get_reclaim;
ulong_t scpu_fault;
ulong_t scpu_pagecreate;
ulong_t scpu_get_reuse;
} scpu;
char scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;
/*
* There are three locks in seg_map:
* - per freelist mutexes
* - per hashchain mutexes
* - per smap mutexes
*
* The lock ordering is to take the smap mutex first, to lock down the
* slot, and then either the hash lock (to hash the (vp, off) in or out
* of the hash list) or the freelist lock (to put the slot back on the
* free list).
*
* The hash search is done while holding only the hashchain lock; when
* a wanted slot is found, we drop the hashchain lock and then lock the
* slot, so the hashchain and smap locks never overlap. After the slot
* is locked, we verify that it is still the slot we are looking for.
*
* Allocation of a free slot is done by holding the freelist lock,
* then locking the smap slot at the head of the freelist. This is
* in reversed lock order so mutex_tryenter() is used.
*
* The smap lock protects all fields in smap structure except for
* the link fields for hash/free lists which are protected by
* hashchain and freelist locks.
*/
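/*
 * A minimal sketch of the two orderings described above, using the
 * macros defined below; the forward order may block, while the reverse
 * order (freelist head toward its smap) must use mutex_tryenter() and
 * skip on failure to avoid deadlock:
 *
 *	forward (segmap_hashin/hashout, segmap_smapadd):
 *		mutex_enter(SMAPMTX(smp));
 *		mutex_enter(SHASHMTX(hashid));	(or a freeq mutex)
 *
 *	reverse (get_free_smp walking a freelist):
 *		mutex_enter(&allocq->smq_mtx);
 *		if (!mutex_tryenter(SMAPMTX(smp)))
 *			smp = smp->sm_next;	(try the next slot)
 */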
#define SHASHMTX(hashid) (&smd_hash[hashid].sh_mtx)
#define SMP2SMF(smp) (&smd_free[(smp - smd_smap) & smd_freemsk])
#define SMP2SMF_NDX(smp) (ushort_t)((smp - smd_smap) & smd_freemsk)
#define SMAPMTX(smp) (&smp->sm_mtx)
#define SMAP_HASHFUNC(vp, off, hashid) \
{ \
hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
((off) >> MAXBSHIFT)) & smd_hashmsk); \
}
/*
* The most frequently updated kstat counters are kept in the
* per cpu array to avoid hot cache blocks. The update function
* sums the cpu local counters to update the global counters.
*/
/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
int i;
ulong_t getmap, release, get_reclaim;
ulong_t fault, pagecreate, get_reuse;
if (rw == KSTAT_WRITE)
return (EACCES);
getmap = release = get_reclaim = (ulong_t)0;
fault = pagecreate = get_reuse = (ulong_t)0;
for (i = 0; i < max_ncpus; i++) {
getmap += smd_cpu[i].scpu.scpu_getmap;
release += smd_cpu[i].scpu.scpu_release;
get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
fault += smd_cpu[i].scpu.scpu_fault;
pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
}
segmapcnt.smp_getmap.value.ul = getmap;
segmapcnt.smp_release.value.ul = release;
segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
segmapcnt.smp_fault.value.ul = fault;
segmapcnt.smp_pagecreate.value.ul = pagecreate;
segmapcnt.smp_get_reuse.value.ul = get_reuse;
return (0);
}
int
segmap_create(struct seg *seg, void *argsp)
{
struct segmap_data *smd;
struct smap *smp;
struct smfree *sm;
struct segmap_crargs *a = (struct segmap_crargs *)argsp;
struct smaphash *shashp;
union segmap_cpu *scpu;
long i, npages;
size_t hashsz;
uint_t nfreelist;
extern void prefetch_smap_w(void *);
extern int max_ncpus;
ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
panic("segkmap not MAXBSIZE aligned");
/*NOTREACHED*/
}
smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);
seg->s_data = (void *)smd;
seg->s_ops = &segmap_ops;
smd->smd_prot = a->prot;
/*
* Scale the number of smap freelists to be
* proportional to max_ncpus * number of virtual colors.
* The caller can over-ride this scaling by providing
* a non-zero a->nfreelist argument.
*/
nfreelist = a->nfreelist;
if (nfreelist == 0)
nfreelist = max_ncpus;
else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
"%d, using %d", nfreelist, max_ncpus);
nfreelist = max_ncpus;
}
if (!ISP2(nfreelist)) {
/* round up nfreelist to the next power of two. */
nfreelist = 1 << (highbit(nfreelist));
}
/*
* Get the number of virtual colors - must be a power of 2.
*/
if (a->shmsize)
smd_ncolor = a->shmsize >> MAXBSHIFT;
else
smd_ncolor = 1;
ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
smd_colormsk = smd_ncolor - 1;
smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
smd_freemsk = smd_nfree - 1;
/*
* Allocate and initialize the freelist headers.
* Note that sm_freeq[1] starts out as the release queue. This
* is known when the smap structures are initialized below.
*/
smd_free = smd->smd_free =
kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
for (i = 0; i < smd_nfree; i++) {
sm = &smd->smd_free[i];
mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
sm->sm_allocq = &sm->sm_freeq[0];
sm->sm_releq = &sm->sm_freeq[1];
}
/*
* Allocate and initialize the smap hash chain headers.
* Compute hash size rounding down to the next power of two.
*/
npages = MAP_PAGES(seg);
smd->smd_npages = npages;
hashsz = npages / SMAP_HASHAVELEN;
hashsz = 1 << (highbit(hashsz)-1);
smd_hashmsk = hashsz - 1;
smd_hash = smd->smd_hash =
kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
smd_hash_len =
kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
shashp->sh_hash_list = NULL;
mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
}
/*
* Allocate and initialize the smap structures.
* Link all slots onto the appropriate freelist.
* The smap array is large enough to affect boot time
* on large systems, so use memory prefetching and only
* go through the array one time. Inline an optimized version
* of segmap_smapadd to add structures to freelists with
* knowledge that no locks are needed here.
*/
smd_smap = smd->smd_sm =
kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);
for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
smp >= smd->smd_sm; smp--) {
struct smap *smpfreelist;
struct sm_freeq *releq;
prefetch_smap_w((char *)smp);
smp->sm_vp = NULL;
smp->sm_hash = NULL;
smp->sm_off = 0;
smp->sm_bitmap = 0;
smp->sm_refcnt = 0;
mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
smp->sm_free_ndx = SMP2SMF_NDX(smp);
sm = SMP2SMF(smp);
releq = sm->sm_releq;
smpfreelist = releq->smq_free;
if (smpfreelist == 0) {
releq->smq_free = smp->sm_next = smp->sm_prev = smp;
} else {
smp->sm_next = smpfreelist;
smp->sm_prev = smpfreelist->sm_prev;
smpfreelist->sm_prev = smp;
smp->sm_prev->sm_next = smp;
releq->smq_free = smp->sm_next;
}
/*
* sm_flags = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
*/
smp->sm_flags = 0;
#ifdef SEGKPM_SUPPORT
/*
* Due to the fragile prefetch loop no
* separate function is used here.
*/
smp->sm_kpme_next = NULL;
smp->sm_kpme_prev = NULL;
smp->sm_kpme_page = NULL;
#endif
}
/*
* Allocate the per color indices that distribute allocation
* requests over the free lists. Each cpu will have a private
* rotor index to spread the allocations even across the available
* smap freelists. Init the scpu_last_smap field to the first
* smap element so there is no need to check for NULL.
*/
smd_cpu =
kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
int j;
for (j = 0; j < smd_ncolor; j++)
scpu->scpu.scpu_free_ndx[j] = j;
scpu->scpu.scpu_last_smap = smd_smap;
}
vpm_init();
#ifdef DEBUG
/*
* Keep track of which colors are used more often.
*/
colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */
return (0);
}
static void
segmap_free(struct seg *seg)
{
ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}
/*
* Do a F_SOFTUNLOCK call over the range requested.
* The range must have already been F_SOFTLOCK'ed.
*/
static void
segmap_unlock(
struct hat *hat,
struct seg *seg,
caddr_t addr,
size_t len,
enum seg_rw rw,
struct smap *smp)
{
page_t *pp;
caddr_t adr;
u_offset_t off;
struct vnode *vp;
kmutex_t *smtx;
ASSERT(smp->sm_refcnt > 0);
#ifdef lint
seg = seg;
#endif
if (segmap_kpm && IS_KPM_ADDR(addr)) {
/*
* We're called only from segmap_fault and this was a
* NOP in case of a kpm based smap, so dangerous things
* must have happened in the meantime. Pages are prefaulted
* and locked in segmap_getmapflt and they will not be
* unlocked until segmap_release.
*/
panic("segmap_unlock: called with kpm addr %p", (void *)addr);
/*NOTREACHED*/
}
vp = smp->sm_vp;
off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
ushort_t bitmask;
/*
* Use page_find() instead of page_lookup() to
* find the page since we know that it has
* "shared" lock.
*/
pp = page_find(vp, off);
if (pp == NULL) {
panic("segmap_unlock: page not found");
/*NOTREACHED*/
}
if (rw == S_WRITE) {
hat_setrefmod(pp);
} else if (rw != S_OTHER) {
TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
"segmap_fault:pp %p vp %p offset %llx", pp, vp, off);
hat_setref(pp);
}
/*
* Clear the bit in the bitmap corresponding to "off", if it is
* set, since the page and translation are being unlocked.
*/
bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);
/*
* Large Files: Following assertion is to verify
* the correctness of the cast to (int) above.
*/
ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
smtx = SMAPMTX(smp);
mutex_enter(smtx);
if (smp->sm_bitmap & bitmask) {
smp->sm_bitmap &= ~bitmask;
}
mutex_exit(smtx);
page_unlock(pp);
}
}
#define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */
/*
* This routine is called via a machine specific fault handling
* routine. It is also called by software routines wishing to
* lock or unlock a range of addresses.
*
* Note that this routine expects a page-aligned "addr".
*/
faultcode_t
segmap_fault(
struct hat *hat,
struct seg *seg,
caddr_t addr,
size_t len,
enum fault_type type,
enum seg_rw rw)
{
struct segmap_data *smd = (struct segmap_data *)seg->s_data;
struct smap *smp;
page_t *pp, **ppp;
struct vnode *vp;
u_offset_t off;
page_t *pl[MAXPPB + 1];
uint_t prot;
u_offset_t addroff;
caddr_t adr;
int err;
u_offset_t sm_off;
int hat_flag;
if (segmap_kpm && IS_KPM_ADDR(addr)) {
int newpage;
kmutex_t *smtx;
/*
* Pages are successfully prefaulted and locked in
* segmap_getmapflt and can't be unlocked until
* segmap_release. No hat mappings have to be locked
* and they also can't be unlocked as long as the
* caller owns an active kpm addr.
*/
#ifndef DEBUG
if (type != F_SOFTUNLOCK)
return (0);
#endif
if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
panic("segmap_fault: smap not found "
"for addr %p", (void *)addr);
/*NOTREACHED*/
}
smtx = SMAPMTX(smp);
#ifdef DEBUG
newpage = smp->sm_flags & SM_KPM_NEWPAGE;
if (newpage) {
cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
(void *)smp);
}
if (type != F_SOFTUNLOCK) {
mutex_exit(smtx);
return (0);
}
#endif
mutex_exit(smtx);
vp = smp->sm_vp;
sm_off = smp->sm_off;
if (vp == NULL)
return (FC_MAKE_ERR(EIO));
ASSERT(smp->sm_refcnt > 0);
addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
if (addroff + len > MAXBSIZE)
panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
(void *)(addr + len));
off = sm_off + addroff;
pp = page_find(vp, off);
if (pp == NULL)
panic("segmap_fault: softunlock page not found");
/*
* Set ref bit also here in case of S_OTHER to avoid the
* overhead of supporting other cases than F_SOFTUNLOCK
* with segkpm. We can do this because the underlying
* pages are locked anyway.
*/
if (rw == S_WRITE) {
hat_setrefmod(pp);
} else {
TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
"segmap_fault:pp %p vp %p offset %llx",
pp, vp, off);
hat_setref(pp);
}
return (0);
}
smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
smp = GET_SMAP(seg, addr);
vp = smp->sm_vp;
sm_off = smp->sm_off;
if (vp == NULL)
return (FC_MAKE_ERR(EIO));
ASSERT(smp->sm_refcnt > 0);
addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
if (addroff + len > MAXBSIZE) {
panic("segmap_fault: endaddr %p "
"exceeds MAXBSIZE chunk", (void *)(addr + len));
/*NOTREACHED*/
}
off = sm_off + addroff;
/*
* First handle the easy stuff
*/
if (type == F_SOFTUNLOCK) {
segmap_unlock(hat, seg, addr, len, rw, smp);
return (0);
}
TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
"segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
seg, addr, rw, CRED(), NULL);
if (err)
return (FC_MAKE_ERR(err));
prot &= smd->smd_prot;
/*
* Handle all pages returned in the pl[] array.
* This loop is coded on the assumption that if
* there was no error from the VOP_GETPAGE routine,
* that the page list returned will contain all the
* needed pages for the vp from [off..off + len].
*/
ppp = pl;
while ((pp = *ppp++) != NULL) {
u_offset_t poff;
ASSERT(pp->p_vnode == vp);
hat_flag = HAT_LOAD;
/*
* Verify that the pages returned are within the range
* of this segmap region. Note that it is theoretically
* possible for pages outside this range to be returned,
* but it is not very likely. If we cannot use the
* page here, just release it and go on to the next one.
*/
if (pp->p_offset < sm_off ||
pp->p_offset >= sm_off + MAXBSIZE) {
(void) page_release(pp, 1);
continue;
}
ASSERT(hat == kas.a_hat);
poff = pp->p_offset;
adr = addr + (poff - off);
if (adr >= addr && adr < addr + len) {
hat_setref(pp);
TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
"segmap_fault:pp %p vp %p offset %llx",
pp, vp, poff);
if (type == F_SOFTLOCK)
hat_flag = HAT_LOAD_LOCK;
}
/*
* Deal with VMODSORT pages here. If we know this is a write,
* do the setmod now and leave the page writable. Otherwise,
* unless the page is already modified or the access is S_OTHER,
* remove PROT_WRITE so a later write will fault and be tracked.
* With S_OTHER it's up to the FS to deal with this.
*/
if (IS_VMODSORT(vp)) {
if (rw == S_WRITE)
hat_setmod(pp);
else if (rw != S_OTHER && !hat_ismod(pp))
prot &= ~PROT_WRITE;
}
hat_memload(hat, adr, pp, prot, hat_flag);
if (hat_flag != HAT_LOAD_LOCK)
page_unlock(pp);
}
return (0);
}
/*
* This routine is used to start I/O on pages asynchronously.
*/
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
struct smap *smp;
struct vnode *vp;
u_offset_t off;
int err;
if (segmap_kpm && IS_KPM_ADDR(addr)) {
int newpage;
kmutex_t *smtx;
/*
* Pages are successfully prefaulted and locked in
* segmap_getmapflt and can't be unlocked until
* segmap_release. No hat mappings have to be locked
* and they also can't be unlocked as long as the
* caller owns an active kpm addr.
*/
#ifdef DEBUG
if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
panic("segmap_faulta: smap not found "
"for addr %p", (void *)addr);
/*NOTREACHED*/
}
smtx = SMAPMTX(smp);
newpage = smp->sm_flags & SM_KPM_NEWPAGE;
mutex_exit(smtx);
if (newpage)
cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
(void *)smp);
#endif
return (0);
}
segmapcnt.smp_faulta.value.ul++;
smp = GET_SMAP(seg, addr);
ASSERT(smp->sm_refcnt > 0);
vp = smp->sm_vp;
off = smp->sm_off;
if (vp == NULL) {
cmn_err(CE_WARN, "segmap_faulta - no vp");
return (FC_MAKE_ERR(EIO));
}
TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
"segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
& MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
seg, addr, S_READ, CRED(), NULL);
if (err)
return (FC_MAKE_ERR(err));
return (0);
}
/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
struct segmap_data *smd = (struct segmap_data *)seg->s_data;
ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));
/*
* Need not acquire the segment lock since
* "smd_prot" is a read-only field.
*/
return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
}
static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
struct segmap_data *smd = (struct segmap_data *)seg->s_data;
size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
if (pgno != 0) {
do {
protv[--pgno] = smd->smd_prot;
} while (pgno != 0);
}
return (0);
}
static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
struct segmap_data *smd = (struct segmap_data *)seg->s_data;
ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}
/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
return (MAP_SHARED);
}
/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
struct segmap_data *smd = (struct segmap_data *)seg->s_data;
ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
/* XXX - This doesn't make any sense */
*vpp = smd->smd_sm->sm_vp;
return (0);
}
/*
* Check to see if it makes sense to do kluster/read ahead to
* addr + delta relative to the mapping at addr. We assume here
* that delta is a signed PAGESIZE'd multiple (which can be negative).
*
* For segmap we always "approve" of this action from our standpoint.
*/
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
return (0);
}
static void
segmap_badop(void)
{
panic("segmap_badop");
/*NOTREACHED*/
}
/*
* Special private segmap operations
*/
/*
* Add smap to the appropriate free list.
*/
static void
segmap_smapadd(struct smap *smp)
{
struct smfree *sm;
struct smap *smpfreelist;
struct sm_freeq *releq;
ASSERT(MUTEX_HELD(SMAPMTX(smp)));
if (smp->sm_refcnt != 0) {
panic("segmap_smapadd");
/*NOTREACHED*/
}
sm = &smd_free[smp->sm_free_ndx];
/*
* Add to the tail of the release queue
* Note that sm_releq and sm_allocq could toggle
* before we get the lock. This does not affect
* correctness as the 2 queues are only maintained
* to reduce lock pressure.
*/
releq = sm->sm_releq;
if (releq == &sm->sm_freeq[0])
smp->sm_flags |= SM_QNDX_ZERO;
else
smp->sm_flags &= ~SM_QNDX_ZERO;
mutex_enter(&releq->smq_mtx);
smpfreelist = releq->smq_free;
if (smpfreelist == 0) {
int want;
releq->smq_free = smp->sm_next = smp->sm_prev = smp;
/*
* Both queue mutexes held to set sm_want;
* snapshot the value before dropping releq mutex.
* If sm_want appears after the releq mutex is dropped,
* then the smap just freed is already gone.
*/
want = sm->sm_want;
mutex_exit(&releq->smq_mtx);
/*
* See if there was a waiter before dropping the releq mutex,
* then recheck after obtaining the sm_freeq[0] mutex, as
* another thread may have already signaled.
*/
if (want) {
mutex_enter(&sm->sm_freeq[0].smq_mtx);
if (sm->sm_want)
cv_signal(&sm->sm_free_cv);
mutex_exit(&sm->sm_freeq[0].smq_mtx);
}
} else {
smp->sm_next = smpfreelist;
smp->sm_prev = smpfreelist->sm_prev;
smpfreelist->sm_prev = smp;
smp->sm_prev->sm_next = smp;
mutex_exit(&releq->smq_mtx);
}
}
static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
struct smap **hpp;
struct smap *tmp;
kmutex_t *hmtx;
ASSERT(MUTEX_HELD(SMAPMTX(smp)));
ASSERT(smp->sm_vp == NULL);
ASSERT(smp->sm_hash == NULL);
ASSERT(smp->sm_prev == NULL);
ASSERT(smp->sm_next == NULL);
ASSERT(hashid >= 0 && hashid <= smd_hashmsk);
hmtx = SHASHMTX(hashid);
mutex_enter(hmtx);
/*
* First we need to verify that no one has created a smp
* with (vp, off) as its tag before we did.
*/
for (tmp = smd_hash[hashid].sh_hash_list;
tmp != NULL; tmp = tmp->sm_hash)
if (tmp->sm_vp == vp && tmp->sm_off == off)
break;
if (tmp == NULL) {
/*
* No one created one yet.
*
* Funniness here - we don't increment the ref count on the
* vnode even though we have another pointer to it here.
* The reason for this is that we don't want the fact that
* a seg_map entry somewhere refers to a vnode to prevent the
* vnode itself from going away. This is because this
* reference to the vnode is a "soft one". In the case where
* a mapping is being used by a rdwr [or directory routine?]
* there already has to be a non-zero ref count on the vnode.
* In the case where the vp has been freed and the smap
* structure is on the free list, there are no pages in memory
* that can refer to the vnode. Thus even if we reuse the same
* vnode/smap structure for a vnode which has the same
* address but represents a different object, we are ok.
*/
smp->sm_vp = vp;
smp->sm_off = off;
hpp = &smd_hash[hashid].sh_hash_list;
smp->sm_hash = *hpp;
*hpp = smp;
#ifdef SEGMAP_HASHSTATS
smd_hash_len[hashid]++;
#endif
}
mutex_exit(hmtx);
return (tmp);
}
static void
segmap_hashout(struct smap *smp)
{
struct smap **hpp, *hp;
struct vnode *vp;
kmutex_t *mtx;
int hashid;
u_offset_t off;
ASSERT(MUTEX_HELD(SMAPMTX(smp)));
vp = smp->sm_vp;
off = smp->sm_off;
SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
mtx = SHASHMTX(hashid);
mutex_enter(mtx);
hpp = &smd_hash[hashid].sh_hash_list;
for (;;) {
hp = *hpp;
if (hp == NULL) {
panic("segmap_hashout");
/*NOTREACHED*/
}
if (hp == smp)
break;
hpp = &hp->sm_hash;
}
*hpp = smp->sm_hash;
smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
smd_hash_len[hashid]--;
#endif
mutex_exit(mtx);
smp->sm_vp = NULL;
smp->sm_off = (u_offset_t)0;
}
/*
* Attempt to free unmodified, unmapped, and non locked segmap
* pages.
*/
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
u_offset_t pgoff;
page_t *pp;
for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {
if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
continue;
switch (page_release(pp, 1)) {
case PGREL_NOTREL:
segmapcnt.smp_free_notfree.value.ul++;
break;
case PGREL_MOD:
segmapcnt.smp_free_dirty.value.ul++;
break;
case PGREL_CLEAN:
segmapcnt.smp_free.value.ul++;
break;
}
}
}
/*
* Locks held on entry: smap lock
* Locks held on exit : smap lock.
*/
static void
grab_smp(struct smap *smp, page_t *pp)
{
ASSERT(MUTEX_HELD(SMAPMTX(smp)));
ASSERT(smp->sm_refcnt == 0);
if (smp->sm_vp != (struct vnode *)NULL) {
struct vnode *vp = smp->sm_vp;
u_offset_t off = smp->sm_off;
/*
* Destroy old vnode association and
* unload any hardware translations to
* the old object.
*/
smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
segmap_hashout(smp);
/*
* This node is off freelist and hashlist,
* so there is no reason to drop/reacquire sm_mtx
* across calls to hat_unload.
*/
if (segmap_kpm) {
caddr_t vaddr;
int hat_unload_needed = 0;
/*
* unload kpm mapping
*/
if (pp != NULL) {
vaddr = hat_kpm_page2va(pp, 1);
hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
page_unlock(pp);
}
/*
* Check if we have (also) the rare case of a
* non kpm mapping.
*/
if (smp->sm_flags & SM_NOTKPM_RELEASED) {
hat_unload_needed = 1;
smp->sm_flags &= ~SM_NOTKPM_RELEASED;
}
if (hat_unload_needed) {
hat_unload(kas.a_hat, segkmap->s_base +
((smp - smd_smap) * MAXBSIZE),
MAXBSIZE, HAT_UNLOAD);
}
} else {
ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
smp->sm_flags &= ~SM_NOTKPM_RELEASED;
hat_unload(kas.a_hat, segkmap->s_base +
((smp - smd_smap) * MAXBSIZE),
MAXBSIZE, HAT_UNLOAD);
}
segmap_pagefree(vp, off);
}
}
static struct smap *
get_free_smp(int free_ndx)
{
struct smfree *sm;
kmutex_t *smtx;
struct smap *smp, *first;
struct sm_freeq *allocq, *releq;
struct kpme *kpme;
page_t *pp = NULL;
int end_ndx, page_locked = 0;
end_ndx = free_ndx;
sm = &smd_free[free_ndx];
retry_queue:
allocq = sm->sm_allocq;
mutex_enter(&allocq->smq_mtx);
if ((smp = allocq->smq_free) == NULL) {
skip_queue:
/*
* The alloc list is empty or this queue is being skipped;
* first see if the allocq toggled.
*/
if (sm->sm_allocq != allocq) {
/* queue changed */
mutex_exit(&allocq->smq_mtx);
goto retry_queue;
}
releq = sm->sm_releq;
if (!mutex_tryenter(&releq->smq_mtx)) {
/* cannot get releq; a free smp may be there now */
mutex_exit(&allocq->smq_mtx);
/*
* This loop could spin forever if this thread has
* higher priority than the thread that is holding
* releq->smq_mtx. In order to force the other thread
* to run, we'll lock/unlock the mutex which is safe
* since we just unlocked the allocq mutex.
*/
mutex_enter(&releq->smq_mtx);
mutex_exit(&releq->smq_mtx);
goto retry_queue;
}
if (releq->smq_free == NULL) {
/*
* This freelist is empty.
* This should not happen unless clients
* are failing to release the segmap
* window after accessing the data.
* Before resorting to sleeping, try
* the next list of the same color.
*/
free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
if (free_ndx != end_ndx) {
mutex_exit(&releq->smq_mtx);
mutex_exit(&allocq->smq_mtx);
sm = &smd_free[free_ndx];
goto retry_queue;
}
/*
* Tried all freelists of the same color once,
* wait on this list and hope something gets freed.
*/
segmapcnt.smp_get_nofree.value.ul++;
sm->sm_want++;
mutex_exit(&sm->sm_freeq[1].smq_mtx);
cv_wait(&sm->sm_free_cv,
&sm->sm_freeq[0].smq_mtx);
sm->sm_want--;
mutex_exit(&sm->sm_freeq[0].smq_mtx);
sm = &smd_free[free_ndx];
goto retry_queue;
} else {
/*
* Something on the rele queue; flip the alloc
* and rele queues and retry.
*/
sm->sm_allocq = releq;
sm->sm_releq = allocq;
mutex_exit(&allocq->smq_mtx);
mutex_exit(&releq->smq_mtx);
if (page_locked) {
delay(hz >> 2);
page_locked = 0;
}
goto retry_queue;
}
} else {
/*
* Fastpath the case we get the smap mutex
* on the first try.
*/
first = smp;
next_smap:
smtx = SMAPMTX(smp);
if (!mutex_tryenter(smtx)) {
/*
* Another thread is trying to reclaim this slot.
* Skip to the next queue or smap.
*/
if ((smp = smp->sm_next) == first) {
goto skip_queue;
} else {
goto next_smap;
}
} else {
/*
* if kpme exists, get shared lock on the page
*/
if (segmap_kpm && smp->sm_vp != NULL) {
kpme = GET_KPME(smp);
pp = kpme->kpe_page;
if (pp != NULL) {
if (!page_trylock(pp, SE_SHARED)) {
smp = smp->sm_next;
mutex_exit(smtx);
page_locked = 1;
pp = NULL;
if (smp == first) {
goto skip_queue;
} else {
goto next_smap;
}
} else {
if (kpme->kpe_page == NULL) {
page_unlock(pp);
pp = NULL;
}
}
}
}
/*
* At this point, we've selected smp. Remove smp
* from its freelist. If smp is the first one in
* the freelist, update the head of the freelist.
*/
if (first == smp) {
ASSERT(first == allocq->smq_free);
allocq->smq_free = smp->sm_next;
}
/*
* if the head of the freelist still points to smp,
* then there are no more free smaps in that list.
*/
if (allocq->smq_free == smp)
/*
* Took the last one
*/
allocq->smq_free = NULL;
else {
smp->sm_prev->sm_next = smp->sm_next;
smp->sm_next->sm_prev = smp->sm_prev;
}
mutex_exit(&allocq->smq_mtx);
smp->sm_prev = smp->sm_next = NULL;
/*
* if pp != NULL, pp must have been locked;
* grab_smp() unlocks pp.
*/
ASSERT((pp == NULL) || PAGE_LOCKED(pp));
grab_smp(smp, pp);
/* return smp locked. */
ASSERT(SMAPMTX(smp) == smtx);
ASSERT(MUTEX_HELD(smtx));
return (smp);
}
}
}
/*
* Special public segmap operations
*/
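/*
 * Illustration only (a hedged sketch of how these public operations
 * are commonly driven by a file system write path; "mapon" is the
 * byte offset within the MAXBSIZE window and "pagecreate" is the
 * caller's decision to create pages without reading them in; real
 * callers add the surrounding loop and error handling):
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, !pagecreate, S_WRITE);
 *	if (pagecreate)
 *		newpage = segmap_pagecreate(segkmap, base + mapon, n, 0);
 *	error = uiomove(base + mapon, n, UIO_WRITE, uio);
 *	if (pagecreate)
 *		segmap_pageunlock(segkmap, base + mapon, n, S_WRITE);
 *	error = segmap_release(segkmap, base, SM_WRITE);
 */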
/*
* Create pages (without using VOP_GETPAGE) and load up translations to them.
* If softlock is TRUE, then set things up so that it looks like a call
* to segmap_fault with F_SOFTLOCK.
*
* Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
*
* All fields in the generic segment (struct seg) are considered to be
* read-only for "segmap" even though the kernel address space (kas) may
* not be locked, hence no lock is needed to access them.
*/
int
segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
{
struct segmap_data *smd = (struct segmap_data *)seg->s_data;
page_t *pp;
u_offset_t off;
struct smap *smp;
struct vnode *vp;
caddr_t eaddr;
int newpage = 0;
uint_t prot;
kmutex_t *smtx;
int hat_flag;
ASSERT(seg->s_as == &kas);
if (segmap_kpm && IS_KPM_ADDR(addr)) {
/*
* Pages are successfully prefaulted and locked in
* segmap_getmapflt and can't be unlocked until
* segmap_release. The SM_KPM_NEWPAGE flag is set
* in segmap_pagecreate_kpm when new pages are created, and
* it is returned as the "newpage" indication here.
*/
if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
panic("segmap_pagecreate: smap not found "
"for addr %p", (void *)addr);
/*NOTREACHED*/
}
smtx = SMAPMTX(smp);
newpage = smp->sm_flags & SM_KPM_NEWPAGE;
smp->sm_flags &= ~SM_KPM_NEWPAGE;
mutex_exit(smtx);
return (newpage);
}
smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
eaddr = addr + len;
addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
smp = GET_SMAP(seg, addr);
/*
* We don't grab smp mutex here since we assume the smp
* has a refcnt set already which prevents the slot from
* changing its id.
*/
ASSERT(smp->sm_refcnt > 0);
vp = smp->sm_vp;
off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
prot = smd->smd_prot;
for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
hat_flag = HAT_LOAD;
pp = page_lookup(vp, off, SE_SHARED);
if (pp == NULL) {
ushort_t bitindex;
if ((pp = page_create_va(vp, off,
PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
panic("segmap_pagecreate: page_create failed");
/*NOTREACHED*/
}
newpage = 1;
page_io_unlock(pp);
/*
* Since pages created here do not contain valid
* data until the caller writes into them, the
* "exclusive" lock will not be dropped to prevent
* other users from accessing the page. We also
* have to lock the translation to prevent a fault
* from occurring when the virtual address mapped by
* this page is written into. This is necessary to
* avoid a deadlock since we haven't dropped the
* "exclusive" lock.
*/
bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);
/*
* Large Files: The following assertion is to
* verify the cast above.
*/
ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
smtx = SMAPMTX(smp);
mutex_enter(smtx);
smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
mutex_exit(smtx);
hat_flag = HAT_LOAD_LOCK;
} else if (softlock) {
hat_flag = HAT_LOAD_LOCK;
}
if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
hat_setmod(pp);
hat_memload(kas.a_hat, addr, pp, prot, hat_flag);
if (hat_flag != HAT_LOAD_LOCK)
page_unlock(pp);
TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
"segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
seg, addr, pp, vp, off);
}
return (newpage);
}
void
segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
struct smap *smp;
ushort_t bitmask;
page_t *pp;
struct vnode *vp;
u_offset_t off;
caddr_t eaddr;
kmutex_t *smtx;
ASSERT(seg->s_as == &kas);
eaddr = addr + len;
addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
if (segmap_kpm && IS_KPM_ADDR(addr)) {
/*
* Pages are successfully prefaulted and locked in
* segmap_getmapflt and can't be unlocked until
* segmap_release, so no pages or hat mappings have
* to be unlocked at this point.
*/
#ifdef DEBUG
if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
panic("segmap_pageunlock: smap not found "
"for addr %p", (void *)addr);
/*NOTREACHED*/
}
ASSERT(smp->sm_refcnt > 0);
mutex_exit(SMAPMTX(smp));
#endif
return;
}
smp = GET_SMAP(seg, addr);
smtx = SMAPMTX(smp);
ASSERT(smp->sm_refcnt > 0);
vp = smp->sm_vp;
off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);
/*
* Large Files: Following assertion is to verify
* the correctness of the cast to (int) above.
*/
ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
/*
* If the bit corresponding to "off" is set,
* clear this bit in the bitmap, unlock translations,
* and release the "exclusive" lock on the page.
*/
if (smp->sm_bitmap & bitmask) {
mutex_enter(smtx);
smp->sm_bitmap &= ~bitmask;
mutex_exit(smtx);
hat_unlock(kas.a_hat, addr, PAGESIZE);
/*
* Use page_find() instead of page_lookup() to
* find the page since we know that it has
* "exclusive" lock.
*/
pp = page_find(vp, off);
if (pp == NULL) {
panic("segmap_pageunlock: page not found");
/*NOTREACHED*/
}
if (rw == S_WRITE) {
hat_setrefmod(pp);
} else if (rw != S_OTHER) {
hat_setref(pp);
}
page_unlock(pp);
}
}
}
caddr_t
segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
{
return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
}
/*
* This is the magic virtual address that offset 0 of an ELF
* file gets mapped to in user space. This is used to pick
* the vac color on the freelist.
*/
#define ELF_OFFZERO_VA (0x10000)
/*
* segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
* in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
* The return address is always MAXBSIZE aligned.
*
* If forcefault is nonzero and the MMU translations haven't yet been created,
* segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
*/
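/*
 * Worked example (assuming 8K MAXBSIZE): a request for off 0x12345 and
 * len 0x100 gives baseoff == 0x12000 and a MAXBSIZE aligned return
 * address; the caller's data starts at that address + 0x345, and since
 * off + len == 0x12445 <= baseoff + MAXBSIZE == 0x14000, the
 * "bad len" check below is satisfied.
 */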
caddr_t
segmap_getmapflt(
struct seg *seg,
struct vnode *vp,
u_offset_t off,
size_t len,
int forcefault,
enum seg_rw rw)
{
struct smap *smp, *nsmp;
extern struct vnode *common_specvp();
caddr_t baseaddr; /* MAXBSIZE aligned */
u_offset_t baseoff;
int newslot;
caddr_t vaddr;
int color, hashid;
kmutex_t *hashmtx, *smapmtx;
struct smfree *sm;
page_t *pp;
struct kpme *kpme;
uint_t prot;
caddr_t base;
page_t *pl[MAXPPB + 1];
int error;
int is_kpm = 1;
ASSERT(seg->s_as == &kas);
ASSERT(seg == segkmap);
baseoff = off & (offset_t)MAXBMASK;
if (off + len > baseoff + MAXBSIZE) {
panic("segmap_getmap bad len");
/*NOTREACHED*/
}
/*
* If this is a block device we have to be sure to use the
* "common" block device vnode for the mapping.
*/
if (vp->v_type == VBLK)
vp = common_specvp(vp);
smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;
if (segmap_kpm == 0 ||
(forcefault == SM_PAGECREATE && rw != S_WRITE)) {
is_kpm = 0;
}
SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
hashmtx = SHASHMTX(hashid);
retry_hash:
mutex_enter(hashmtx);
for (smp = smd_hash[hashid].sh_hash_list;
smp != NULL; smp = smp->sm_hash)
if (smp->sm_vp == vp && smp->sm_off == baseoff)
break;
mutex_exit(hashmtx);
vrfy_smp:
if (smp != NULL) {
ASSERT(vp->v_count != 0);
/*
* Get smap lock and recheck its tag. The hash lock
* is dropped since the hash is based on (vp, off)
* and (vp, off) won't change when we have smap mtx.
*/
smapmtx = SMAPMTX(smp);
mutex_enter(smapmtx);
if (smp->sm_vp != vp || smp->sm_off != baseoff) {
mutex_exit(smapmtx);
goto retry_hash;
}
if (smp->sm_refcnt == 0) {
smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;
/*
* Could still be on the free list. However, this
* could also be an smp that is transitioning from
* the free list when we have too much contention
* for the smapmtx's. In this case, we have an
* unlocked smp that is not on the free list any
* longer, but still has a 0 refcnt. The only way
* to be sure is to check the freelist pointers.
* Since we now have the smapmtx, we are guaranteed
* that the (vp, off) won't change, so we are safe
* to reclaim it. get_free_smp() knows that this
* can happen, and it will check the refcnt.
*/
if ((smp->sm_next != NULL)) {
struct sm_freeq *freeq;
ASSERT(smp->sm_prev != NULL);
sm = &smd_free[smp->sm_free_ndx];
if (smp->sm_flags & SM_QNDX_ZERO)
freeq = &sm->sm_freeq[0];
else
freeq = &sm->sm_freeq[1];
mutex_enter(&freeq->smq_mtx);
if (freeq->smq_free != smp) {
/*
* fastpath normal case
*/
smp->sm_prev->sm_next = smp->sm_next;
smp->sm_next->sm_prev = smp->sm_prev;
} else if (smp == smp->sm_next) {
/*
* Taking the last smap on freelist
*/
freeq->smq_free = NULL;
} else {
/*
* Reclaiming 1st smap on list
*/
freeq->smq_free = smp->sm_next;
smp->sm_prev->sm_next = smp->sm_next;
smp->sm_next->sm_prev = smp->sm_prev;
}
mutex_exit(&freeq->smq_mtx);
smp->sm_prev = smp->sm_next = NULL;
} else {
ASSERT(smp->sm_prev == NULL);
segmapcnt.smp_stolen.value.ul++;
}
} else {
segmapcnt.smp_get_use.value.ul++;
}
smp->sm_refcnt++; /* another user */
/*
* We don't invoke segmap_fault via TLB miss, so we set ref
* and mod bits in advance. For S_OTHER we set them in
* segmap_fault F_SOFTUNLOCK.
*/
if (is_kpm) {
if (rw == S_WRITE) {
smp->sm_flags |= SM_WRITE_DATA;
} else if (rw == S_READ) {
smp->sm_flags |= SM_READ_DATA;
}
}
mutex_exit(smapmtx);
newslot = 0;
} else {
uint32_t free_ndx, *free_ndxp;
union segmap_cpu *scpu;
/*
* On a PAC machine or a machine with anti-alias
* hardware, smd_colormsk will be zero.
*
* On a VAC machine, pick color by offset in the file
* so we won't get VAC conflicts on elf files.
* On data files, color does not matter but we
* don't know what kind of file it is so we always
* pick color by offset. This causes color
* corresponding to file offset zero to be used more
* heavily.
*/
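/*
 * For example, with two virtual colors (smd_ncolor == 2,
 * smd_colormsk == 1) and 8K MAXBSIZE, baseoff 0x0 and 0x4000
 * select color 0 while baseoff 0x2000 selects color 1, so
 * consecutive windows of a file alternate colors.
 */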
color = (baseoff >> MAXBSHIFT) & smd_colormsk;
scpu = smd_cpu+CPU->cpu_seqid;
free_ndxp = &scpu->scpu.scpu_free_ndx[color];
free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
#ifdef DEBUG
colors_used[free_ndx]++;
#endif /* DEBUG */
/*
* Get a locked smp slot from the free list.
*/
smp = get_free_smp(free_ndx);
smapmtx = SMAPMTX(smp);
ASSERT(smp->sm_vp == NULL);
if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
/*
* Failed to hashin, there exists one now.
* Return the smp we just allocated.
*/
segmap_smapadd(smp);
mutex_exit(smapmtx);
smp = nsmp;
goto vrfy_smp;
}
smp->sm_refcnt++; /* another user */
/*
* We don't invoke segmap_fault via TLB miss, so we set ref
* and mod bits in advance. For S_OTHER we set them in
* segmap_fault F_SOFTUNLOCK.
*/
if (is_kpm) {
if (rw == S_WRITE) {
smp->sm_flags |= SM_WRITE_DATA;
} else if (rw == S_READ) {
smp->sm_flags |= SM_READ_DATA;
}
}
mutex_exit(smapmtx);
newslot = 1;
}
if (!is_kpm)
goto use_segmap_range;
/*
* Use segkpm
*/
/* Lint directive required until 6746211 is fixed */
/*CONSTCOND*/
ASSERT(PAGESIZE == MAXBSIZE);
/*
* remember the last smp faulted on this cpu.
*/
(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;
if (forcefault == SM_PAGECREATE) {
baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
return (baseaddr);
}
if (newslot == 0 &&
(pp = GET_KPME(smp)->kpe_page) != NULL) {
/* fastpath */
switch (rw) {
case S_READ:
case S_WRITE:
if (page_trylock(pp, SE_SHARED)) {
if (PP_ISFREE(pp) ||
!(pp->p_vnode == vp &&
pp->p_offset == baseoff)) {
page_unlock(pp);
pp = page_lookup(vp, baseoff,
SE_SHARED);
}
} else {
pp = page_lookup(vp, baseoff, SE_SHARED);
}
if (pp == NULL) {
ASSERT(GET_KPME(smp)->kpe_page == NULL);
break;
}
if (rw == S_WRITE &&
hat_page_getattr(pp, P_MOD | P_REF) !=
(P_MOD | P_REF)) {
page_unlock(pp);
break;
}
/*
* We have the p_selock as reader, grab_smp
* can't hit us, we have bumped the smap
* refcnt and hat_pageunload needs the
* p_selock exclusive.
*/
kpme = GET_KPME(smp);
if (kpme->kpe_page == pp) {
baseaddr = hat_kpm_page2va(pp, 0);
} else if (kpme->kpe_page == NULL) {
baseaddr = hat_kpm_mapin(pp, kpme);
} else {
panic("segmap_getmapflt: stale "
"kpme page, kpme %p", (void *)kpme);
/*NOTREACHED*/
}
/*
* We don't invoke segmap_fault via TLB miss,
* so we set ref and mod bits in advance.
* For S_OTHER we set them in segmap_fault
* F_SOFTUNLOCK.
*/
if (rw == S_READ && !hat_isref(pp))
hat_setref(pp);
return (baseaddr);
default:
break;
}
}
base = segkpm_create_va(baseoff);
error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
seg, base, rw, CRED(), NULL);
pp = pl[0];
if (error || pp == NULL) {
/*
* Use segmap address slot and let segmap_fault deal
* with the error cases. There is no error return
* possible here.
*/
goto use_segmap_range;
}
ASSERT(pl[1] == NULL);
/*
* When prot is not returned w/ PROT_ALL the returned pages
* are not backed by fs blocks. For most of the segmap users
* this is no problem, they don't write to the pages in the
* same request and therefore don't rely on a following
* trap driven segmap_fault. With SM_LOCKPROTO users it
* is more secure to use segkmap addresses to allow
* protection segmap_faults.
*/
if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
/*
* Use segmap address slot and let segmap_fault
* do the error return.
*/
ASSERT(rw != S_WRITE);
ASSERT(PAGE_LOCKED(pp));
page_unlock(pp);
forcefault = 0;
goto use_segmap_range;
}
/*
* We have the p_selock as reader, grab_smp can't hit us, we
* have bumped the smap refcnt and hat_pageunload needs the
* p_selock exclusive.
*/
kpme = GET_KPME(smp);
if (kpme->kpe_page == pp) {
baseaddr = hat_kpm_page2va(pp, 0);
} else if (kpme->kpe_page == NULL) {
baseaddr = hat_kpm_mapin(pp, kpme);
} else {
panic("segmap_getmapflt: stale kpme page after "
"VOP_GETPAGE, kpme %p", (void *)kpme);
/*NOTREACHED*/
}
smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
return (baseaddr);
use_segmap_range:
baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
"segmap_getmap:seg %p addr %p vp %p offset %llx",
seg, baseaddr, vp, baseoff);
/*
* Prefault the translations
*/
vaddr = baseaddr + (off - baseoff);
if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {
caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
(uintptr_t)PAGEMASK);
(void) segmap_fault(kas.a_hat, seg, pgaddr,
(vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
F_INVAL, rw);
}
return (baseaddr);
}
int
segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
{
struct smap *smp;
int error;
int bflags = 0;
struct vnode *vp;
u_offset_t offset;
kmutex_t *smtx;
int is_kpm = 0;
page_t *pp;
if (segmap_kpm && IS_KPM_ADDR(addr)) {
if (((uintptr_t)addr & MAXBOFFSET) != 0) {
panic("segmap_release: addr %p not "
"MAXBSIZE aligned", (void *)addr);
/*NOTREACHED*/
}
if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
panic("segmap_release: smap not found "
"for addr %p", (void *)addr);
/*NOTREACHED*/
}
TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
"segmap_relmap:seg %p addr %p smp %p",
seg, addr, smp);
smtx = SMAPMTX(smp);
/*
* For compatibility reasons segmap_pagecreate_kpm sets this
* flag to allow a following segmap_pagecreate to return
* this as "newpage" flag. When segmap_pagecreate is not
* called at all we clear it now.
*/
smp->sm_flags &= ~SM_KPM_NEWPAGE;
is_kpm = 1;
if (smp->sm_flags & SM_WRITE_DATA) {
hat_setrefmod(pp);
} else if (smp->sm_flags & SM_READ_DATA) {
hat_setref(pp);
}
} else {
if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
((uintptr_t)addr & MAXBOFFSET) != 0) {
panic("segmap_release: bad addr %p", (void *)addr);
/*NOTREACHED*/
}
smp = GET_SMAP(seg, addr);
TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
"segmap_relmap:seg %p addr %p smp %p",
seg, addr, smp);
smtx = SMAPMTX(smp);
mutex_enter(smtx);
smp->sm_flags |= SM_NOTKPM_RELEASED;
}
ASSERT(smp->sm_refcnt > 0);
/*
* Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
* are set.
*/
if ((flags & ~SM_DONTNEED) != 0) {
if (flags & SM_WRITE)
segmapcnt.smp_rel_write.value.ul++;
if (flags & SM_ASYNC) {
bflags |= B_ASYNC;
segmapcnt.smp_rel_async.value.ul++;
}
if (flags & SM_INVAL) {
bflags |= B_INVAL;
segmapcnt.smp_rel_abort.value.ul++;
}
if (flags & SM_DESTROY) {
bflags |= (B_INVAL|B_TRUNC);
segmapcnt.smp_rel_abort.value.ul++;
}
if (smp->sm_refcnt == 1) {
/*
* We only bother doing the FREE and DONTNEED flags
* if no one else is still referencing this mapping.
*/
if (flags & SM_FREE) {
bflags |= B_FREE;
segmapcnt.smp_rel_free.value.ul++;
}
if (flags & SM_DONTNEED) {
bflags |= B_DONTNEED;
segmapcnt.smp_rel_dontneed.value.ul++;
}
}
} else {
smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
}
vp = smp->sm_vp;
offset = smp->sm_off;
if (--smp->sm_refcnt == 0) {
smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);
if (flags & (SM_INVAL|SM_DESTROY)) {
segmap_hashout(smp); /* remove map info */
if (is_kpm) {
hat_kpm_mapout(pp, GET_KPME(smp), addr);
if (smp->sm_flags & SM_NOTKPM_RELEASED) {
smp->sm_flags &= ~SM_NOTKPM_RELEASED;
hat_unload(kas.a_hat, segkmap->s_base +
((smp - smd_smap) * MAXBSIZE),
MAXBSIZE, HAT_UNLOAD);
}
} else {
if (segmap_kpm)
segkpm_mapout_validkpme(GET_KPME(smp));
smp->sm_flags &= ~SM_NOTKPM_RELEASED;
hat_unload(kas.a_hat, addr, MAXBSIZE,
HAT_UNLOAD);
}
}
segmap_smapadd(smp); /* add to free list */
}
mutex_exit(smtx);
if (is_kpm)
page_unlock(pp);
/*
* Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
* are set.
*/
if ((flags & ~SM_DONTNEED) != 0) {
error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
bflags, CRED(), NULL);
} else {
error = 0;
}
return (error);
}
/*
* Dump the pages belonging to this segmap segment.
*/
static void
segmap_dump(struct seg *seg)
{
struct segmap_data *smd;
struct smap *smp, *smp_end;
page_t *pp;
pfn_t pfn;
u_offset_t off;
caddr_t addr;
smd = (struct segmap_data *)seg->s_data;
addr = seg->s_base;
for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
smp < smp_end; smp++) {
if (smp->sm_refcnt) {
for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
int we_own_it = 0;
/*
* If pp == NULL, the page either does
* not exist or is exclusively locked.
* So determine if it exists before
* searching for it.
*/
if ((pp = page_lookup_nowait(smp->sm_vp,
smp->sm_off + off, SE_SHARED)))
we_own_it = 1;
else
pp = page_exists(smp->sm_vp,
smp->sm_off + off);
if (pp) {
pfn = page_pptonum(pp);
dump_addpage(seg->s_as,
addr + off, pfn);
if (we_own_it)
page_unlock(pp);
}
dump_timeleft = dump_timeout;
}
}
addr += MAXBSIZE;
}
}
/*ARGSUSED*/
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
return (ENOTSUP);
}
static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
struct segmap_data *smd = (struct segmap_data *)seg->s_data;
memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
return (0);
}
/*ARGSUSED*/
static lgrp_mem_policy_info_t *
segmap_getpolicy(struct seg *seg, caddr_t addr)
{
return (NULL);
}
/*ARGSUSED*/
static int
segmap_capable(struct seg *seg, segcapability_t capability)
{
return (0);
}
#ifdef SEGKPM_SUPPORT
/*
* segkpm support routines
*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
struct smap *smp, enum seg_rw rw)
{
caddr_t base;
page_t *pp;
int newpage = 0;
struct kpme *kpme;
ASSERT(smp->sm_refcnt > 0);
if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
kmutex_t *smtx;
base = segkpm_create_va(off);
if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
seg, base)) == NULL) {
panic("segmap_pagecreate_kpm: "
"page_create failed");
/*NOTREACHED*/
}
newpage = 1;
page_io_unlock(pp);
ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
/*
* Mark this here until the following segmap_pagecreate
* or segmap_release.
*/
smtx = SMAPMTX(smp);
mutex_enter(smtx);
smp->sm_flags |= SM_KPM_NEWPAGE;
mutex_exit(smtx);
}
kpme = GET_KPME(smp);
if (!newpage && kpme->kpe_page == pp)
base = hat_kpm_page2va(pp, 0);
else
base = hat_kpm_mapin(pp, kpme);
/*
* FS code may decide not to call segmap_pagecreate and we
* don't invoke segmap_fault via TLB miss, so we have to set
* ref and mod bits in advance.
*/
if (rw == S_WRITE) {
hat_setrefmod(pp);
} else {
ASSERT(rw == S_READ);
hat_setref(pp);
}
smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
return (base);
}
/*
* Find the smap structure corresponding to the
* KPM addr and return it locked.
*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
struct smap *smp;
struct vnode *vp;
u_offset_t offset;
caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
int hashid;
kmutex_t *hashmtx;
page_t *pp;
union segmap_cpu *scpu;
pp = hat_kpm_vaddr2page(baseaddr);
ASSERT(pp && !PP_ISFREE(pp));
ASSERT(PAGE_LOCKED(pp));
ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);
vp = pp->p_vnode;
offset = pp->p_offset;
ASSERT(vp != NULL);
/*
* Assume the last smap used on this cpu is the one needed.
*/
scpu = smd_cpu+CPU->cpu_seqid;
smp = scpu->scpu.scpu_last_smap;
mutex_enter(&smp->sm_mtx);
if (smp->sm_vp == vp && smp->sm_off == offset) {
ASSERT(smp->sm_refcnt > 0);
} else {
/*
* Assumption wrong, find the smap on the hash chain.
*/
mutex_exit(&smp->sm_mtx);
SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
hashmtx = SHASHMTX(hashid);
mutex_enter(hashmtx);
smp = smd_hash[hashid].sh_hash_list;
for (; smp != NULL; smp = smp->sm_hash) {
if (smp->sm_vp == vp && smp->sm_off == offset)
break;
}
mutex_exit(hashmtx);
if (smp) {
mutex_enter(&smp->sm_mtx);
ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
}
}
if (ppp)
*ppp = smp ? pp : NULL;
return (smp);
}
#else /* SEGKPM_SUPPORT */
/* segkpm stubs */
/*ARGSUSED*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
struct smap *smp, enum seg_rw rw)
{
return (NULL);
}
/*ARGSUSED*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
return (NULL);
}
#endif /* SEGKPM_SUPPORT */