mutex.c revision 75d94465dbafa487b716482dc36d5150a4ec9853
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Big Theory Statement for mutual exclusion locking primitives.
*
* A mutex serializes multiple threads so that only one thread
* (the "owner" of the mutex) is active at a time. See mutex(9F)
* for a full description of the interfaces and programming model.
* The rest of this comment describes the implementation.
*
* Mutexes come in two flavors: adaptive and spin. mutex_init(9F)
* determines the type based solely on the iblock cookie (PIL) argument.
* PIL > LOCK_LEVEL implies a spin lock; everything else is adaptive.
*
* Spin mutexes block interrupts and spin until the lock becomes available.
* A thread may not sleep, or call any function that might sleep, while
* holding a spin mutex. With few exceptions, spin mutexes should only
* be used to synchronize with interrupt handlers.
*
* Adaptive mutexes (the default type) spin if the owner is running on
* another CPU and block otherwise. This policy is based on the assumption
* that mutex hold times are typically short enough that the time spent
* spinning is less than the time it takes to block. If you need mutual
* exclusion semantics with long hold times, consider an rwlock(9F) as
* RW_WRITER. Better still, reconsider the algorithm: if it requires
* mutual exclusion for long periods of time, it's probably not scalable.
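 *
 * As a purely illustrative sketch (a hypothetical caller; data_lock,
 * intr_lock and xsp_ipl below are placeholders, and mutex_init(9F) is
 * the authoritative reference), the iblock cookie passed to mutex_init()
 * is what selects the flavor:
 *
 *	mutex_init(&data_lock, NULL, MUTEX_DEFAULT, NULL);
 *		-> adaptive (no interrupt cookie)
 *
 *	mutex_init(&intr_lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(xsp_ipl));
 *		-> spin if the priority exceeds LOCK_LEVEL, adaptive otherwise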
*
* Adaptive mutexes are overwhelmingly more common than spin mutexes,
* so mutex_enter() assumes that the lock is adaptive. We get away
 * with this by structuring mutexes so that an attempt to acquire a
 * spin mutex as adaptive always fails (a spin mutex's dummy lock byte
 * is initialized held; see mutex_init() below). When mutex_enter()
 * fails, it punts to mutex_vector_enter(), which does all the hard stuff.
*
 * mutex_vector_enter() first checks the type. If it's a spin mutex,
* we just call lock_set_spl() and return. If it's an adaptive mutex,
* we check to see what the owner is doing. If the owner is running,
* we spin until the lock becomes available; if not, we mark the lock
* as having waiters and block.
*
 * Blocking on a mutex is a surprisingly delicate dance because, for speed,
* mutex_exit() doesn't use an atomic instruction. Thus we have to work
* a little harder in the (rarely-executed) blocking path to make sure
* we don't block on a mutex that's just been released -- otherwise we
* might never be woken up.
*
* The logic for synchronizing mutex_vector_enter() with mutex_exit()
* in the face of preemption and relaxed memory ordering is as follows:
*
* (1) Preemption in the middle of mutex_exit() must cause mutex_exit()
* to restart. Each platform must enforce this by checking the
* interrupted PC in the interrupt handler (or on return from trap --
* whichever is more convenient for the platform). If the PC
* lies within the critical region of mutex_exit(), the interrupt
* handler must reset the PC back to the beginning of mutex_exit().
* The critical region consists of all instructions up to, but not
* including, the store that clears the lock (which, of course,
* must never be executed twice.)
*
* This ensures that the owner will always check for waiters after
* resuming from a previous preemption.
*
* (2) A thread resuming in mutex_exit() does (at least) the following:
*
* when resuming: set CPU_THREAD = owner
* membar #StoreLoad
*
* in mutex_exit: check waiters bit; do wakeup if set
* membar #LoadStore|#StoreStore
* clear owner
* (at this point, other threads may or may not grab
* the lock, and we may or may not reacquire it)
*
* when blocking: membar #StoreStore (due to disp_lock_enter())
* set CPU_THREAD = (possibly) someone else
*
* (3) A thread blocking in mutex_vector_enter() does the following:
*
* set waiters bit
* membar #StoreLoad (via membar_enter())
* check CPU_THREAD for owner's t_cpu
* continue if owner running
* membar #LoadLoad (via membar_consumer())
* check owner and waiters bit; abort if either changed
* block
*
* Thus the global memory orderings for (2) and (3) are as follows:
*
* (2M) mutex_exit() memory order:
*
* STORE CPU_THREAD = owner
* LOAD waiters bit
* STORE owner = NULL
* STORE CPU_THREAD = (possibly) someone else
*
* (3M) mutex_vector_enter() memory order:
*
* STORE waiters bit = 1
* LOAD CPU_THREAD for each CPU
* LOAD owner and waiters bit
*
* It has been verified by exhaustive simulation that all possible global
* memory orderings of (2M) interleaved with (3M) result in correct
* behavior. Moreover, these ordering constraints are minimal: changing
* the ordering of anything in (2M) or (3M) breaks the algorithm, creating
* windows for missed wakeups. Note: the possibility that other threads
* may grab the lock after the owner drops it can be factored out of the
* memory ordering analysis because mutex_vector_enter() won't block
* if the lock isn't still owned by the same thread.
*
* The only requirements of code outside the mutex implementation are
* (1) mutex_exit() preemption fixup in interrupt handlers or trap return,
* (2) a membar #StoreLoad after setting CPU_THREAD in resume(),
* (3) mutex_owner_running() preemption fixup in interrupt handlers
* or trap returns.
* Note: idle threads cannot grab adaptive locks (since they cannot block),
* so the membar may be safely omitted when resuming an idle thread.
*
* When a mutex has waiters, mutex_vector_exit() has several options:
*
* (1) Choose a waiter and make that thread the owner before waking it;
* this is known as "direct handoff" of ownership.
*
* (2) Drop the lock and wake one waiter.
*
* (3) Drop the lock, clear the waiters bit, and wake all waiters.
*
* In many ways (1) is the cleanest solution, but if a lock is moderately
* contended it defeats the adaptive spin logic. If we make some other
 * thread the owner, but it's not ONPROC yet, then all other threads on
 * other CPUs that try to get the lock will conclude that the owner is
* blocked, so they'll block too. And so on -- it escalates quickly,
* with every thread taking the blocking path rather than the spin path.
* Thus, direct handoff is *not* a good idea for adaptive mutexes.
*
* Option (2) is the next most natural-seeming option, but it has several
* annoying properties. If there's more than one waiter, we must preserve
* the waiters bit on an unheld lock. On cas-capable platforms, where
* the waiters bit is part of the lock word, this means that both 0x0
* and 0x1 represent unheld locks, so we have to cas against *both*.
* Priority inheritance also gets more complicated, because a lock can
* have waiters but no owner to whom priority can be willed. So while
* it is possible to make option (2) work, it's surprisingly vile.
*
* Option (3), the least-intuitive at first glance, is what we actually do.
* It has the advantage that because you always wake all waiters, you
* never have to preserve the waiters bit. Waking all waiters seems like
* begging for a thundering herd problem, but consider: under option (2),
* every thread that grabs and drops the lock will wake one waiter -- so
* if the lock is fairly active, all waiters will be awakened very quickly
* anyway. Moreover, this is how adaptive locks are *supposed* to work.
* The blocking case is rare; the more common case (by 3-4 orders of
* magnitude) is that one or more threads spin waiting to get the lock.
* Only direct handoff can prevent the thundering herd problem, but as
* mentioned earlier, that would tend to defeat the adaptive spin logic.
* In practice, option (3) works well because the blocking case is rare.
*/
/*
* delayed lock retry with exponential delay for spin locks
*
 * It is noted above that for both spin locks and adaptive locks,
 * spinning is the dominant mode of operation. So long as there is only
* one thread waiting on a lock, the naive spin loop works very well in
 * cache-based architectures. The lock data structure is pulled into the
* cache of the processor with the waiting/spinning thread and no further
* memory traffic is generated until the lock is released. Unfortunately,
* once two or more threads are waiting on a lock, the naive spin has
* the property of generating maximum memory traffic from each spinning
* thread as the spinning threads contend for the lock data structure.
*
* By executing a delay loop before retrying a lock, a waiting thread
* can reduce its memory traffic by a large factor, depending on the
 * size of the delay loop. A large delay loop greatly reduces the memory
* traffic, but has the drawback of having a period of time when
* no thread is attempting to gain the lock even though several threads
 * might be waiting. A small delay loop does little to reduce memory
 * traffic, but keeps the potential idle time low.
* The theory of the exponential delay code is to start with a short
* delay loop and double the waiting time on each iteration, up to
* a preselected maximum.
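 *
 * As an illustrative reading of the code below: default_lock_backoff()
 * scales the maximum backoff by 2^mutex_backoff_shift on each failed
 * retry (with a shift of 1 and base B, the successive maxima are
 * B, 2B, 4B, ...), capped at mutex_backoff_cap if set and otherwise at
 * ncpus_online * mutex_cap_factor; default_lock_delay() then spins for
 * a randomized count between mutex_backoff_base and the current maximum
 * so that contending threads do not retry in lockstep.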
*/
#include <sys/param.h>
#include <sys/time.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/sobject.h>
#include <sys/turnstile.h>
#include <sys/systm.h>
#include <sys/mutex_impl.h>
#include <sys/spl.h>
#include <sys/lockstat.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/stack.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
/*
* The sobj_ops vector exports a set of functions needed when a thread
* is asleep on a synchronization object of this type.
*/
static sobj_ops_t mutex_sobj_ops = {
SOBJ_MUTEX, mutex_owner, turnstile_stay_asleep, turnstile_change_pri
};
/*
* If the system panics on a mutex, save the address of the offending
* mutex in panic_mutex_addr, and save the contents in panic_mutex.
*/
static mutex_impl_t panic_mutex;
static mutex_impl_t *panic_mutex_addr;
static void
mutex_panic(char *msg, mutex_impl_t *lp)
{
if (panicstr)
return;
if (atomic_cas_ptr(&panic_mutex_addr, NULL, lp) == NULL)
panic_mutex = *lp;
panic("%s, lp=%p owner=%p thread=%p",
msg, (void *)lp, (void *)MUTEX_OWNER(&panic_mutex),
(void *)curthread);
}
/* "tunables" for per-platform backoff constants. */
uint_t mutex_backoff_cap = 0;
ushort_t mutex_backoff_base = MUTEX_BACKOFF_BASE;
ushort_t mutex_cap_factor = MUTEX_CAP_FACTOR;
uchar_t mutex_backoff_shift = MUTEX_BACKOFF_SHIFT;
void
mutex_sync(void)
{
MUTEX_SYNC();
}
/* calculate the backoff interval */
uint_t
default_lock_backoff(uint_t backoff)
{
uint_t cap; /* backoff cap calculated */
if (backoff == 0) {
backoff = mutex_backoff_base;
/* first call just sets the base */
return (backoff);
}
/* set cap */
if (mutex_backoff_cap == 0) {
/*
* For a contended lock, in the worst case a load + cas may
* be queued at the controller for each contending CPU.
		 * Therefore, to avoid queueing, the accesses for all CPUs must
* be spread out in time over an interval of (ncpu *
* cap-factor). Maximum backoff is set to this value, and
* actual backoff is a random number from 0 to the current max.
*/
cap = ncpus_online * mutex_cap_factor;
} else {
cap = mutex_backoff_cap;
}
/* calculate new backoff value */
backoff <<= mutex_backoff_shift; /* increase backoff */
if (backoff > cap) {
if (cap < mutex_backoff_base)
backoff = mutex_backoff_base;
else
backoff = cap;
}
return (backoff);
}
/*
* default delay function for mutexes.
*/
void
default_lock_delay(uint_t backoff)
{
ulong_t rnd; /* random factor */
uint_t cur_backoff; /* calculated backoff */
uint_t backctr;
/*
* Modify backoff by a random amount to avoid lockstep, and to
	 * make it probable that some thread gets a small backoff and
	 * re-checks quickly.
*/
rnd = (((long)curthread >> PTR24_LSB) ^ (long)MUTEX_GETTICK());
cur_backoff = (uint_t)(rnd % (backoff - mutex_backoff_base + 1)) +
mutex_backoff_base;
/*
* Delay before trying
* to touch the mutex data structure.
*/
for (backctr = cur_backoff; backctr; backctr--) {
MUTEX_DELAY();
	}
}
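/*
 * Backoff/delay hooks; they default to the implementations above, and
 * platform-specific startup code may point them at its own routines.
 */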
uint_t (*mutex_lock_backoff)(uint_t) = default_lock_backoff;
void (*mutex_lock_delay)(uint_t) = default_lock_delay;
void (*mutex_delay)(void) = mutex_delay_default;
/*
* mutex_vector_enter() is called from the assembly mutex_enter() routine
* if the lock is held or is not of type MUTEX_ADAPTIVE.
*/
void
mutex_vector_enter(mutex_impl_t *lp)
{
kthread_id_t owner;
kthread_id_t lastowner = MUTEX_NO_OWNER; /* track owner changes */
hrtime_t sleep_time = 0; /* how long we slept */
hrtime_t spin_time = 0; /* how long we spun */
cpu_t *cpup;
turnstile_t *ts;
volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
uint_t backoff = 0; /* current backoff */
int changecnt = 0; /* count of owner changes */
ASSERT_STACK_ALIGNED();
if (MUTEX_TYPE_SPIN(lp)) {
lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
&lp->m_spin.m_oldspl);
return;
}
if (!MUTEX_TYPE_ADAPTIVE(lp)) {
mutex_panic("mutex_enter: bad mutex", lp);
return;
}
/*
* Adaptive mutexes must not be acquired from above LOCK_LEVEL.
* We can migrate after loading CPU but before checking CPU_ON_INTR,
* so we must verify by disabling preemption and loading CPU again.
*/
cpup = CPU;
if (CPU_ON_INTR(cpup) && !panicstr) {
kpreempt_disable();
if (CPU_ON_INTR(CPU))
mutex_panic("mutex_enter: adaptive at high PIL", lp);
kpreempt_enable();
}
CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);
spin_time = LOCKSTAT_START_TIME(LS_MUTEX_ENTER_SPIN);
backoff = mutex_lock_backoff(0); /* set base backoff */
for (;;) {
mutex_lock_delay(backoff); /* backoff delay */
if (panicstr)
return;
if ((owner = MUTEX_OWNER(vlp)) == NULL) {
if (mutex_adaptive_tryenter(lp)) {
break;
}
/* increase backoff only on failed attempt. */
backoff = mutex_lock_backoff(backoff);
changecnt++;
continue;
} else if (lastowner != owner) {
lastowner = owner;
backoff = mutex_lock_backoff(backoff);
changecnt++;
}
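		/*
		 * After ncpus_online owner changes (or failed acquisition
		 * attempts), reset the backoff to its base value.
		 */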
if (changecnt >= ncpus_online) {
backoff = mutex_lock_backoff(0);
changecnt = 0;
}
if (owner == curthread)
mutex_panic("recursive mutex_enter", lp);
/*
* If lock is held but owner is not yet set, spin.
* (Only relevant for platforms that don't have cas.)
*/
if (owner == MUTEX_NO_OWNER)
continue;
if (mutex_owner_running(lp) != NULL) {
continue;
}
/*
* The owner appears not to be running, so block.
* See the Big Theory Statement for memory ordering issues.
*/
ts = turnstile_lookup(lp);
MUTEX_SET_WAITERS(lp);
membar_enter();
/*
		 * Recheck whether the owner is running after the waiters bit
		 * becomes globally visible (above). If the owner is running, spin.
*/
if (mutex_owner_running(lp) != NULL) {
turnstile_exit(lp);
continue;
}
membar_consumer();
/*
* If owner and waiters bit are unchanged, block.
*/
if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
sleep_time -= gethrtime();
(void) turnstile_block(ts, TS_WRITER_Q, lp,
&mutex_sobj_ops, NULL, NULL);
sleep_time += gethrtime();
/* reset backoff after turnstile */
backoff = mutex_lock_backoff(0);
} else {
turnstile_exit(lp);
}
}
ASSERT(MUTEX_OWNER(lp) == curthread);
if (sleep_time != 0) {
/*
		 * Note: sleep_time is the sum of all the time we spent
		 * blocked.
*/
LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
}
/* record spin time, don't count sleep time */
if (spin_time != 0) {
LOCKSTAT_RECORD_TIME(LS_MUTEX_ENTER_SPIN, lp,
spin_time + sleep_time);
}
LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
}
/*
* mutex_vector_tryenter() is called from the assembly mutex_tryenter()
* routine if the lock is held or is not of type MUTEX_ADAPTIVE.
*/
int
mutex_vector_tryenter(mutex_impl_t *lp)
{
int s;
if (MUTEX_TYPE_ADAPTIVE(lp))
return (0); /* we already tried in assembly */
if (!MUTEX_TYPE_SPIN(lp)) {
mutex_panic("mutex_tryenter: bad mutex", lp);
return (0);
}
s = splr(lp->m_spin.m_minspl);
if (lock_try(&lp->m_spin.m_spinlock)) {
lp->m_spin.m_oldspl = (ushort_t)s;
return (1);
}
splx(s);
return (0);
}
/*
* mutex_vector_exit() is called from mutex_exit() if the lock is not
* adaptive, has waiters, or is not owned by the current thread (panic).
*/
void
mutex_vector_exit(mutex_impl_t *lp)
{
turnstile_t *ts;
if (MUTEX_TYPE_SPIN(lp)) {
lock_clear_splx(&lp->m_spin.m_spinlock, lp->m_spin.m_oldspl);
return;
}
if (MUTEX_OWNER(lp) != curthread) {
mutex_panic("mutex_exit: not owner", lp);
return;
}
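	/*
	 * There may be waiters: clear both the lock and the waiters bit
	 * and wake every waiter (option (3) in the Big Theory Statement
	 * above); if nobody was actually waiting, just drop the turnstile
	 * chain lock.
	 */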
ts = turnstile_lookup(lp);
MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
if (ts == NULL)
turnstile_exit(lp);
else
turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
LOCKSTAT_RECORD0(LS_MUTEX_EXIT_RELEASE, lp);
}
int
mutex_owned(const kmutex_t *mp)
{
const mutex_impl_t *lp = (const mutex_impl_t *)mp;
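	/*
	 * During panic or system quiesce, unconditionally claim ownership
	 * so that callers' ASSERT(MUTEX_HELD(...)) checks do not trip.
	 */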
if (panicstr || quiesce_active)
return (1);
if (MUTEX_TYPE_ADAPTIVE(lp))
return (MUTEX_OWNER(lp) == curthread);
return (LOCK_HELD(&lp->m_spin.m_spinlock));
}
kthread_t *
mutex_owner(const kmutex_t *mp)
{
const mutex_impl_t *lp = (const mutex_impl_t *)mp;
kthread_id_t t;
if (MUTEX_TYPE_ADAPTIVE(lp) && (t = MUTEX_OWNER(lp)) != MUTEX_NO_OWNER)
return (t);
return (NULL);
}
/*
* The iblock cookie 'ibc' is the spl level associated with the lock;
* this alone determines whether the lock will be ADAPTIVE or SPIN.
*
 * Adaptive mutexes created in zeroed memory do not require mutex_init(),
 * as allocation in this fashion guarantees their initialization;
 * e.g., adaptive mutexes declared static (and thus zeroed in BSS) or
 * allocated by kmem_zalloc().
*/
/* ARGSUSED */
void
mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc)
{
mutex_impl_t *lp = (mutex_impl_t *)mp;
ASSERT(ibc < (void *)KERNELBASE); /* see 1215173 */
if ((intptr_t)ibc > ipltospl(LOCK_LEVEL) && ibc < (void *)KERNELBASE) {
ASSERT(type != MUTEX_ADAPTIVE && type != MUTEX_DEFAULT);
MUTEX_SET_TYPE(lp, MUTEX_SPIN);
LOCK_INIT_CLEAR(&lp->m_spin.m_spinlock);
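		/*
		 * m_dummylock is left permanently held so that an attempt
		 * to acquire a spin mutex as adaptive always fails (see the
		 * Big Theory Statement above).
		 */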
LOCK_INIT_HELD(&lp->m_spin.m_dummylock);
lp->m_spin.m_minspl = (int)(intptr_t)ibc;
} else {
#ifdef MUTEX_ALIGN
static int misalign_cnt = 0;
if (((uintptr_t)lp & (uintptr_t)(MUTEX_ALIGN - 1)) &&
(misalign_cnt < MUTEX_ALIGN_WARNINGS)) {
/*
* The mutex is not aligned and may cross a cache line.
* This is not supported and may cause a panic.
* Show a warning that the mutex is not aligned
* and attempt to identify the origin.
* Unaligned mutexes are not (supposed to be)
* possible on SPARC.
*/
char *funcname;
ulong_t offset = 0;
funcname = modgetsymname((uintptr_t)caller(), &offset);
cmn_err(CE_WARN, "mutex_init: %p is not %d byte "
"aligned; caller %s+%lx in module %s. "
"This is unsupported and may cause a panic. "
"Please report this to the kernel module supplier.",
(void *)lp, MUTEX_ALIGN,
funcname ? funcname : "unknown", offset,
mod_containing_pc(caller()));
misalign_cnt++;
if (misalign_cnt >= MUTEX_ALIGN_WARNINGS) {
cmn_err(CE_WARN, "mutex_init: further unaligned"
" mutex warnings will be suppressed.");
}
}
#endif /* MUTEX_ALIGN */
ASSERT(type != MUTEX_SPIN);
MUTEX_SET_TYPE(lp, MUTEX_ADAPTIVE);
MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
}
}
void
mutex_destroy(kmutex_t *mp)
{
mutex_impl_t *lp = (mutex_impl_t *)mp;
if (lp->m_owner == 0 && !MUTEX_HAS_WAITERS(lp)) {
MUTEX_DESTROY(lp);
} else if (MUTEX_TYPE_SPIN(lp)) {
LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
MUTEX_DESTROY(lp);
} else if (MUTEX_TYPE_ADAPTIVE(lp)) {
LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
if (MUTEX_OWNER(lp) != curthread)
mutex_panic("mutex_destroy: not owner", lp);
if (MUTEX_HAS_WAITERS(lp)) {
turnstile_t *ts = turnstile_lookup(lp);
turnstile_exit(lp);
if (ts != NULL)
mutex_panic("mutex_destroy: has waiters", lp);
}
MUTEX_DESTROY(lp);
} else {
mutex_panic("mutex_destroy: bad mutex", lp);
}
}
/*
* Simple C support for the cases where spin locks miss on the first try.
*/
void
lock_set_spin(lock_t *lp)
{
int loop_count = 0;
uint_t backoff = 0; /* current backoff */
hrtime_t spin_time = 0; /* how long we spun */
if (panicstr)
return;
if (ncpus == 1)
panic("lock_set: %p lock held and only one CPU", (void *)lp);
spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPIN);
while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
if (panicstr)
return;
loop_count++;
if (ncpus_online == loop_count) {
backoff = mutex_lock_backoff(0);
loop_count = 0;
} else {
backoff = mutex_lock_backoff(backoff);
}
mutex_lock_delay(backoff);
}
LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPIN, lp, spin_time);
LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
}
void
lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
{
int loop_count = 0;
uint_t backoff = 0; /* current backoff */
hrtime_t spin_time = 0; /* how long we spun */
if (panicstr)
return;
if (ncpus == 1)
panic("lock_set_spl: %p lock held and only one CPU",
(void *)lp);
ASSERT(new_pil > LOCK_LEVEL);
spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPL_SPIN);
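	/*
	 * Spin with the PIL dropped back to the caller's original level;
	 * raise to new_pil only when the lock looks free, then try to
	 * grab it, repeating until lock_spin_try() succeeds.
	 */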
do {
splx(old_pil);
while (LOCK_HELD(lp)) {
loop_count++;
if (panicstr) {
*old_pil_addr = (ushort_t)splr(new_pil);
return;
}
if (ncpus_online == loop_count) {
backoff = mutex_lock_backoff(0);
loop_count = 0;
} else {
backoff = mutex_lock_backoff(backoff);
}
mutex_lock_delay(backoff);
}
old_pil = splr(new_pil);
} while (!lock_spin_try(lp));
*old_pil_addr = (ushort_t)old_pil;
LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPL_SPIN, lp, spin_time);
LOCKSTAT_RECORD0(LS_LOCK_SET_SPL_ACQUIRE, lp);
}