rwlock.c revision d4204c85a44d2589b9afff2c81db7044e97f2d1d
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * CDDL HEADER START
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * The contents of this file are subject to the terms of the
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Common Development and Distribution License (the "License").
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * You may not use this file except in compliance with the License.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * See the License for the specific language governing permissions
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * and limitations under the License.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * When distributing Covered Code, include this CDDL HEADER in each
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * If applicable, add the following below this CDDL HEADER, with the
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * fields enclosed by brackets "[]" replaced with your own identifying
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * information: Portions Copyright [yyyy] [name of copyright owner]
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * CDDL HEADER END
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Use is subject to license terms.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw#pragma ident "%Z%%M% %I% %E% SMI"
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw#define NLOCKS 4 /* initial number of readlock_t structs allocated */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * We must be deferring signals for this to be safe.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Otherwise, if we return an entry with ul_rdlockcnt == 0,
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * it could be reassigned behind our back in a signal handler.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw /* we must be deferring signals */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * No entry available. Allocate more space, converting the single
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * readlock_t entry into an array of readlock_t entries if necessary.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Initial allocation of the readlock_t array.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * Convert the single entry into an array.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * The single readlock_t becomes the first entry in the array.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Return the next available entry in the array.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * Reallocate the array, double the size each time.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * Return the next available entry in the newly allocated array.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * Free the array of rwlocks held for reading.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t));
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * Check if a reader version of the lock is held by the current thread.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * rw_read_is_held() is private to libc.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * The lock is held for reading by some thread.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * Search our array of rwlocks held for reading for a match.
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp * Check if a writer version of the lock is held by the current thread.
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp * rw_write_is_held() is private to libc.
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp/* ARGSUSED2 */
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp * Once reinitialized, we can no longer be holding a read or write lock.
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp * We can do nothing about other threads that are holding read locks.
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp return (0);
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp * Once destroyed, we can no longer be holding a read or write lock.
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp * We can do nothing about other threads that are holding read locks.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp return (0);
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp * Attempt to acquire a readers lock. Return true on success.
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
bda89588bd7667394a834e8a9a34612cce2ae9c3jp if (atomic_cas_32(rwstate, readers, readers + 1) == readers) {
bda89588bd7667394a834e8a9a34612cce2ae9c3jp return (1);
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp return (0);
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp * Attempt to release a reader lock. Return true on success.
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
f7b4b2fefbe31d31fbe1e6a4b494a8fbed3f49b1jp if (atomic_cas_32(rwstate, readers, readers - 1) == readers) {
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw return (1);
bda89588bd7667394a834e8a9a34612cce2ae9c3jp return (0);
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * Attempt to acquire a writer lock. Return true on success.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
bda89588bd7667394a834e8a9a34612cce2ae9c3jp if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED)
bda89588bd7667394a834e8a9a34612cce2ae9c3jp return (1);
bda89588bd7667394a834e8a9a34612cce2ae9c3jp return (0);
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * Attempt to release a writer lock. Return true on success.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
bda89588bd7667394a834e8a9a34612cce2ae9c3jp return (1);
bda89588bd7667394a834e8a9a34612cce2ae9c3jp return (0);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Wake up thread(s) sleeping on the rwlock queue and then
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * drop the queue lock. Return non-zero if we wake up someone.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * This is called when a thread releases a lock that appears to have waiters.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
bda89588bd7667394a834e8a9a34612cce2ae9c3jp return (0);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Examine the queue of waiters in priority order and prepare
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * to wake up as many readers as we encounter before encountering
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * a writer. If the highest priority thread on the queue is a
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * writer, stop there and wake it up.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * We keep track of lwpids that are to be unparked in lwpid[].
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * __lwp_unpark_all() is called to unpark all of them after
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * they have been removed from the sleep queue and the sleep
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * queue lock has been dropped. If we run out of space in our
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * on-stack buffer, we need to allocate more but we can't call
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * lmalloc() because we are holding a queue lock when the overflow
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * occurs and lmalloc() acquires a lock. We can't use alloca()
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * either because the application may have allocated a small
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * stack and we don't want to overrun the stack. So we call
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * alloc_lwpids() to allocate a bigger buffer using the mmap()
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * system call directly since that path acquires no locks.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp /* one writer to wake */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw /* at least one reader to wake */
bda89588bd7667394a834e8a9a34612cce2ae9c3jp if (nlwpid == 0) {
bda89588bd7667394a834e8a9a34612cce2ae9c3jp (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
bda89588bd7667394a834e8a9a34612cce2ae9c3jp return (nlwpid != 0);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * or __lwp_rwlock_wrlock() holding the mutex. These return with the mutex
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * released, and if they need to sleep will release the mutex first. In the
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * event of a spurious wakeup, these will return EAGAIN (because it is much
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * easier for us to re-acquire the mutex here).
c5c4113dfcabb1eed3d4bdf7609de5170027a794nwshared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
bda89588bd7667394a834e8a9a34612cce2ae9c3jp volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * The calls to __lwp_rwlock_*() below will release the mutex,
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * so we need a dtrace probe here.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * The waiters bit may be inaccurate.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Only the kernel knows for sure.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * and trywrlock for process-private (USYNC_THREAD) rwlocks.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw /* initial attempt to acquire the lock fails if there are waiters */
d15447b6c777a1b2223924443bf36c9c8efb2ea4jp while (error == 0) {
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw /* subsequent attempts do not fail due to waiters */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw continue; /* no queued waiters, try again */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Do a priority check on the queued waiter (the
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * highest priority thread on the queue) to see
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * if we should defer to it or just grab the lock.
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * We defer to a queued thread that has
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * a higher priority than ours.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw continue; /* try again */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * We defer to a queued thread that has
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * a higher priority than ours or that
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * is a writer whose priority equals ours.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw continue; /* try again */
bda89588bd7667394a834e8a9a34612cce2ae9c3jp * We are about to block.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * If we're doing a trylock, return EBUSY instead.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Enqueue writers ahead of readers.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * If we already hold a readers lock on this rwlock,
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * just increment our reference count and return.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * If we hold the writer lock, bail out.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw "calling thread owns the writer lock");
bda89588bd7667394a834e8a9a34612cce2ae9c3jp else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */
bda89588bd7667394a834e8a9a34612cce2ae9c3jp else /* user-level */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw if (error == 0) {
bda89588bd7667394a834e8a9a34612cce2ae9c3jp DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
cd37da7426f0c49c14ad9a8a07638ca971477566nw_pthread_rwlock_reltimedrdlock_np(rwlock_t *rwlp, const timespec_t *reltime)
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw#pragma weak pthread_rwlock_timedrdlock = _pthread_rwlock_timedrdlock
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw_pthread_rwlock_timedrdlock(rwlock_t *rwlp, const timespec_t *abstime)
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * If we hold a readers lock on this rwlock, bail out.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw "calling thread owns the readers lock");
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * If we hold the writer lock, bail out.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw "calling thread owns the writer lock");
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw else /* user-level */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw if (error == 0) {
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw_pthread_rwlock_reltimedwrlock_np(rwlock_t *rwlp, const timespec_t *reltime)
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw#pragma weak pthread_rwlock_timedwrlock = _pthread_rwlock_timedwrlock
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw_pthread_rwlock_timedwrlock(rwlock_t *rwlp, const timespec_t *abstime)
bda89588bd7667394a834e8a9a34612cce2ae9c3jp ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * If we already hold a readers lock on this rwlock,
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * just increment our reference count and return.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw else /* user-level */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw if (error == 0) {
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw else /* user-level */
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw if (error == 0) {
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Since the writer lock is held, we'd better be
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * holding it, else we cannot legitimately be here.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw "writer lock held, "
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw "but not by the calling thread");
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw } else if (readers > 0) {
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * A readers lock is held; if we don't hold one, bail out.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw "readers lock held, "
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw "but not by the calling thread");
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * If we hold more than one readers lock on this rwlock,
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * just decrement our reference count and return.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * This is a usage error.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * No thread should release an unowned lock.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw /* EMPTY */;
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw } else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) {
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw /* EMPTY */;
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * Yield to the thread we just woke up, just in case we might
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * be about to grab the rwlock again immediately upon return.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * This is pretty weak but it helps on a uniprocessor and also
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * when cpu affinity has assigned both ourself and the other
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * thread to the same CPU. Note that lwp_yield() will yield
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * the processor only if the writer is at the same or higher
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * priority than ourself. This provides more balanced program
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * behavior; it doesn't guarantee acquisition of the lock by
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw * the pending writer.
c5c4113dfcabb1eed3d4bdf7609de5170027a794nw return (0);