/*
* Copyright (C) 1998-2001, 2003-2005, 2007, 2009, 2011, 2012, 2015-2017 Internet Systems Consortium, Inc. ("ISC")
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
/* $Id$ */
/*! \file */
#include <config.h>
#include <stddef.h>
#include <isc/platform.h>
#ifdef ISC_PLATFORM_USETHREADS
#ifndef RWLOCK_DEFAULT_READ_QUOTA
#endif
#ifndef RWLOCK_DEFAULT_WRITE_QUOTA
#endif
#ifndef RWLOCK_MAX_ADAPTIVE_COUNT
#endif
#if defined(ISC_RWLOCK_USEATOMIC)
static isc_result_t
#endif
#ifdef ISC_RWLOCK_TRACE
static void
#if defined(ISC_PLATFORM_HAVEXADD) && defined(ISC_PLATFORM_HAVECMPXCHG)
"rwlock %p thread %lu %s(%s): "
"write_requests=%u, write_completions=%u, "
"cnt_and_flag=0x%x, readers_waiting=%u, "
"write_granted=%u, write_quota=%u\n"),
(type == isc_rwlocktype_read ?
ISC_MSG_READ, "read") :
ISC_MSG_WRITE, "write")),
#else
"rwlock %p thread %lu %s(%s): %s, %u active, "
"%u granted, %u rwaiting, %u wwaiting\n"),
(type == isc_rwlocktype_read ?
ISC_MSG_READ, "read") :
ISC_MSG_WRITE, "write")),
ISC_MSG_READING, "reading") :
ISC_MSG_WRITING, "writing")),
#endif
}
#endif /* ISC_RWLOCK_TRACE */
unsigned int write_quota)
{
/*
* In case there's trouble initializing, we zero magic now. If all
* goes well, we'll set it to RWLOCK_MAGIC.
*/
#if defined(ISC_RWLOCK_USEATOMIC)
rwl->write_requests = 0;
rwl->write_completions = 0;
rwl->cnt_and_flag = 0;
rwl->readers_waiting = 0;
rwl->write_granted = 0;
if (read_quota != 0) {
"read quota is not supported");
}
if (write_quota == 0)
#else
rwl->readers_waiting = 0;
rwl->writers_waiting = 0;
if (read_quota == 0)
if (write_quota == 0)
#endif
if (result != ISC_R_SUCCESS)
return (result);
if (result != ISC_R_SUCCESS) {
"isc_condition_init(readable) %s: %s",
ISC_MSG_FAILED, "failed"),
goto destroy_lock;
}
if (result != ISC_R_SUCCESS) {
"isc_condition_init(writeable) %s: %s",
ISC_MSG_FAILED, "failed"),
goto destroy_rcond;
}
return (ISC_R_SUCCESS);
return (result);
}
void
#if defined(ISC_RWLOCK_USEATOMIC)
#else
rwl->readers_waiting == 0 &&
rwl->writers_waiting == 0);
#endif
}
#if defined(ISC_RWLOCK_USEATOMIC)
/*
* When some architecture-dependent atomic operations are available,
* rwlock can be more efficient than the generic algorithm defined below.
* The basic algorithm is described in the following URL:
*
* The key is to use the following integer variables modified atomically:
* write_requests, write_completions, and cnt_and_flag.
*
* write_requests and write_completions act as a waiting queue for writers
* in order to ensure the FIFO order. Both variables begin with the initial
* value of 0. When a new writer tries to get a write lock, it increments
* write_requests and gets the previous value of the variable as a "ticket".
* When write_completions reaches the ticket number, the new writer can start
* writing. When the writer completes its work, it increments
* write_completions so that another new writer can start working. If the
* write_requests is not equal to write_completions, it means a writer is now
* working or waiting. In this case, a new readers cannot start reading, or
* in other words, this algorithm basically prefers writers.
*
* cnt_and_flag is a "lock" shared by all readers and writers. This integer
* variable is a kind of structure with two members: writer_flag (1 bit) and
* reader_count (31 bits). The writer_flag shows whether a writer is working,
* and the reader_count shows the number of readers currently working or almost
* ready for working. A writer who has the current "ticket" tries to get the
* lock by exclusively setting the writer_flag to 1, provided that the whole
* 32-bit is 0 (meaning no readers or writers working). On the other hand,
* a new reader tries to increment the "reader_count" field provided that
* the writer_flag is 0 (meaning there is no writer working).
*
* If some of the above operations fail, the reader or the writer sleeps
* until the related condition changes. When a working reader or writer
* completes its work, some readers or writers are sleeping, and the condition
* that suspended the reader or writer has changed, it wakes up the sleeping
* readers or writers.
*
* As already noted, this algorithm basically prefers writers. In order to
* prevent readers from starving, however, the algorithm also introduces the
* "writer quota" (Q). When Q consecutive writers have completed their work,
* suspending readers, the last writer will wake up the readers, even if a new
* writer is waiting.
*
* Implementation specific note: due to the combination of atomic operations
* and a mutex lock, ordering between the atomic operation and locks can be
* very sensitive in some cases. In particular, it is generally very important
* to check the atomic variable that requires a reader or writer to sleep after
* locking the mutex and before actually sleeping; otherwise, it could be very
* likely to cause a deadlock. For example, assume "var" is a variable
* atomically modified, then the corresponding code would be:
* if (var == need_sleep) {
* LOCK(lock);
* if (var == need_sleep)
* WAIT(cond, lock);
* UNLOCK(lock);
* }
* The second check is important, since "var" is protected by the atomic
* operation, not by the mutex, and can be changed just before sleeping.
* (The first "if" could be omitted, but this is also important in order to
* make the code efficient by avoiding the use of the mutex unless it is
* really necessary.)
*/
static isc_result_t
#ifdef ISC_RWLOCK_TRACE
#endif
if (type == isc_rwlocktype_read) {
/* there is a waiting or active writer */
rwl->readers_waiting++;
rwl->readers_waiting--;
}
}
#if defined(ISC_RWLOCK_USESTDATOMIC)
#else
#endif
while (1) {
break;
/* A writer is still working */
rwl->readers_waiting++;
rwl->readers_waiting--;
/*
* Typically, the reader should be able to get a lock
* at this stage:
* (1) there should have been no pending writer when
* the reader was trying to increment the
* counter; otherwise, the writer should be in
* the waiting queue, preventing the reader from
* proceeding to this point.
* (2) once the reader increments the counter, no
* more writer can get a lock.
* Still, it is possible another writer can work at
* this point, e.g. in the following scenario:
* A previous writer unlocks the writer lock.
* This reader proceeds to point (1).
* A new writer appears, and gets a new lock before
* the reader increments the counter.
* The reader then increments the counter.
* The previous writer notices there is a waiting
* reader who is almost ready, and wakes it up.
* So, the reader needs to confirm whether it can now
* read explicitly (thus we loop). Note that this is
* not an infinite process, since the reader has
* incremented the counter at this point.
*/
}
/*
* If we are temporarily preferred to writers due to the writer
* quota, reset the condition (race among readers doesn't
* matter).
*/
rwl->write_granted = 0;
} else {
/* enter the waiting queue, and wait for our turn */
#if defined(ISC_RWLOCK_USESTDATOMIC)
#else
#endif
continue;
}
break;
}
while (1) {
#if defined(ISC_RWLOCK_USESTDATOMIC)
#else
#endif
if (cntflag2 == 0)
break;
/* Another active reader or writer is working. */
if (rwl->cnt_and_flag != 0)
}
rwl->write_granted++;
}
#ifdef ISC_RWLOCK_TRACE
#endif
return (ISC_R_SUCCESS);
}
if (max_cnt > RWLOCK_MAX_ADAPTIVE_COUNT)
do {
break;
}
#ifdef ISC_PLATFORM_BUSYWAITNOP
#endif
return (result);
}
#ifdef ISC_RWLOCK_TRACE
#endif
if (type == isc_rwlocktype_read) {
/* If a writer is waiting or working, we fail. */
return (ISC_R_LOCKBUSY);
/* Otherwise, be ready for reading. */
#if defined(ISC_RWLOCK_USESTDATOMIC)
#else
#endif
if ((cntflag & WRITER_ACTIVE) != 0) {
/*
* A writer is working. We lose, and cancel the read
* request.
*/
#if defined(ISC_RWLOCK_USESTDATOMIC)
#else
-READER_INCR);
#endif
/*
* If no other readers are waiting and we've suspended
* new writers in this short period, wake them up.
*/
if (cntflag == READER_INCR &&
}
return (ISC_R_LOCKBUSY);
}
} else {
/* Try locking without entering the waiting queue. */
#if defined(ISC_RWLOCK_USESTDATOMIC)
return (ISC_R_LOCKBUSY);
#else
if (cntflag != 0)
return (ISC_R_LOCKBUSY);
#endif
/*
* XXXJT: jump into the queue, possibly breaking the writer
* order.
*/
#if defined(ISC_RWLOCK_USESTDATOMIC)
#else
#endif
rwl->write_granted++;
}
#ifdef ISC_RWLOCK_TRACE
#endif
return (ISC_R_SUCCESS);
}
#if defined(ISC_RWLOCK_USESTDATOMIC)
{
/* Try to acquire write access. */
/*
* There must have been no writer, and there must have
* been at least one reader.
*/
(reader_incr & ~WRITER_ACTIVE) != 0);
if (reader_incr == READER_INCR) {
/*
* We are the only reader and have been upgraded.
* Now jump into the head of the writer waiting queue.
*/
} else
return (ISC_R_LOCKBUSY);
}
#else
{
/* Try to acquire write access. */
/*
* There must have been no writer, and there must have
* been at least one reader.
*/
(prevcnt & ~WRITER_ACTIVE) != 0);
if (prevcnt == READER_INCR) {
/*
* We are the only reader and have been upgraded.
* Now jump into the head of the writer waiting queue.
*/
} else
return (ISC_R_LOCKBUSY);
}
#endif
return (ISC_R_SUCCESS);
}
void
#if defined(ISC_RWLOCK_USESTDATOMIC)
{
/* Become an active reader. */
/* We must have been a writer. */
/* Complete write */
}
#else
{
/* Become an active reader. */
/* We must have been a writer. */
/* Complete write */
}
#endif
/* Resume other readers */
if (rwl->readers_waiting > 0)
}
#ifdef ISC_RWLOCK_TRACE
#endif
if (type == isc_rwlocktype_read) {
#if defined(ISC_RWLOCK_USESTDATOMIC)
#else
#endif
/*
* If we're the last reader and any writers are waiting, wake
* them up. We need to wake up all of them to ensure the
* FIFO order.
*/
if (prev_cnt == READER_INCR &&
}
} else {
/*
* Reset the flag, and (implicitly) tell other writers
* we are done.
*/
#if defined(ISC_RWLOCK_USESTDATOMIC)
#else
#endif
/*
* We have passed the write quota, no writer is
* waiting, or some readers are almost ready, pending
* possible writers. Note that the last case can
* happen even if write_requests != write_completions
* (which means a new writer in the queue), so we need
* to catch the case explicitly.
*/
if (rwl->readers_waiting > 0) {
}
}
}
}
#ifdef ISC_RWLOCK_TRACE
ISC_MSG_POSTUNLOCK, "postunlock"),
#endif
return (ISC_R_SUCCESS);
}
#else /* ISC_RWLOCK_USEATOMIC */
static isc_result_t
#ifdef ISC_RWLOCK_TRACE
#endif
if (type == isc_rwlocktype_read) {
if (rwl->readers_waiting != 0)
while (!done) {
if (!skip &&
(rwl->writers_waiting == 0 ||
{
} else if (nonblock) {
} else {
rwl->readers_waiting++;
rwl->readers_waiting--;
}
}
} else {
if (rwl->writers_waiting != 0)
while (!done) {
} else if (nonblock) {
} else {
rwl->writers_waiting++;
rwl->writers_waiting--;
}
}
}
#ifdef ISC_RWLOCK_TRACE
#endif
return (result);
}
if (max_cnt > RWLOCK_MAX_ADAPTIVE_COUNT)
do {
break;
}
#ifdef ISC_PLATFORM_BUSYWAITNOP
#endif
return (result);
}
}
/* If we are the only reader then succeed. */
} else
return (result);
}
void
/*
* Resume processing any read request that were blocked when
* we upgraded.
*/
rwl->readers_waiting > 0)
}
#ifdef ISC_RWLOCK_TRACE
#endif
}
if (rwl->writers_waiting > 0) {
} else if (rwl->readers_waiting > 0) {
/* Does this case ever happen? */
}
} else {
if (rwl->readers_waiting > 0) {
if (rwl->writers_waiting > 0 &&
} else {
}
} else if (rwl->writers_waiting > 0) {
} else {
}
}
}
#ifdef ISC_RWLOCK_TRACE
ISC_MSG_POSTUNLOCK, "postunlock"),
#endif
return (ISC_R_SUCCESS);
}
#endif /* ISC_RWLOCK_USEATOMIC */
#else /* ISC_PLATFORM_USETHREADS */
unsigned int write_quota)
{
return (ISC_R_SUCCESS);
}
if (type == isc_rwlocktype_read) {
return (ISC_R_LOCKBUSY);
} else {
return (ISC_R_LOCKBUSY);
}
return (ISC_R_SUCCESS);
}
}
/* If we are the only reader then succeed. */
else
return (result);
}
void
}
return (ISC_R_SUCCESS);
}
void
}
#endif /* ISC_PLATFORM_USETHREADS */