lx_futex.c revision 3348528f7ec68bf2f11d0cbd5c3b9932ea7f0d5c
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/inttypes.h>
#include <sys/lx_futex.h>
/*
* Futexes are a Linux-specific implementation of inter-process mutexes.
* They are designed to use shared memory for simple, uncontested
* operations, and rely on the kernel to resolve any contention issues.
*
* Most of the information in this section comes from the paper "Futexes
* Are Tricky", by Ulrich Drepper. This paper is currently available at:
*
* A futex is itself a 4-byte integer, which must be 4-byte aligned. The
* value of this integer is expected to be modified using user-level atomic
* operations. The futex(4) design itself does not impose any semantic
* constraints on the value stored in the futex; it is up to the
* application to define its own protocol.
*
* When the application decides that kernel intervention is required, it
* will use the futex(2) system call. There are 5 different operations
* that can be performed on a futex, using this system call. Since this
* interface has evolved over time, there are several different prototypes
* available to the user. Fortunately, there is only a single kernel-level
* interface:
*
* long sys_futex(void *futex1, int cmd, int val1,
* struct timespec *timeout, void *futex2, int val2)
*
* The kernel-level operations that may be performed on a futex are:
*
* FUTEX_WAIT
*
* Atomically verify that futex1 contains the value val1. If it
* doesn't, return EWOULDBLOCK. If it does contain the expected
* value, the thread will sleep until somebody performs a FUTEX_WAKE
* on the futex. The caller may also specify a timeout, indicating
* the maximum time the thread should sleep. If the timer expires,
* the call returns ETIMEDOUT. If the thread is awoken with a signal,
* the call returns EINTR. Otherwise, the call returns 0.
*
* FUTEX_WAKE
*
* Wake up at most val1 threads that are waiting on futex1. The call
* returns the number of blocked threads that were woken up.
*
* FUTEX_CMP_REQUEUE
*
* If the value stored in futex1 matches the value passed in val2,
* wake up at most val1 threads that are waiting on futex1. Otherwise,
* return EAGAIN.
*
* If there are more than val1 threads waiting on the futex, remove
* the remaining threads from this futex, and requeue them on futex2.
* The caller can limit the number of threads being requeued by
* encoding an integral numerical value in the position usually used
* for the timeout pointer.
*
* The call returns the number of blocked threads that were woken up
* or requeued.
*
* FUTEX_REQUEUE
*
* Identical to FUTEX_CMP_REQUEUE except that it does not use val2.
* This command has been declared broken and obsolete, but we still
* need to support it.
*
* FUTEX_FD
*
* Return a file descriptor, which can be used to refer to the futex.
* We don't support this operation.
*/
/*
* This structure is used to track all the threads currently waiting on a
* futex. There is one fwaiter_t for each blocked thread. We store all
* fwaiter_t's in a hash structure, indexed by the memid_t of the integer
* containing the futex's value.
*
* At the moment, all fwaiter_t's for a single futex are simply dumped into
* the hash bucket. If futex contention ever becomes a hot path, we can
* chain a single futex's waiters together.
*/
typedef struct fwaiter {
/*
* NOTE(review): this is the only member visible in this copy. The
* surrounding code hashes waiters by memid and passes &fw to
* futex_hashin()/futex_hashout(), so key/linkage members are
* presumably missing here -- confirm against the complete source.
*/
/* presumably set once this waiter has been woken -- TODO confirm */
volatile int fw_woken;
} fwaiter_t;
/*
* NOTE(review): the macro definitions below are incomplete in this copy.
* MEMID_COPY and MEMID_EQUAL each end in a line continuation with no
* replacement text, HASH_SIZE is used further down but never defined
* here, and the trailing "(HASH_SIZE - 1))" is the tail of a macro whose
* beginning is missing. Restore them from the complete source before
* building.
*/
#define MEMID_COPY(s, d) \
#define MEMID_EQUAL(s, d) \
/* Borrowed from the page freelist hash code. */
#define HASH_SHIFT_SZ 7
(HASH_SIZE - 1))
/*
* NOTE(review): the function name and parameter list are missing between
* the return type and the opening brace, and only the declaration of
* `index` survives in the body. Presumably one of the futex_hashin() /
* futex_hashout() helpers called by the wait path -- confirm and restore
* from the complete source.
*/
static void
{
int index;
}
/*
* NOTE(review): the function name and parameter list are missing between
* the return type and the opening brace, and only the declaration of
* `index` survives in the body. Presumably one of the futex_hashin() /
* futex_hashout() helpers called by the wait path -- confirm and restore
* from the complete source.
*/
static void
{
int index;
}
/*
* Go to sleep until somebody does a WAKE operation on this futex, we get a
* signal, or the timeout expires.
*
* The visible code calls futex_hashin(&fw) before the sleep result is
* examined, calls futex_hashout(&fw) on the way out, and returns err
* through the `out` label.
*
* NOTE(review): the function name, its parameters, the declarations of
* fw, err and ret, the conditions guarding the two early `goto out`
* statements, and the bodies of the `ret < 0` / `ret == 0` branches are
* all missing from this copy. Restore them from the complete source
* before building.
*/
static int
{
int index;
goto out;
}
goto out;
}
futex_hashin(&fw);
err = 0;
if (ret < 0)
else if (ret == 0)
}
/*
* The futex is normally hashed out in wakeup. If we timed out or
* got a signal, we need to hash it out here instead.
*/
futex_hashout(&fw);
out:
return (err);
}
/*
* Wake up to wake_threads threads that are blocked on the futex at memid.
*
* Returns ret, a counter that starts at 0 and is incremented inside the
* (now missing) scan of the hash bucket -- presumably once per thread
* woken; TODO confirm.
*
* NOTE(review): the function name, its parameters, the bucket locking and
* the loop that surrounds `ret++` are missing from this copy. Restore
* them from the complete source before building.
*/
static int
{
int index;
int ret = 0;
ret++;
}
}
return (ret);
}
/*
* Wake up to wake_threads threads waiting on the futex at memid. If there
* are more than that many threads waiting, requeue the remaining threads
* on the futex at requeue_memid.
*
* Every path through the `out` label releases lock l2 and then lock l1.
* While scanning, `ret++ < wake_threads` selects the first branch for the
* first wake_threads matches and the second branch after that.
*
* NOTE(review): the function name, its parameters, the declarations of l1
* and l2, the lock-ordering branches, the loop header and the bodies of
* most conditionals are missing from this copy. As shown, the final
* `if (ret < 0)` has lost its own statement and therefore guards
* `return (ret)`. Restore from the complete source before building.
*/
static int
{
int ret = 0;
/*
* To ensure that we don't miss a wakeup if the value of cmpval
* changes, we need to grab locks on both the original and new hash
* buckets. To avoid deadlock, we always grab the lower-indexed
* lock first.
*/
} else {
}
goto out;
}
goto out;
}
}
continue;
if (ret++ < wake_threads) {
} else {
break;
}
}
out:
mutex_exit(l2);
mutex_exit(l1);
if (ret < 0)
return (ret);
}
/*
* Copy in the relative timeout provided by the application and convert it
* to an absolute timeout.
*
* Returns EFAULT from either data-model branch, EINVAL when
* itimerspecfix(timeout) returns non-zero, and 0 otherwise.
*
* NOTE(review): the function name, its parameters, the declaration of
* `now`, the copy-in calls that should guard the two EFAULT returns, and
* the arithmetic that combines `now` with the timeout are missing from
* this copy. As shown, both data-model branches return EFAULT
* unconditionally. Restore from the complete source before building.
*/
static int
{
/* Native callers and (under _SYSCALL32_IMPL) 32-bit callers differ. */
if (get_udatamodel() == DATAMODEL_NATIVE) {
return (EFAULT);
}
#ifdef _SYSCALL32_IMPL
else {
return (EFAULT);
}
#endif
/* Current time, fetched for the relative-to-absolute conversion. */
gethrestime(&now);
if (itimerspecfix(timeout))
return (EINVAL);
return (0);
}
/*
* Entry point for the futex operation: validates the arguments, then
* dispatches on cmd (FUTEX_WAIT, FUTEX_WAKE, FUTEX_CMP_REQUEUE or
* FUTEX_REQUEUE) and returns rval.
*
* NOTE(review): the function name, its parameter list, and the statements
* controlled by most of the conditionals and switch cases are missing
* from this copy (for example `if (addr & 0x3)` and each `case` have no
* action). Restore them from the complete source before building.
*/
long
{
int requeue_threads;
int *requeue_cmp = NULL;
int rval = 0;
/* must be aligned on int boundary */
if (addr & 0x3)
/* Sanity check the futex command */
/* Copy in the timeout structure from userspace. */
if (rval != 0)
}
/*
* Only FUTEX_CMP_REQUEUE supplies a comparison value; for every other
* command requeue_cmp keeps its initial NULL.
*/
if (cmd == FUTEX_CMP_REQUEUE)
requeue_cmp = &val2;
/*
* lx_timeout is nominally a pointer to a userspace
* address. For these two commands, it actually contains
* an integer which indicates the maximum number of threads
* to requeue. This is horrible, and I'm sorry.
*/
requeue_threads = (int)lx_timeout;
}
/*
* Translate the process-specific, user-space futex virtual
* address(es) to universal memid.
*/
if (rval != 0)
if (rval)
}
switch (cmd) {
case FUTEX_WAIT:
break;
case FUTEX_WAKE:
break;
case FUTEX_CMP_REQUEUE:
case FUTEX_REQUEUE:
break;
}
return (rval);
}
void
lx_futex_init(void)
{
int i;
for (i = 0; i < HASH_SIZE; i++)
}
int
lx_futex_fini(void)
{
int i, err;
err = 0;
mutex_enter(&futex_hash_lock[i]);
if (futex_hash[i] != NULL)
mutex_exit(&futex_hash_lock[i]);
}
return (err);
}