/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include "lint.h"
#include "thr_uberdata.h"
#include <stddef.h>
/*
 * These symbols should not be exported from libc, but
 * /lib/libm.so.2 references them.  libm needs to be fixed.
 * Also, some older versions of the Studio compiler/debugger
 * components reference them.  These need to be fixed, too.
 *
 * Until then, each pragma below makes the underscore-prefixed name a
 * weak alias for the public function of the same name defined in this
 * file.
 */
#pragma weak _thr_getspecific = thr_getspecific
#pragma weak _thr_keycreate = thr_keycreate
#pragma weak _thr_setspecific = thr_setspecific
/*
 * 128 million keys should be enough for anyone.
 * This allocates half a gigabyte of memory for the keys themselves and
 * half a gigabyte of memory for each thread that uses the largest key.
 *
 * 0x08000000 is 2^27.  Both tables in this file are sized as a key
 * count times sizeof (void *), so the half-gigabyte figures above
 * correspond to 4-byte pointers; with 8-byte pointers each doubles.
 * thr_keycreate() returns EAGAIN rather than grow the destructor
 * table beyond this many entries.
 */
#define MAX_KEYS 0x08000000U
/*
 * Allocate a new thread-specific-data key and register its destructor.
 *
 * On success the new key is stored through pkey and 0 is returned.
 * EAGAIN is returned if growing the destructor table would exceed
 * MAX_KEYS entries, and ENOMEM if the larger table cannot be allocated.
 * In both failure cases *pkey is left untouched.
 */
int
thr_keycreate(thread_key_t *pkey, void (*destructor)(void *))
{
	tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
	void (**stale_tab)(void *) = NULL;
	void (**grown_tab)(void *);
	uint_t stale_cnt;
	uint_t grown_cnt;
	int error = 0;

	lmutex_lock(&tsdm->tsdm_lock);

	/*
	 * POSIX requires pthread_getspecific() to return NULL for an
	 * allocated key on which the calling thread has not yet performed
	 * a pthread_setspecific().  Consider:
	 *
	 *	pthread_key_create(&key);
	 *	pthread_setspecific(key, datum);
	 *	pthread_key_delete(key);
	 *	pthread_key_create(&key);
	 *	val = pthread_getspecific(key);
	 *
	 * If the second pthread_key_create() were to hand back the key
	 * that was just deleted, the final pthread_getspecific() would
	 * still have to yield NULL, not the stale datum.  That leaves an
	 * implementation two choices:
	 *
	 * (1) Reuse deleted keys.  Done optimally, this obliges
	 *     pthread_key_create() to somehow associate NULL with the
	 *     reused key in every thread; keeping the hot path fast and
	 *     lock-free then makes the implementation substantially
	 *     more complex.
	 *
	 * (2) Never reuse deleted keys.  pthread_getspecific() then only
	 *     has to compare the key against the number of keys set by
	 *     the calling thread and return NULL when the key is beyond
	 *     the highest one set.  The cost is wasted memory (a program
	 *     that loops over pthread_key_create()/pthread_key_delete()
	 *     eventually runs out of memory), but lookup stays optimal
	 *     and key creation and deletion stay simple.
	 *
	 * Every Solaris implementation has chosen (2).  After roughly ten
	 * years in the field it is safe to assume that applications do not
	 * create and destroy keys in a loop, so (2) it remains.
	 */
	stale_cnt = tsdm->tsdm_nkeys;
	if (tsdm->tsdm_nused == stale_cnt) {
		/*
		 * The table is full (or does not exist yet): start at
		 * eight entries, otherwise double.  tsdm->tsdm_nkeys is
		 * therefore always a power of two.
		 */
		grown_cnt = stale_cnt << 1;
		if (grown_cnt == 0)
			grown_cnt = 8;

		if (grown_cnt > MAX_KEYS) {
			error = EAGAIN;
			goto out;
		}

		grown_tab = lmalloc(grown_cnt * sizeof (void *));
		if (grown_tab == NULL) {
			error = ENOMEM;
			goto out;
		}

		stale_tab = tsdm->tsdm_destro;
		if (stale_tab != NULL) {
			(void) memcpy(grown_tab, stale_tab,
			    stale_cnt * sizeof (void *));
		} else {
			/* key == 0 is always invalid */
			grown_tab[0] = TSD_UNALLOCATED;
			tsdm->tsdm_nused = 1;
		}

		tsdm->tsdm_destro = grown_tab;
		tsdm->tsdm_nkeys = grown_cnt;
	}

	/* Hand out the next never-used key and record its destructor. */
	*pkey = tsdm->tsdm_nused;
	tsdm->tsdm_destro[tsdm->tsdm_nused] = destructor;
	tsdm->tsdm_nused++;

out:
	lmutex_unlock(&tsdm->tsdm_lock);

	/*
	 * The superseded table, if there was one, is released only after
	 * the lock has been dropped.  stale_tab is still NULL on both
	 * error paths, so nothing is freed there.
	 */
	if (stale_tab != NULL)
		lfree(stale_tab, stale_cnt * sizeof (void *));

	return (error);
}
#pragma weak _pthread_key_create = pthread_key_create
/*
 * POSIX spelling of thr_keycreate(): both arguments are forwarded
 * unchanged and the result is returned as-is.
 */
int
pthread_key_create(pthread_key_t *pkey, void (*destructor)(void *))
{
	int error = thr_keycreate(pkey, destructor);

	return (error);
}
/*
* Same as thr_keycreate(), above, except that the key creation
* is performed only once. This relies upon the fact that a key
* value of THR_ONCE_KEY is invalid, and requires that the key be
* allocated with a value of THR_ONCE_KEY before calling here.
* THR_ONCE_KEY and PTHREAD_ONCE_KEY_NP, defined in <thread.h>
* and <pthread.h> respectively, must have the same value.
* Example:
*
* static pthread_key_t key = PTHREAD_ONCE_KEY_NP;
* ...
* pthread_key_create_once_np(&key, destructor);
*/
#pragma weak pthread_key_create_once_np = thr_keycreate_once
int
thr_keycreate_once(thread_key_t *keyp, void (*destructor)(void *))
{
	static mutex_t key_lock = DEFAULTMUTEX;
	thread_key_t new_key;
	int error = 0;

	/*
	 * Unlocked check first: if the key no longer holds the
	 * THR_ONCE_KEY placeholder there is nothing to create.
	 */
	if (*keyp != THR_ONCE_KEY) {
		membar_consumer();
		return (0);
	}

	/*
	 * Re-test under key_lock so that only one caller creates the key.
	 * The producer barrier is issued before the new key value is
	 * stored into *keyp.
	 */
	lmutex_lock(&key_lock);
	if (*keyp == THR_ONCE_KEY &&
	    (error = thr_keycreate(&new_key, destructor)) == 0) {
		membar_producer();
		*keyp = new_key;
	}
	lmutex_unlock(&key_lock);

	/* A failed creation leaves *keyp as THR_ONCE_KEY. */
	if (error != 0)
		return (error);

	membar_consumer();
	return (0);
}
/*
 * Mark a key as deleted by storing TSD_UNALLOCATED in its destructor
 * slot.  Returns EINVAL if the key was never handed out (it is not
 * below tsdm_nused) or has already been deleted; otherwise returns 0.
 * tsdm_nused is not changed here.
 */
int
pthread_key_delete(pthread_key_t key)
{
	tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
	int error = EINVAL;

	lmutex_lock(&tsdm->tsdm_lock);
	if (key < tsdm->tsdm_nused &&
	    tsdm->tsdm_destro[key] != TSD_UNALLOCATED) {
		tsdm->tsdm_destro[key] = TSD_UNALLOCATED;
		error = 0;
	}
	lmutex_unlock(&tsdm->tsdm_lock);

	return (error);
}
/*
* Blessedly, the pthread_getspecific() interface is much better than the
* thr_getspecific() interface in that it cannot return an error status.
* Thus, if the key specified is bogus, pthread_getspecific()'s behavior
* is undefined. As an added bonus (and as an artifact of not returning
* an error code), the requested datum is returned rather than stored
* through a parameter -- thereby avoiding the unnecessary store/load pair
* incurred by thr_getspecific(). Every once in a while, the Standards
* get it right -- but usually by accident.
*/
void *
pthread_getspecific(pthread_key_t key)
{
	tsd_t *slow;

	/*
	 * This function is deliberately cycle-shaved: some applications
	 * call it so heavily that a single machine cycle is measurable.
	 * That is why curthread->ul_ftsd[0] exists as a wasted NULL slot
	 * for the invalid key 0, instead of the key being adjusted by
	 * subtracting one on every lookup.
	 */
	if (key < TSD_NFAST)
		return (curthread->ul_ftsd[key]);

	/*
	 * Keys at or above TSD_NFAST live in the slow TSD block, which
	 * may not exist or may not yet extend as far as this key.
	 */
	slow = curthread->ul_stsd;
	if (slow == NULL || key >= slow->tsd_nalloc)
		return (NULL);

	return (slow->tsd_data[key]);
}
/*
 * Fetch the calling thread's value for key and store it through valuep.
 * Returns EINVAL for key 0 (leaving *valuep untouched) and 0 otherwise;
 * a key for which this thread holds no slot yields a NULL value.
 */
int
thr_getspecific(thread_key_t key, void **valuep)
{
	tsd_t *slow;
	void *datum = NULL;

	/*
	 * Amazingly, some application code (and worse, some particularly
	 * fugly Solaris library code) _relies_ on 0 always being an
	 * invalid key.  To preserve that, thr_/pthread_key_create() never
	 * returns 0 as a key, and it is rejected explicitly here.
	 */
	if (key == 0)
		return (EINVAL);

	if (key < TSD_NFAST) {
		datum = curthread->ul_ftsd[key];
	} else {
		slow = curthread->ul_stsd;
		if (slow != NULL && key < slow->tsd_nalloc)
			datum = slow->tsd_data[key];
	}

	*valuep = datum;
	return (0);
}
/*
* We call thr_setspecific_slow() when the key specified
* is beyond the current thread's currently allocated range.
* This case is in a separate function because we want
* the compiler to optimize for the common case.
*/
static int
thr_setspecific_slow(thread_key_t key, void *value)
{
	ulwp_t *self = curthread;
	tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
	tsd_t *old_tsd;
	tsd_t *new_tsd;
	uint_t new_slots;

	/*
	 * No lock is taken on this path; tsdm->tsdm_nused can only
	 * increase, so a key below it has been handed out at some point.
	 */
	if (key >= tsdm->tsdm_nused)
		return (EINVAL);

	/*
	 * Testing (tsdm->tsdm_destro[key] == TSD_UNALLOCATED) here would
	 * be nice, but it would mean acquiring tsdm->tsdm_lock, and this
	 * path is meant to stay lock-free.
	 *
	 * The key is (or at least _was_) valid.  Should it be deleted
	 * later, or even before the value is stored, that is an
	 * application race whose outcome POSIX leaves unspecified.
	 *
	 * Size the replacement block by doubling only until the key
	 * fits, so no more is allocated than necessary.
	 * tsd_nalloc is always a power of two.
	 */
	old_tsd = self->ul_stsd;
	new_slots = (old_tsd == NULL) ? 8 : old_tsd->tsd_nalloc;
	while (new_slots <= key)
		new_slots <<= 1;

	new_tsd = lmalloc(new_slots * sizeof (void *));
	if (new_tsd == NULL)
		return (ENOMEM);

	if (old_tsd != NULL) {
		uint_t old_slots = old_tsd->tsd_nalloc;

		/* Carry the existing slots over, then drop the old block. */
		(void) memcpy(new_tsd, old_tsd, old_slots * sizeof (void *));
		lfree(old_tsd, old_slots * sizeof (void *));
	}

	/*
	 * Record the new capacity (replacing the count copied over from
	 * the old block), store the caller's value, and publish the block.
	 */
	new_tsd->tsd_nalloc = new_slots;
	new_tsd->tsd_data[key] = value;
	self->ul_stsd = new_tsd;

	return (0);
}
/*
 * Store value as the calling thread's datum for key.
 *
 * Returns EINVAL for key 0.  Keys below TSD_NFAST, and keys already
 * covered by the thread's slow TSD block, are stored directly and
 * return 0.  Any other key goes through thr_setspecific_slow(), whose
 * result is returned unchanged.
 */
int
thr_setspecific(thread_key_t key, void *value)
{
	ulwp_t *self = curthread;
	tsd_t *slow;
	int error;

	/* Key 0 is always invalid; thr_getspecific() rejects it likewise. */
	if (key == 0)
		return (EINVAL);

	if (key < TSD_NFAST) {
		self->ul_ftsd[key] = value;
		return (0);
	}

	slow = self->ul_stsd;
	if (slow != NULL && key < slow->tsd_nalloc) {
		slow->tsd_data[key] = value;
		return (0);
	}

	/*
	 * Growing the slow TSD block allocates and frees memory, so it is
	 * done as a critical region.  tsd_free() needs the same protection.
	 */
	enter_critical(self);
	error = thr_setspecific_slow(key, value);
	exit_critical(self);

	return (error);
}
/*
 * POSIX spelling of thr_setspecific(): the const qualifier on value is
 * cast away and the call is forwarded, returning its result unchanged.
 */
int
pthread_setspecific(pthread_key_t key, const void *value)
{
	void *datum = (void *)value;

	return (thr_setspecific(key, datum));
}
/*
* Contract-private interface for java. See PSARC/2003/159
*
* If the key falls within the TSD_NFAST range, return a non-negative
* offset that can be used by the caller to fetch the TSD data value
* directly out of the thread structure using %g7 (sparc) or %gs (x86).
* With the advent of TLS, %g7 and %gs are part of the ABI, even though
* the definition of the thread structure itself (ulwp_t) is private.
*
* We guarantee that the offset returned on sparc will fit within
* a SIMM13 field (that is, it is less than 2048).
*
* On failure (key is not in the TSD_NFAST range), return -1.
*/
ptrdiff_t
_thr_slot_offset(thread_key_t key)
{
	/* Key 0 is invalid; keys at or above TSD_NFAST have no fast slot. */
	if (key == 0 || key >= TSD_NFAST)
		return (-1);

	/* Byte offset of this key's fast slot within the thread structure. */
	return ((ptrdiff_t)offsetof(ulwp_t, ul_ftsd[key]));
}
/*
 * This is called by _thrp_exit() to apply destructors to the thread's tsd.
 *
 * For every key below tsdm_nused whose registered destructor is neither
 * NULL nor TSD_UNALLOCATED, and for which the exiting thread holds a
 * non-NULL value, the value is cleared and then passed to the destructor.
 * tsdm_lock is held while the destructor table is examined but is
 * dropped around each destructor call.  A destructor may itself store
 * new TSD values, so the scan repeats until a complete pass calls no
 * destructor at all.  Finally the slow TSD block is released through
 * tsd_free().
 */
void
tsd_exit(void)
{
	ulwp_t *self = curthread;
	tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
	thread_key_t key;
	int recheck;
	void *val;
	void (*func)(void *);

	lmutex_lock(&tsdm->tsdm_lock);

	do {
		recheck = 0;

		/*
		 * Pass over the fast slots kept in the thread structure.
		 * Key 0 is never valid, so start at 1.
		 */
		for (key = 1; key < TSD_NFAST &&
		    key < tsdm->tsdm_nused; key++) {
			if ((func = tsdm->tsdm_destro[key]) != NULL &&
			    func != TSD_UNALLOCATED &&
			    (val = self->ul_ftsd[key]) != NULL) {
				/*
				 * Clear the slot first, then call the
				 * destructor without holding the lock.
				 */
				self->ul_ftsd[key] = NULL;
				lmutex_unlock(&tsdm->tsdm_lock);
				(*func)(val);
				lmutex_lock(&tsdm->tsdm_lock);
				recheck = 1;
			}
		}

		/*
		 * No slow TSD: skip straight to the loop test (a continue
		 * inside do/while evaluates the while condition).
		 */
		if (self->ul_stsd == NULL)
			continue;

		/*
		 * Any of these destructors could cause us to grow the number
		 * TSD keys in the slow TSD; we cannot cache the slow TSD
		 * pointer through this loop.  key carries on from where
		 * the fast-slot loop stopped.
		 */
		for (; key < self->ul_stsd->tsd_nalloc &&
		    key < tsdm->tsdm_nused; key++) {
			if ((func = tsdm->tsdm_destro[key]) != NULL &&
			    func != TSD_UNALLOCATED &&
			    (val = self->ul_stsd->tsd_data[key]) != NULL) {
				self->ul_stsd->tsd_data[key] = NULL;
				lmutex_unlock(&tsdm->tsdm_lock);
				(*func)(val);
				lmutex_lock(&tsdm->tsdm_lock);
				recheck = 1;
			}
		}
	} while (recheck);

	lmutex_unlock(&tsdm->tsdm_lock);

	/*
	 * We're done; if we have slow TSD, we need to free it.
	 */
	tsd_free(self);
}
/*
 * Release the slow TSD block belonging to ulwp, if it has one, and
 * clear its ul_stsd pointer.  The work is done inside a critical
 * region entered on the calling thread.
 */
void
tsd_free(ulwp_t *ulwp)
{
	ulwp_t *self = curthread;
	tsd_t *slow;

	enter_critical(self);

	slow = ulwp->ul_stsd;
	if (slow != NULL) {
		/* The block's size is its slot count times the slot size. */
		lfree(slow, slow->tsd_nalloc * sizeof (void *));
	}
	ulwp->ul_stsd = NULL;

	exit_critical(self);
}