/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
#pragma ident "%Z%%M% %I% %E% SMI"
#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"
/*
 * The aio subsystem memory allocation strategy:
 *
 * For each of the structure types we wish to allocate/free
 * (aio_worker_t, aio_req_t, aio_lio_t), we use mmap() to allocate
 * chunks of memory.  Each chunk is subdivided into individual
 * elements that are placed on a free list; allocations are taken
 * from that free list and freed elements are returned to it.
 *
 * Chunks start small (8 Kbytes) and get larger (size doubling)
 * as more chunks are needed.  This keeps memory usage small for
 * light use and fragmentation small for heavy use.
 *
 * Chunks are never unmapped except as an aftermath of fork()
 * in the child process, when they are all unmapped (because
 * all of the worker threads disappear in the child).
 */
#define INITIAL_CHUNKSIZE (8 * 1024)
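/*
 * Illustrative arithmetic (not part of the implementation): with the
 * doubling policy described above, successive chunks for a given
 * structure type are mapped at 8K, 16K, 32K, 64K, ... bytes, so after
 * the Nth chunk that type has (2^N - 1) * 8K bytes mapped in total,
 * which is always a little less than twice the size of its most
 * recent chunk.
 */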
/*
 * The header structure for each chunk.
 * A pointer and a size_t ensure proper alignment for whatever follows.
 */
typedef struct chunk {
        struct chunk *chunk_next;       /* linked list */
        size_t chunk_size;              /* size of this chunk */
} chunk_t;
chunk_t *chunk_list = NULL; /* list of all chunks */
mutex_t chunk_lock = DEFAULTMUTEX;
chunk_t *
chunk_alloc(size_t size)
{
        chunk_t *chp = NULL;
        void *ptr;

        ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
        if (ptr != MAP_FAILED) {
                lmutex_lock(&chunk_lock);
                chp = ptr;
                chp->chunk_next = chunk_list;
                chunk_list = chp;
                chp->chunk_size = size;
                lmutex_unlock(&chunk_lock);
        }
        return (chp);
}
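/*
 * Note for readers: chunk_alloc() returns a pointer to the chunk_t
 * header itself.  Each caller below treats the memory immediately
 * following the header, i.e. (chp + 1), as an array of
 * (chunk_size - sizeof (chunk_t)) / sizeof (element) elements and
 * strings those elements onto its own free list.
 */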
aio_worker_t *worker_freelist = NULL; /* free list of worker structures */
aio_worker_t *worker_freelast = NULL;
size_t worker_chunksize = 0;
mutex_t worker_lock = DEFAULTMUTEX;
/*
 * Allocate a worker control block.
 */
aio_worker_t *
_aio_worker_alloc(void)
{
        aio_worker_t *aiowp;
        chunk_t *chp;
        size_t chunksize;
        int nelem;
        int i;

        lmutex_lock(&worker_lock);
        if ((aiowp = worker_freelist) == NULL) {
                if ((chunksize = 2 * worker_chunksize) == 0)
                        chunksize = INITIAL_CHUNKSIZE;
                if ((chp = chunk_alloc(chunksize)) == NULL) {
                        lmutex_unlock(&worker_lock);
                        return (NULL);
                }
                worker_chunksize = chunksize;
                worker_freelist = (aio_worker_t *)(uintptr_t)(chp + 1);
                nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_worker_t);
                for (i = 0, aiowp = worker_freelist; i < nelem; i++, aiowp++)
                        aiowp->work_forw = aiowp + 1;
                worker_freelast = aiowp - 1;
                worker_freelast->work_forw = NULL;
                aiowp = worker_freelist;
        }
        if ((worker_freelist = aiowp->work_forw) == NULL)
                worker_freelast = NULL;
        lmutex_unlock(&worker_lock);

        aiowp->work_forw = NULL;
        (void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL);
        (void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL);
        return (aiowp);
}
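/*
 * Note for readers: the work_qlock1 mutex and work_idle_cv condition
 * variable initialized above are destroyed again in _aio_worker_free()
 * below before the structure is returned to the free list.
 */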
/*
 * Free a worker control block.
 * Declared with void *arg so it can be a pthread_key_create() destructor.
 */
void
_aio_worker_free(void *arg)
{
        aio_worker_t *aiowp = arg;

        (void) mutex_destroy(&aiowp->work_qlock1);
        (void) cond_destroy(&aiowp->work_idle_cv);
        (void) memset(aiowp, 0, sizeof (*aiowp));

        lmutex_lock(&worker_lock);
        if (worker_freelast == NULL) {
                worker_freelist = worker_freelast = aiowp;
        } else {
                worker_freelast->work_forw = aiowp;
                worker_freelast = aiowp;
        }
        lmutex_unlock(&worker_lock);
}
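/*
 * Usage note (an observation, not new behavior): besides being called
 * directly, this function is installed by init_aio() below as the
 * destructor for _aio_key, so a worker's control block can also be
 * reclaimed through thread-specific data when that worker thread
 * exits with its key value still set.
 */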
aio_req_t *_aio_freelist = NULL; /* free list of request structures */
aio_req_t *_aio_freelast = NULL;
size_t request_chunksize = 0;
int _aio_freelist_cnt = 0;
int _aio_allocated_cnt = 0;
mutex_t __aio_cache_lock = DEFAULTMUTEX;
/*
 * Allocate an aio request structure.
 */
aio_req_t *
_aio_req_alloc(void)
{
        aio_req_t *reqp;
        chunk_t *chp;
        size_t chunksize;
        int nelem;
        int i;

        lmutex_lock(&__aio_cache_lock);
        if ((reqp = _aio_freelist) == NULL) {
                if ((chunksize = 2 * request_chunksize) == 0)
                        chunksize = INITIAL_CHUNKSIZE;
                if ((chp = chunk_alloc(chunksize)) == NULL) {
                        lmutex_unlock(&__aio_cache_lock);
                        return (NULL);
                }
                request_chunksize = chunksize;
                _aio_freelist = (aio_req_t *)(uintptr_t)(chp + 1);
                nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_req_t);
                for (i = 0, reqp = _aio_freelist; i < nelem; i++, reqp++) {
                        reqp->req_state = AIO_REQ_FREE;
                        reqp->req_link = reqp + 1;
                }
                _aio_freelast = reqp - 1;
                _aio_freelast->req_link = NULL;
                _aio_freelist_cnt = nelem;
                reqp = _aio_freelist;
        }
        if ((_aio_freelist = reqp->req_link) == NULL)
                _aio_freelast = NULL;
        _aio_freelist_cnt--;
        _aio_allocated_cnt++;
        lmutex_unlock(&__aio_cache_lock);

        ASSERT(reqp->req_state == AIO_REQ_FREE);
        reqp->req_state = 0;
        reqp->req_link = NULL;
        reqp->req_sigevent.sigev_notify = SIGEV_NONE;
        return (reqp);
}
/*
 * Free an aio request structure.
 */
void
_aio_req_free(aio_req_t *reqp)
{
        ASSERT(reqp->req_state != AIO_REQ_FREE &&
            reqp->req_state != AIO_REQ_DONEQ);
        (void) memset(reqp, 0, sizeof (*reqp));
        reqp->req_state = AIO_REQ_FREE;

        lmutex_lock(&__aio_cache_lock);
        if (_aio_freelast == NULL) {
                _aio_freelist = _aio_freelast = reqp;
        } else {
                _aio_freelast->req_link = reqp;
                _aio_freelast = reqp;
        }
        _aio_freelist_cnt++;
        _aio_allocated_cnt--;
        lmutex_unlock(&__aio_cache_lock);
}
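/*
 * Accounting note (illustrative): _aio_freelist_cnt and
 * _aio_allocated_cnt move in opposite directions under
 * __aio_cache_lock, so their sum always equals the total number of
 * aio_req_t elements carved out of chunks so far.  The free list is
 * empty whenever a new chunk is carved, which is why _aio_req_alloc()
 * can assign _aio_freelist_cnt = nelem rather than add to it.
 */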
aio_lio_t *_lio_head_freelist = NULL; /* free list of lio head structures */
aio_lio_t *_lio_head_freelast = NULL;
size_t lio_head_chunksize = 0;
int _lio_alloc = 0;
int _lio_free = 0;
mutex_t __lio_mutex = DEFAULTMUTEX;
/*
 * Allocate a listio head structure.
 */
aio_lio_t *
_aio_lio_alloc(void)
{
        aio_lio_t *head;
        chunk_t *chp;
        size_t chunksize;
        int nelem;
        int i;

        lmutex_lock(&__lio_mutex);
        if ((head = _lio_head_freelist) == NULL) {
                if ((chunksize = 2 * lio_head_chunksize) == 0)
                        chunksize = INITIAL_CHUNKSIZE;
                if ((chp = chunk_alloc(chunksize)) == NULL) {
                        lmutex_unlock(&__lio_mutex);
                        return (NULL);
                }
                lio_head_chunksize = chunksize;
                _lio_head_freelist = (aio_lio_t *)(uintptr_t)(chp + 1);
                nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t);
                for (i = 0, head = _lio_head_freelist; i < nelem; i++, head++)
                        head->lio_next = head + 1;
                _lio_head_freelast = head - 1;
                _lio_head_freelast->lio_next = NULL;
                _lio_alloc += nelem;
                _lio_free = nelem;
                head = _lio_head_freelist;
        }
        if ((_lio_head_freelist = head->lio_next) == NULL)
                _lio_head_freelast = NULL;
        _lio_free--;
        lmutex_unlock(&__lio_mutex);

        ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
        head->lio_next = NULL;
        head->lio_port = -1;
        (void) mutex_init(&head->lio_mutex, USYNC_THREAD, NULL);
        (void) cond_init(&head->lio_cond_cv, USYNC_THREAD, NULL);
        return (head);
}
/*
 * Free a listio head structure.
 */
void
_aio_lio_free(aio_lio_t *head)
{
        ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
        (void) mutex_destroy(&head->lio_mutex);
        (void) cond_destroy(&head->lio_cond_cv);
        (void) memset(head, 0, sizeof (*head));

        lmutex_lock(&__lio_mutex);
        if (_lio_head_freelast == NULL) {
                _lio_head_freelist = _lio_head_freelast = head;
        } else {
                _lio_head_freelast->lio_next = head;
                _lio_head_freelast = head;
        }
        _lio_free++;
        lmutex_unlock(&__lio_mutex);
}
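/*
 * Accounting note (illustrative): _lio_alloc counts every aio_lio_t
 * ever carved out of a chunk and _lio_free counts how many of those
 * are currently on the free list, so (_lio_alloc - _lio_free) is the
 * number of listio heads currently in use.
 */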
void
postfork1_child_aio(void)
{
        chunk_t *chp;

        /*
         * All of the workers are gone; free their structures.
         */
        if (_kaio_supported != NULL) {
                (void) munmap((void *)_kaio_supported,
                    MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t));
                _kaio_supported = NULL;
        }
        if (_aio_hash != NULL) {
                (void) munmap((void *)_aio_hash, HASHSZ * sizeof (aio_hash_t));
                _aio_hash = NULL;
        }
        for (chp = chunk_list; chp != NULL; chp = chunk_list) {
                chunk_list = chp->chunk_next;
                (void) munmap((void *)chp, chp->chunk_size);
        }

        /*
         * Reinitialize global variables
         */
        worker_freelist = NULL;
        worker_freelast = NULL;
        worker_chunksize = 0;
        (void) mutex_init(&worker_lock, USYNC_THREAD, NULL);

        _aio_freelist = NULL;
        _aio_freelast = NULL;
        request_chunksize = 0;
        _aio_freelist_cnt = 0;
        _aio_allocated_cnt = 0;
        (void) mutex_init(&__aio_cache_lock, USYNC_THREAD, NULL);

        _lio_head_freelist = NULL;
        _lio_head_freelast = NULL;
        lio_head_chunksize = 0;
        _lio_alloc = 0;
        _lio_free = 0;
        (void) mutex_init(&__lio_mutex, USYNC_THREAD, NULL);

        (void) mutex_init(&__aio_initlock, USYNC_THREAD, NULL);
        (void) cond_init(&__aio_initcv, USYNC_THREAD, NULL);
        __aio_initbusy = 0;

        (void) mutex_init(&__aio_mutex, USYNC_THREAD, NULL);
        (void) cond_init(&_aio_iowait_cv, USYNC_THREAD, NULL);
        (void) cond_init(&_aio_waitn_cv, USYNC_THREAD, NULL);

        _kaio_ok = 0;
        __uaio_ok = 0;
        _kaiowp = NULL;
        __workers_rw = NULL;
        __nextworker_rw = NULL;
        __rw_workerscnt = 0;
        __workers_no = NULL;
        __nextworker_no = NULL;
        __no_workerscnt = 0;
        _aio_worker_cnt = 0;
        _aio_done_head = NULL;
        _aio_done_tail = NULL;
        _aio_donecnt = 0;
        _aio_doneq = NULL;
        _aio_doneq_cnt = 0;
        _aio_waitncnt = 0;
        _aio_outstand_cnt = 0;
        _kaio_outstand_cnt = 0;
        _aio_req_done_cnt = 0;
        _aio_kernel_suspend = 0;
        _aio_suscv_cnt = 0;
        _aiowait_flag = 0;
        _aio_flags = 0;
}
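/*
 * Note for readers: unmapping every chunk above is safe only because,
 * as the block comment at the top of this file explains, none of the
 * aio worker threads survive fork() in the child, so no thread in the
 * child still holds a pointer into any of those chunks.
 */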
#define DISPLAY(var) \
        (void) fprintf(stderr, #var "\t= %d\n", var)
static void
_aio_exit_info(void)
{
        if ((_kaio_ok | __uaio_ok) == 0)
                return;
        (void) fprintf(stderr, "\n");
        DISPLAY(_aio_freelist_cnt);
        DISPLAY(_aio_allocated_cnt);
        DISPLAY(_lio_alloc);
        DISPLAY(_lio_free);
        DISPLAY(__rw_workerscnt);
        DISPLAY(__no_workerscnt);
        DISPLAY(_aio_worker_cnt);
        DISPLAY(_aio_donecnt);
        DISPLAY(_aio_doneq_cnt);
        DISPLAY(_aio_waitncnt);
        DISPLAY(_aio_outstand_cnt);
        DISPLAY(_kaio_outstand_cnt);
        DISPLAY(_aio_req_done_cnt);
        DISPLAY(_aio_kernel_suspend);
        DISPLAY(_aio_suscv_cnt);
        DISPLAY(_aiowait_flag);
        DISPLAY(_aio_flags);
}
void
init_aio(void)
{
        char *str;

        (void) pthread_key_create(&_aio_key, _aio_worker_free);
        if ((str = getenv("_AIO_MIN_WORKERS")) != NULL) {
                if ((_min_workers = atoi(str)) <= 0)
                        _min_workers = 4;
        }
        if ((str = getenv("_AIO_MAX_WORKERS")) != NULL) {
                if ((_max_workers = atoi(str)) <= 0)
                        _max_workers = 256;
                if (_max_workers < _min_workers + 1)
                        _max_workers = _min_workers + 1;
        }
        if ((str = getenv("_AIO_EXIT_INFO")) != NULL && atoi(str) != 0)
                (void) atexit(_aio_exit_info);
}
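/*
 * Usage example (illustrative; the program name and values are
 * arbitrary): the worker-pool bounds and the exit-time statistics
 * dump can be tuned from the environment of the process, e.g.
 *
 *      _AIO_MIN_WORKERS=8 _AIO_MAX_WORKERS=64 _AIO_EXIT_INFO=1 ./app
 *
 * A value of zero or less for either worker count falls back to 4 or
 * 256 respectively, and, when _AIO_MAX_WORKERS is set, _max_workers
 * is raised to at least _min_workers + 1.
 */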