/* posix_aio.c revision 6e628f2786bf7adece487b606a56068e35e3fcd2 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* posix_aio.c implements the POSIX async. I/O functions.
*
* aio_read
* aio_write
* aio_error
* aio_return
* aio_suspend
* lio_listio
* aio_fsync
* aio_cancel
*/
#include "synonyms.h"
#include "thr_uberdata.h"
#include "asyncio.h"
#include <atomic.h>
extern int __fdsync(int, int);
/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define AIO_TIMEOUT_INDEF -1
#define AIO_TIMEOUT_POLL 0
#define AIO_TIMEOUT_WAIT 1
#define AIO_TIMEOUT_UNDEF 2
/*
* List I/O stuff
*/
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;
/*
 * NOTE(review): truncated fragment -- the signature and several
 * statements are missing.  From the file header's function list this is
 * most likely aio_read(3RT); the visible tail registers SIGEV_THREAD
 * notification via _aio_sigev_thread() and submits with
 * AIO_KAIO | AIO_NO_DUPS.  Restore the missing lines from revision
 * 6e628f27 rather than reconstructing by hand.
 */
int
{
return (-1);
}
return (-1);
}
if (_aio_sigev_thread(aiocbp) != 0)
return (-1);
(AIO_KAIO | AIO_NO_DUPS)));
}
/*
 * NOTE(review): truncated fragment -- signature and validation lines
 * missing.  Most likely aio_write(3RT) (parallel in shape to the
 * preceding fragment); tail registers SIGEV_THREAD notification and
 * submits with AIO_KAIO | AIO_NO_DUPS.  Restore from revision 6e628f27.
 */
int
{
return (-1);
}
return (-1);
}
if (_aio_sigev_thread(aiocbp) != 0)
return (-1);
(AIO_KAIO | AIO_NO_DUPS)));
}
/*
* __lio_listio() cancellation handler.
*/
/* ARGSUSED */
/*
 * __lio_listio() cancellation handler (per the comment above this
 * block).  Visible logic: decide whether the aio_lio_t list head should
 * be freed (when lio_refcnt has dropped to zero), clear lio_waiting,
 * and free it if so.
 * NOTE(review): truncated -- the parameter line (presumably
 * "(aio_lio_t *head)"), locking calls, and the actual free call are
 * missing.  Restore from revision 6e628f27.
 */
static void
{
int freeit = 0;
if (head->lio_refcnt == 0) {
freeit = 1;
}
head->lio_waiting = 0;
if (freeit)
}
/*
 * NOTE(review): truncated fragment of lio_listio(3RT).  The visible
 * skeleton: initialize kaio if needed, validate mode (LIO_WAIT /
 * LIO_NOWAIT), scan the list deciding kernel (do_kaio) vs. user-level
 * submission, try kaio first, otherwise allocate a list head when
 * LIO_WAIT or notification is required, then submit each request with
 * AIO_NO_KAIO | AIO_NO_DUPS via _aio_rw(), and for LIO_WAIT block on
 * head->lio_refcnt until all requests complete.
 * Many condition/call lines are missing (signature, _kaio() calls,
 * locking, per-request error collection) -- restore from revision
 * 6e628f27; do not hand-reconstruct.
 */
int
{
int aio_ufs = 0;
int oerrno = 0;
int state = 0;
int EIOflg = 0;
int rw;
int do_kaio = 0;
int error;
int i;
if (!_kaio_ok)
_kaio_init();
if (aio_list_max == 0)
return (-1);
}
switch (mode) {
case LIO_WAIT:
break;
case LIO_NOWAIT:
break;
default:
return (-1);
}
for (i = 0; i < nent; i++) {
continue;
return (-1);
}
if (_aio_sigev_thread(aiocbp) != 0)
return (-1);
else {
do_kaio++;
else
}
}
if (_aio_sigev_thread_init(sigevp) != 0)
return (-1);
if (do_kaio) {
if (error == 0)
return (0);
} else {
error = -1;
}
/*
* If LIO_WAIT, or notification required, allocate a list head.
*/
head = _aio_lio_alloc();
if (head) {
head->lio_largefile = 0;
port_notify_t *pn =
} else { /* SIGEV_SIGNAL */
}
}
}
/*
*/
for (i = 0; i < nent; i++) {
if (head)
continue;
}
if (aiocbp->aio_reqprio != 0) {
EIOflg = 1;
if (head)
continue;
}
/*
* submit an AIO request with flags AIO_NO_KAIO
* to avoid the kaio() syscall in _aio_rw()
*/
switch (aiocbp->aio_lio_opcode) {
case LIO_READ:
break;
case LIO_WRITE:
break;
}
(AIO_NO_KAIO | AIO_NO_DUPS));
if (error == 0)
aio_ufs++;
else {
if (head)
EIOflg = 1;
}
}
}
if (EIOflg) {
return (-1);
}
/*
* call kaio(AIOLIOWAIT) to get all outstanding
* kernel AIO requests
*/
while (head->lio_refcnt > 0) {
int err;
head->lio_waiting = 0;
return (-1);
}
}
for (i = 0; i < nent; i++) {
return (-1);
}
}
}
return (0);
}
return (error);
}
/*
 * _lio_list_decr(aio_lio_t *) -- identity grounded by the forward
 * declaration near the top of the file.  Drops one reference from the
 * list head.
 * NOTE(review): truncated -- the name/parameter line and (presumably)
 * the lio_mutex lock/unlock pair around the decrement are missing.
 * Restore from revision 6e628f27.
 */
static void
{
head->lio_refcnt--;
}
/*
* __aio_suspend() cancellation handler.
*/
/* ARGSUSED */
/*
 * __aio_suspend() cancellation handler: give back the waiter count that
 * __aio_suspend() took out before blocking.  The counter passed in is
 * either _aio_kernel_suspend or _aio_suscv_cnt.
 * NOTE(review): this fragment may be truncated -- the upstream version
 * also asserted __aio_mutex held and released it here; confirm before
 * relying on the lock contract.
 */
static void
_aio_suspend_cleanup(int *counter)
{
	/* _aio_kernel_suspend or _aio_suscv_cnt */
	*counter -= 1;
}
/*
 * __aio_suspend() -- identity grounded by the aio_panic() message
 * below.  Common implementation behind aio_suspend()/aio_suspend64():
 * wait for at least one of the listed requests to complete, honoring
 * the timedwait mode (AIO_TIMEOUT_POLL / _WAIT / _INDEF).  Fast path:
 * if only kernel (RAW-IO) requests are pending, block in
 * _kaio(AIOSUSPEND); otherwise loop scanning the list for completed
 * thread-pool requests and sleep on __aio_mutex's condvar between
 * scans (_aiodone() wakes us via _aio_suscv_cnt).
 * NOTE(review): truncated fragment -- the signature, timeout setup,
 * _kaio() calls, largefile list handling, and the cond_wait calls are
 * all partially missing.  Restore from revision 6e628f27; the control
 * flow here is too lock-order sensitive to hand-reconstruct.
 */
static int
{
int cv_err; /* error code from cond_xxx() */
int kerr; /* error code from _kaio(AIOSUSPEND) */
int i;
int timedwait;
int req_outstanding;
#if !defined(_LP64)
#endif
#if defined(_LP64)
if (largefile)
aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif
if (nent <= 0) {
return (-1);
}
if (timo) {
return (-1);
}
/* Initialize start time if time monitoring desired */
} else {
/* content of timeout = 0 : polling */
}
} else {
/* timeout pointer = NULL : wait indefinitely */
}
#if !defined(_LP64)
if (largefile) {
for (i = 0; i < nent; i++) {
}
} else
#endif /* !_LP64 */
{
for (i = 0; i < nent; i++) {
}
}
/*
* The next "if -case" is required to accelerate the
* access to completed RAW-IO requests.
*/
if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
/* Only kernel requests pending */
/*
* _aio_kernel_suspend is used to detect completed non RAW-IO
* requests.
* As long as this thread resides in the kernel (_kaio) further
* asynchronous non RAW-IO requests could be submitted.
*/
/*
* Always do the kaio() call without using the KAIO_SUPPORTED()
* checks because it is not mandatory to have a valid fd
* set in the list entries, only the resultp must be set.
*
* _kaio(AIOSUSPEND ...) return values :
* 0: everythink ok, completed request found
* -1: error
* 1: no error : _aiodone awaked the _kaio(AIOSUSPEND,,)
* system call using _kaio(AIONOTIFY). It means, that some
* non RAW-IOs completed inbetween.
*/
if (!kerr) {
return (0);
}
} else {
}
/*
* Return kernel error code if no other IOs are outstanding.
*/
if (req_outstanding == 0) {
/* no IOs outstanding in the thread pool */
if (kerr == 1)
/* return "no IOs completed" */
return (-1);
}
/*
* IOs using the thread pool are outstanding.
*/
if (timedwait == AIO_TIMEOUT_WAIT) {
/* time monitoring */
if (hrtres <= 0)
hrtres = 1;
} else if (timedwait == AIO_TIMEOUT_POLL) {
}
for (;;) {
int error;
int inprogress;
/* first scan file system requests */
inprogress = 0;
for (i = 0; i < nent; i++) {
#if !defined(_LP64)
if (largefile) {
continue;
} else
#endif
{
continue;
}
if (error == EINPROGRESS)
inprogress = 1;
errno = 0;
return (0);
}
}
/*
* we have to return here, provided that all kernel RAW-IOs
* also completed.
* If the kernel was notified to return, then we have to check
* possible pending RAW-IOs.
*/
break;
}
/*
* There are outstanding IOs in the thread pool or the kernel
* was notified to return.
* Check pending RAW-IOs first.
*/
if (kerr == 1) {
/*
* _aiodone just notified the kernel about
* completed non RAW-IOs (AIONOTIFY was detected).
*/
if (timedwait == AIO_TIMEOUT_WAIT) {
/* Update remaining timeout for the kernel */
if (hrtres <= 0) {
/* timer expired */
break;
}
}
if (!kerr) {
return (0);
}
}
if (timedwait == AIO_TIMEOUT_POLL) {
break;
}
if (timedwait == AIO_TIMEOUT_WAIT) {
/* Update remaining timeout */
if (hrtres <= 0) {
/* timer expired */
break;
}
}
if (_aio_outstand_cnt == 0) {
continue;
}
_aio_suscv_cnt++; /* ID for _aiodone (wake up) */
if (timedwait == AIO_TIMEOUT_WAIT) {
&__aio_mutex, wait);
} else {
/* wait indefinitely */
}
/* this decrements _aio_suscv_cnt and drops __aio_mutex */
if (cv_err) {
break;
}
}
return (-1);
}
/*
 * NOTE(review): truncated -- presumably the public aio_suspend(3RT)
 * wrapper that forwards to __aio_suspend(); its name line and body are
 * missing.  Restore from revision 6e628f27.
 */
int
const timespec_t *timeout)
{
}
/*
 * NOTE(review): truncated fragment, most likely aio_error(3RT) per the
 * file header's function list: report the error status of an
 * asynchronous request, consulting the kernel via kaio when needed
 * (see the retained comment about skipping KAIO_SUPPORTED checks).
 * The signature, resultp reads, and kaio call lines are missing --
 * restore from revision 6e628f27.
 */
int
{
int error;
/*
* Always do the kaio() call without using the
* KAIO_SUPPORTED() checks because it is not
* mandatory to have a valid fd set in the
* aiocb, only the resultp must be set.
*/
return (-1);
}
}
}
return (error);
}
/*
 * NOTE(review): truncated fragment, most likely aio_return(3RT) (its
 * own comments describe marking the result "returned" and the
 * _aiodone() membar_producer()/membar_consumer() pairing).  The
 * signature, the membar_consumer() call, the result-field reads, and
 * the "already returned" marking are missing -- restore from revision
 * 6e628f27; the memory-ordering protocol here must not be
 * hand-reconstructed.
 */
{
int error;
/*
* The _aiodone() function stores resultp->aio_return before
* storing resultp->aio_errno (with an membar_producer() in
* between). We use membar_consumer() below to ensure proper
* memory ordering between _aiodone() and ourself.
*/
/*
* we use this condition to indicate either that
* aio_return() has been called before or should
* not have been called yet.
*/
return (-1);
}
/*
* Before we return, mark the result as being returned so that later
* calls to aio_return() will return the fact that the result has
* already been returned.
*/
/* retest, in case more than one thread actually got in here */
return (-1);
}
else {
(void) _aio_req_remove(reqp);
}
if (retval == -1)
return (retval);
}
/*
 * NOTE(review): truncated fragment -- drops a reference from an
 * aio_lio_t head and (when the count hits zero) presumably frees it;
 * the name line (likely _lio_remove), locking, and the free call are
 * missing.  Restore from revision 6e628f27.
 */
void
{
int refcnt;
head->lio_refcnt--;
if (refcnt == 0)
}
}
/*
* This function returns the number of asynchronous I/O requests submitted.
*/
/*
 * Per the comment above: returns the number of asynchronous I/O
 * requests submitted.  Loops over the worker threads, submitting one
 * fsync request per worker, and stops early (returning the partial
 * count i) on the first submission error.
 * NOTE(review): truncated -- the name line (presumably
 * __aio_fsync_bar), the other parameters, and the per-worker
 * submission call are missing.  Restore from revision 6e628f27.
 */
static int
int workerscnt)
{
int i;
int error;
for (i = 0; i < workerscnt; i++) {
if (error != 0) {
return (i);
}
}
return (i);
}
/*
 * NOTE(review): truncated fragment of aio_fsync(3RT).  The visible
 * skeleton matches the retained comments: validate the aiocb, register
 * SIGEV_THREAD notification, fall back to plain fsync()/fdatasync()
 * when no worker threads exist, otherwise allocate a list head and
 * fan an fsync request out to every worker's queue (the last worker
 * to run it performs the actual fsync); on partial submission, free
 * the head only if zero requests were queued.  The signature,
 * op-validation lines, and the __aio_fsync_bar() call are missing --
 * restore from revision 6e628f27.
 */
int
{
int fret;
return (0);
return (-1);
}
return (-1);
}
return (-1);
if (_aio_sigev_thread(aiocbp) != 0)
return (-1);
/*
* Kernel aio_fsync() is not supported.
* We force user-level aio_fsync() just
* for the notification side-effect.
*/
return (-1);
/*
* The first asynchronous I/O request in the current process will
* create a bunch of workers (via __uaio_init()). If the number
* of workers is zero then the number of pending asynchronous I/O
* requests is zero. In such a case only execute the standard
* fsync(3C) or fdatasync(3RT) as appropriate.
*/
if (__rw_workerscnt == 0) {
else
}
/*
* re-use aio_offset as the op field.
* O_DSYNC - fdatasync()
* O_SYNC - fsync()
*/
/*
* Create a list of fsync requests. The worker that
* gets the last request will do the fsync request.
*/
head = _aio_lio_alloc();
return (-1);
}
head->lio_largefile = 0;
/*
* Insert an fsync request on every worker's queue.
*/
if (fret != __rw_workerscnt) {
/*
* Fewer fsync requests than workers means that it was
* not possible to submit fsync requests to all workers.
* Actions:
* a) number of fsync requests submitted is 0:
* => free allocated memory (aio_lio_t).
* b) number of fsync requests submitted is > 0:
* => the last worker executing the fsync request
* will free the aio_lio_t struct.
*/
if (fret == 0)
return (-1);
}
return (0);
}
/*
 * NOTE(review): truncated fragment of aio_cancel(3RT).  Visible return
 * values follow the POSIX contract: AIO_ALLDONE when everything
 * already completed, AIO_CANCELED when something was canceled,
 * AIO_NOTCANCELED otherwise, and a fall-through to aiocancel_all(fd)
 * for the "cancel everything on this fd" case.  The signature, the
 * per-request lookup, and the cancellation calls are missing --
 * restore from revision 6e628f27.
 */
int
{
int done = 0;
int canceled = 0;
return (-1);
return (-1);
}
return (AIO_ALLDONE);
}
if (done)
return (AIO_ALLDONE);
if (canceled)
return (AIO_CANCELED);
return (AIO_NOTCANCELED);
}
return (AIO_ALLDONE);
}
return (aiocancel_all(fd));
}
/*
* __aio_waitn() cancellation handler.
*/
/* ARGSUSED */
/*
 * __aio_waitn() cancellation handler: if another aio_waitn() call is
 * queued up behind the one being torn down, signal it so it can take
 * its turn.
 * NOTE(review): this fragment may be truncated relative to upstream
 * (no mutex release / flag clearing is visible); behavior preserved
 * exactly as written.
 */
static void
_aio_waitn_cleanup(void *arg)
{
	int waitn_pending = (_aio_flags & AIO_LIB_WAITN_PENDING) != 0;

	/* check for pending aio_waitn() calls */
	if (waitn_pending)
		(void) cond_signal(&_aio_waitn_cv);
}
/*
 * aio_waitn can be used to reap the results of several I/O operations
 * that were submitted asynchronously through the existing POSIX
 * interfaces: lio_listio, aio_write or aio_read.  The number of I/Os
 * that have completed is returned in "nwait".  Otherwise aio_waitn
 * might also return if the timeout expires.  Additionally, aio_waitn
 * returns 0 if successful or -1 if an error occurred.
 */
/*
 * __aio_waitn() -- common implementation behind aio_waitn()/
 * aio_waitn64(): reap up to *nwait completed requests into the
 * caller's list, returning the count actually reaped in *nwait.
 * Visible structure: serialize against concurrent aio_waitn() calls
 * (AIO_LIB_WAITN), fast path through _kaio when only kernel RAW-IO
 * requests exist, otherwise loop draining the library "done" queue and
 * the kernel in AIO_WAITN_MAXIOCBS-sized batches, sleeping with
 * AIO_IO_WAITING set until _aio_waitn_wakeup() fires or the timeout
 * expires.
 * NOTE(review): truncated fragment -- the signature, dnwait/kwaitcnt
 * declarations, the _kaio(AIOWAITN) calls, and the cond_wait calls are
 * partially missing.  Restore from revision 6e628f27.
 */
static int
{
int error = 0;
int uerrno = 0;
int kerrno = 0; /* save errno from _kaio() call */
int timedwait = AIO_TIMEOUT_UNDEF;
return (-1);
}
/*
* Only one running aio_waitn call per process allowed.
* Further calls will be blocked here until the running
* call finishes.
*/
while (_aio_flags & AIO_LIB_WAITN) {
*nwait = 0;
return (0);
}
if (error != 0) {
*nwait = 0;
return (-1);
}
}
if (*nwait >= AIO_WAITN_MAXIOCBS) {
error = -1;
dnwait = 0;
goto out;
}
if (timedwait != AIO_TIMEOUT_INDEF) {
}
}
/*
* If both counters are still set to zero, then only
*/
if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
for (;;) {
if (knentcnt > AIO_WAITN_MAXIOCBS)
if (error == 0) {
break;
if (timedwait == AIO_TIMEOUT_WAIT) {
if (error == -1) {
/* timer expired */
break;
}
}
continue;
}
if (dnwait > 0)
error = 0;
break;
}
break;
}
/* fatal error */
break;
}
goto out;
}
if (timedwait == AIO_TIMEOUT_UNDEF) {
error = -1;
dnwait = 0;
goto out;
}
if (timedwait != AIO_TIMEOUT_INDEF) {
}
}
for (;;) {
/*
* Calculate sum of active non RAW-IO requests (sum_reqs).
* If the expected amount of completed requests (*nwait) is
* greater than the calculated sum (sum_reqs) then
* use _kaio to check pending RAW-IO requests.
*/
if (kwaitcnt != 0) {
if (knentcnt > AIO_WAITN_MAXIOCBS)
if (error == 0) {
} else {
switch (errno) {
case EINVAL:
case EAGAIN:
kerrno = 0; /* ignore _kaio() errno */
*nwait = _aio_doneq_cnt +
error = 0;
break;
case EINTR:
case ETIME:
error = 0;
break;
default:
break;
}
}
if (error)
break; /* fatal kernel error */
}
/* check completed FS requests in the "done" queue */
/* get done requests */
}
}
break;
}
if (timedwait == AIO_TIMEOUT_WAIT &&
/* timer expired */
break;
}
/*
* then sleep here. _aiodone() will call _aio_waitn_wakeup()
* to wakeup this thread as soon as the required amount of
*/
/*
* _aio_waitn_wakeup() will wake up this thread when:
* - _aio_waitncnt requests are completed or
* - _aio_outstand_cnt becomes zero.
* sig_cond_reltimedwait() could also return with
* a timeout error (ETIME).
*/
if (*nwait < _aio_outstand_cnt)
_aio_waitncnt = *nwait;
else
if (wait)
&__aio_mutex, wait);
else
&__aio_mutex);
_aio_flags &= ~AIO_IO_WAITING;
continue;
}
if (uerrno != 0)
}
if (timedwait == AIO_TIMEOUT_POLL) {
/* polling or timer expired */
break;
}
}
if (errno)
error = -1;
else
error = 0;
out:
return (error);
}
/*
 * NOTE(review): truncated -- presumably the public aio_waitn(3RT)
 * wrapper forwarding to __aio_waitn(); name line and body missing.
 * Restore from revision 6e628f27.
 */
int
const timespec_t *timeout)
{
}
/*
 * Wake up blocked waiters when an asynchronous I/O has completed.
 *
 * __aio_waitn() sleeps with AIO_IO_WAITING set until either enough
 * requests have completed (_aio_waitncnt reaches zero) or no requests
 * remain outstanding (_aio_outstand_cnt == 0).  __aio_suspend()
 * waiters are counted in _aio_suscv_cnt; each one re-checks its own
 * aiocb list after being broadcast, so we may wake more than strictly
 * necessary.
 *
 * NOTE(review): all counters touched here are aio-library globals
 * presumably protected by __aio_mutex -- confirm callers hold it.
 */
void
_aio_waitn_wakeup(void)
{
	if (_aio_flags & AIO_IO_WAITING) {
		/*
		 * BUG FIX(review): the decrement below was missing, which
		 * made the broadcast the body of "if (_aio_waitncnt > 0)".
		 * That both left _aio_waitncnt stuck and made the
		 * documented "_aio_waitncnt == 0" wakeup condition
		 * unreachable.  Count this completion against the number
		 * aio_waitn() is still waiting for, then test the wakeup
		 * conditions.
		 */
		if (_aio_waitncnt > 0)
			_aio_waitncnt--;
		if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
		    _aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	} else {
		/* Wake up waiting aio_suspend calls */
		if (_aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	}
}
/*
* timedwait values :
* AIO_TIMEOUT_POLL : polling
* AIO_TIMEOUT_WAIT : timeout
* AIO_TIMEOUT_INDEF : wait indefinitely
*/
/*
 * Classify/convert the user timeout per the comment above this block:
 * sets the timedwait mode to AIO_TIMEOUT_POLL (zero timeout),
 * AIO_TIMEOUT_WAIT (finite timeout), or AIO_TIMEOUT_INDEF (NULL
 * pointer).  Returns 0 on success, -1 on an invalid timespec.
 * NOTE(review): truncated -- the name line (presumably
 * _aio_check_timeout), the tv_nsec range validation, and the mode
 * assignments are missing.  Restore from revision 6e628f27.
 */
static int
{
if (utimo) {
return (-1);
}
}
} else {
/* polling */
}
} else {
}
return (0);
}
#if !defined(_LP64)
/*
 * NOTE(review): truncated fragment -- 32-bit-process largefile variant
 * (uses _aio_sigev_thread64), most likely aio_read64.  Signature and
 * validation lines missing; restore from revision 6e628f27.
 */
int
{
return (-1);
}
return (-1);
}
if (_aio_sigev_thread64(aiocbp) != 0)
return (-1);
(AIO_KAIO | AIO_NO_DUPS)));
}
/*
 * NOTE(review): truncated fragment -- largefile variant, most likely
 * aio_write64 (parallel to the preceding fragment).  Signature and
 * validation lines missing; restore from revision 6e628f27.
 */
int
{
return (-1);
}
return (-1);
}
if (_aio_sigev_thread64(aiocbp) != 0)
return (-1);
(AIO_KAIO | AIO_NO_DUPS)));
}
/*
 * NOTE(review): truncated fragment of lio_listio64 -- the largefile
 * twin of the lio_listio fragment earlier in this file (grounded by
 * _aio_sigev_thread64 and the AIOAREAD64/AIOAWRITE64 opcodes below).
 * Same missing lines as its sibling: signature, _kaio() calls,
 * locking, and per-request error collection.  Restore from revision
 * 6e628f27; do not hand-reconstruct.
 */
int
{
int aio_ufs = 0;
int oerrno = 0;
int state = 0;
int EIOflg = 0;
int rw;
int do_kaio = 0;
int error;
int i;
if (!_kaio_ok)
_kaio_init();
if (aio_list_max == 0)
return (-1);
}
switch (mode) {
case LIO_WAIT:
break;
case LIO_NOWAIT:
break;
default:
return (-1);
}
for (i = 0; i < nent; i++) {
continue;
return (-1);
}
if (_aio_sigev_thread64(aiocbp) != 0)
return (-1);
else {
do_kaio++;
else
}
}
if (_aio_sigev_thread_init(sigevp) != 0)
return (-1);
if (do_kaio) {
if (error == 0)
return (0);
} else {
error = -1;
}
/*
* If LIO_WAIT, or notification required, allocate a list head.
*/
head = _aio_lio_alloc();
if (head) {
port_notify_t *pn =
} else { /* SIGEV_SIGNAL */
}
}
}
/*
*/
for (i = 0; i < nent; i++) {
if (head)
continue;
}
if (aiocbp->aio_reqprio != 0) {
EIOflg = 1;
if (head)
continue;
}
/*
* submit an AIO request with flags AIO_NO_KAIO
* to avoid the kaio() syscall in _aio_rw()
*/
switch (aiocbp->aio_lio_opcode) {
case LIO_READ:
rw = AIOAREAD64;
break;
case LIO_WRITE:
rw = AIOAWRITE64;
break;
}
(AIO_NO_KAIO | AIO_NO_DUPS));
if (error == 0)
aio_ufs++;
else {
if (head)
EIOflg = 1;
}
}
}
if (EIOflg) {
return (-1);
}
/*
* call kaio(AIOLIOWAIT) to get all outstanding
* kernel AIO requests
*/
while (head->lio_refcnt > 0) {
int err;
head->lio_waiting = 0;
return (-1);
}
}
for (i = 0; i < nent; i++) {
return (-1);
}
}
}
return (0);
}
return (error);
}
/*
 * NOTE(review): truncated -- presumably aio_suspend64, forwarding to
 * __aio_suspend() with the largefile flag; name line and body missing.
 * Restore from revision 6e628f27.
 */
int
const timespec_t *timeout)
{
}
/*
 * NOTE(review): truncated fragment, most likely aio_error64 (largefile
 * twin of the aio_error fragment earlier in the file; same retained
 * comment about skipping KAIO_SUPPORTED checks).  Signature and kaio
 * call lines missing; restore from revision 6e628f27.
 */
int
{
int error;
/*
* Always do the kaio() call without using the
* KAIO_SUPPORTED() checks because it is not
* mandatory to have a valid fd set in the
* aiocb, only the resultp must be set.
*/
return (-1);
}
}
}
return (error);
}
/*
 * NOTE(review): truncated fragment, most likely aio_return64
 * (largefile twin of the aio_return fragment earlier in the file --
 * identical retained comments about the _aiodone() membar protocol and
 * the "result already returned" marking).  Signature,
 * membar_consumer(), and result-field accesses missing; restore from
 * revision 6e628f27.
 */
{
int error;
/*
* The _aiodone() function stores resultp->aio_return before
* storing resultp->aio_errno (with an membar_producer() in
* between). We use membar_consumer() below to ensure proper
* memory ordering between _aiodone() and ourself.
*/
/*
* we use this condition to indicate either that
* aio_return() has been called before or should
* not have been called yet.
*/
return (-1);
}
/*
* Before we return, mark the result as being returned so that later
* calls to aio_return() will return the fact that the result has
* already been returned.
*/
/* retest, in case more than one thread actually got in here */
return (-1);
}
else {
(void) _aio_req_remove(reqp);
}
if (retval == -1)
return (retval);
}
/*
 * NOTE(review): truncated -- largefile twin of the __aio_fsync_bar
 * fragment earlier in the file: submits one fsync request per worker
 * and returns the number successfully submitted, stopping early on the
 * first error.  Name line, other parameters, and the submission call
 * are missing; restore from revision 6e628f27.
 */
static int
int workerscnt)
{
int i;
int error;
for (i = 0; i < workerscnt; i++) {
if (error != 0) {
return (i);
}
}
return (i);
}
/*
 * NOTE(review): truncated fragment of aio_fsync64 (largefile twin of
 * the aio_fsync fragment earlier in this file, grounded by
 * _aio_sigev_thread64).  Same skeleton: fall back to plain
 * fsync()/fdatasync() when no workers exist, else fan an fsync request
 * out to every worker's queue via a list head.  Signature,
 * op-validation, and the barrier call are missing; restore from
 * revision 6e628f27.
 */
int
{
int fret;
return (0);
return (-1);
}
return (-1);
}
return (-1);
if (_aio_sigev_thread64(aiocbp) != 0)
return (-1);
/*
* Kernel aio_fsync() is not supported.
* We force user-level aio_fsync() just
* for the notification side-effect.
*/
return (-1);
/*
* The first asynchronous I/O request in the current process will
* create a bunch of workers (via __uaio_init()). If the number
* of workers is zero then the number of pending asynchronous I/O
* requests is zero. In such a case only execute the standard
* fsync(3C) or fdatasync(3RT) as appropriate.
*/
if (__rw_workerscnt == 0) {
else
}
/*
* re-use aio_offset as the op field.
* O_DSYNC - fdatasync()
* O_SYNC - fsync()
*/
/*
* Create a list of fsync requests. The worker that
* gets the last request will do the fsync request.
*/
head = _aio_lio_alloc();
return (-1);
}
/*
* Insert an fsync request on every worker's queue.
*/
if (fret != __rw_workerscnt) {
/*
* Fewer fsync requests than workers means that it was
* not possible to submit fsync requests to all workers.
* Actions:
* a) number of fsync requests submitted is 0:
* => free allocated memory (aio_lio_t).
* b) number of fsync requests submitted is > 0:
* => the last worker executing the fsync request
* will free the aio_lio_t struct.
*/
if (fret == 0)
return (-1);
}
return (0);
}
/*
 * NOTE(review): truncated fragment, most likely aio_cancel64
 * (largefile twin of the aio_cancel fragment earlier in this file;
 * identical visible return-value structure ending in
 * aiocancel_all(fd)).  Signature, lookup, and cancellation calls
 * missing; restore from revision 6e628f27.
 */
int
{
int done = 0;
int canceled = 0;
return (-1);
return (-1);
}
return (AIO_ALLDONE);
}
if (done)
return (AIO_ALLDONE);
if (canceled)
return (AIO_CANCELED);
return (AIO_NOTCANCELED);
}
return (AIO_ALLDONE);
}
return (aiocancel_all(fd));
}
/*
 * NOTE(review): truncated -- presumably aio_waitn64, forwarding to
 * __aio_waitn() with the largefile sizeof; name line and body missing.
 * Restore from revision 6e628f27.
 */
int
const timespec_t *timeout)
{
}
#endif /* !defined(_LP64) */