/* posix_aio.c revision 7c478bd95313f5f23a4c958a745db2134aa03244 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* posix_aio.c implements the POSIX async. I/O
* functions for librt
*
* aio_read
* aio_write
* aio_error
* aio_return
* aio_suspend
* lio_listio
* aio_fsync
* aio_cancel
*/
#include "libaio.h"
extern int __fdsync(int, int);
/* __aio_suspend stuff */
extern int _aio_kernel_suspend;
extern int _aio_suscv_cnt;
/* __aio_waitn stuff */
extern int _aio_flags;
extern cond_t _aio_iowait_cv;
extern int _aio_doneq_cnt;
extern int _aio_outstand_cnt;
extern int _aio_waitncnt;
int *);
/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define AIO_TIMEOUT_INDEF -1
#define AIO_TIMEOUT_POLL 0
#define AIO_TIMEOUT_WAIT 1
#define AIO_TIMEOUT_UNDEF 2
/*
* List I/O list head stuff
*/
static int _aio_lio_alloc(aio_lio_t **);
static void _aio_lio_free(aio_lio_t *);
static void _lio_list_decr(aio_lio_t *);
/*
 * NOTE(review): truncated fragment.  The extraction dropped this
 * function's name, parameter list, and most of its body; by position
 * (first definition after the static declarations, see the function
 * list at the top of the file) this is presumably aio_read() --
 * confirm against the complete source.  Only an error-return path
 * and a stray closing brace survive.
 */
int
{
return (-1);
}
}
/*
 * NOTE(review): truncated fragment.  Signature and body interior were
 * lost in extraction; by position this is presumably aio_write() (see
 * the function list in the file header) -- confirm against the
 * complete source.  Only an error-return path is visible.
 */
int
{
return (-1);
}
}
int
{
int i, err;
int aio_ufs = 0;
int oerrno = 0;
int state = 0;
static long aio_list_max = 0;
int EIOflg = 0;
int rw;
int do_kaio = 0;
if (!_kaio_ok)
_kaio_init();
if (aio_list_max == 0)
return (-1);
}
switch (mode) {
case LIO_WAIT:
break;
case LIO_NOWAIT:
break;
default:
return (-1);
}
for (i = 0; i < nent; i++) {
if (list[i]) {
do_kaio++;
else
} else
}
}
if (do_kaio) {
return (0);
} else {
err = -1;
}
/*
* If LIO_WAIT, or signal required, allocate a list head.
*/
(void) _aio_lio_alloc(&head);
if (head) {
(sig->sigev_signo > 0)) {
} else
}
/*
*/
for (i = 0; i < nent; i++) {
if (list[i] &&
if (head)
continue;
}
list[i]->aio_fildes);
if (list[i]->aio_reqprio < 0) {
EIOflg = 1;
if (head)
continue;
}
/*
* submit an AIO request with flags AIO_NO_KAIO
* to avoid the kaio() syscall in _aio_rw()
*/
switch (list[i]->aio_lio_opcode) {
case LIO_READ:
break;
case LIO_WRITE:
break;
}
sig);
else
NULL);
if (err != 0) {
if (head)
EIOflg = 1;
} else
aio_ufs++;
} else {
if (head)
continue;
}
}
}
if (EIOflg) {
return (-1);
}
/*
* call kaio(AIOLIOWAIT) to get all outstanding
* kernel AIO requests
*/
}
while (head->lio_refcnt > 0) {
if (errno) {
return (-1);
}
}
for (i = 0; i < nent; i++) {
if (list[i] &&
return (-1);
}
}
}
return (0);
}
return (err);
}
/*
 * NOTE(review): truncated fragment -- presumably _lio_list_decr()
 * (forward-declared above), which decrements the reference count on a
 * list-I/O head.  The parameter list and any locking or signalling
 * around the decrement are missing from this extraction; confirm
 * against the complete source before relying on this.
 */
static void
{
head->lio_refcnt--;
}
extern void _cancelon(void);
extern void _canceloff(void);
int
{
int cv_err; /* error code from cond_xxx() */
int kerr; /* error code from _kaio(AIOSUSPEND) */
int i;
int timedwait;
int req_outstanding;
if (nent <= 0) {
return (-1);
}
if (timo) {
return (-1);
}
/* Initialize start time if time monitoring desired */
} else {
/* content of timeout = 0 : polling */
}
} else {
/* timeout pointer = NULL : wait indefinitely */
}
if (largefile) {
/* _LARGEFILE64_SOURCE && !_LP64 */
for (i = 0; i < nent; i++) {
}
} else {
for (i = 0; i < nent; i++) {
}
}
/*
* The next "if -case" is required to accelerate the
* access to completed RAW-IO requests.
*/
if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
/* Only kernel requests pending */
_cancelon();
/*
* _aio_kernel_suspend is used to detect completed non RAW-IO
* requests.
* As long as this thread resides in the kernel (_kaio) further
* asynchronous non RAW-IO requests could be submitted.
*/
_aio_lock();
_aio_unlock();
/*
* Always do the kaio() call without using the KAIO_SUPPORTED()
* checks because it is not mandatory to have a valid fd
* set in the list entries, only the resultp must be set.
*
* _kaio(AIOSUSPEND ...) return values :
* 0: everythink ok, completed request found
* -1: error
* 1: no error : _aiodone awaked the _kaio(AIOSUSPEND,,)
* system call using _kaio(AIONOTIFY). It means, that some
* non RAW-IOs completed inbetween.
*/
if (largefile)
else
_aio_lock();
_aio_unlock();
_canceloff();
if (!kerr)
return (0);
} else {
}
/* Return kernel error code, if no other IOs are outstanding */
_aio_lock();
_aio_unlock();
if (req_outstanding == 0) {
/* no IOs outstanding in the thread pool */
if (kerr == 1)
/* return "no IOs completed" */
return (-1);
}
/* IOs using the thread pool are outstanding */
if (timedwait == AIO_TIMEOUT_WAIT) {
/* time monitoring */
if (hrtres <= 0)
hrtres = 1;
} else {
if (timedwait == AIO_TIMEOUT_POLL) {
}
}
for (;;) {
int aio_errno;
int aio_inprogress;
/* first scan file system requests */
aio_inprogress = 0;
if (largefile) {
for (i = 0; i < nent; i++) {
continue;
if (aio_errno == EINPROGRESS) {
aio_inprogress = 1;
} else {
errno = 0;
return (0);
}
}
}
} else {
for (i = 0; i < nent; i++) {
continue;
if (aio_errno == EINPROGRESS) {
aio_inprogress = 1;
} else {
errno = 0;
return (0);
}
}
}
}
/*
* we have to return here, provided that all kernel RAW-IOs
* also completed.
* If the kernel was notified to return, then we have to check
* possible pending RAW-IOs.
*/
if (_aio_outstand_cnt == 0 && aio_inprogress == 0 &&
kerr != 1) {
break;
}
/*
* There are outstanding IOs in the thread pool or the kernel
* was notified to return.
* Check pending RAW-IOs first.
*/
if (kerr == 1) {
/*
* _aiodone just notified the kernel about
* completed non RAW-IOs (AIONOTIFY was detected).
*/
if (timedwait == AIO_TIMEOUT_WAIT) {
/* Update remaining timeout for the kernel */
if (hrtres <= 0) {
/* timer expired */
break;
}
}
_aio_lock();
_aio_unlock();
_cancelon();
if (largefile)
wait, -1);
else
wait, -1);
_canceloff();
_aio_lock();
_aio_unlock();
if (!kerr) {
return (0);
}
}
if (timedwait == AIO_TIMEOUT_POLL) {
break;
}
if (timedwait == AIO_TIMEOUT_WAIT) {
/* Update remaining timeout */
if (hrtres <= 0) {
/* timer expired */
break;
}
}
_aio_lock();
if (_aio_outstand_cnt == 0) {
_aio_unlock();
continue;
}
_aio_suscv_cnt++; /* ID for _aiodone (wake up) */
if (timedwait == AIO_TIMEOUT_WAIT) {
&__aio_mutex, wait);
} else {
/* wait indefinitely */
}
_aio_unlock();
if (cv_err) {
break;
}
}
return (-1);
}
/*
 * NOTE(review): truncated fragment -- presumably aio_error(), which
 * reports the error status of an asynchronous request and removes a
 * completed request from the library's queue (_aio_req_remove).  The
 * signature, the kaio() call referenced by the surviving comment, and
 * several guard conditions are missing; confirm against the full
 * source.
 */
int
{
if (aio_errno == EINPROGRESS) {
/*
* Always do the kaio() call without using
* the KAIO_SUPPORTED()
* checks because it is not mandatory to
* have a valid fd
* set in the aiocb, only the resultp must be set.
*/
return (-1);
}
_aio_lock();
(void) _aio_req_remove(reqp);
}
_aio_unlock();
}
return (aio_errno);
}
/*
 * NOTE(review): truncated fragment -- presumably aio_return(): the
 * surviving comments describe marking a result as "already returned"
 * and removing the request from the queue.  The entire signature line
 * is missing (the block begins at a bare opening brace), as are the
 * guard conditions; confirm against the full source.  Note the
 * exit(-1) below: the comment above it says an invalid control block
 * cannot be detected gracefully (a SIGSEGV would occur).
 */
{
/*
* graceful detection of an invalid cb is not possible. a
* SIGSEGV will be generated if it is invalid.
*/
exit(-1);
}
/*
* we use this condition to indicate that
* aio_return has been called before
*/
return (-1);
}
/*
* Before we return mark the result as being returned so that later
* calls to aio_return() will return the fact that the result has
* already been returned
*/
_aio_lock();
(void) _aio_req_remove(reqp);
}
_aio_unlock();
}
return (ret);
}
/*
 * NOTE(review): truncated fragment.  Signature missing; the visible
 * residue (a local refcnt and an "if (head)" guard) suggests a helper
 * that drops a reference on a list-I/O head and frees it when the
 * count reaches zero (possibly _lio_remove) -- the decrement itself
 * and the free call were lost in extraction.  Confirm against the
 * full source.
 */
void
{
int refcnt;
if (head) {
if (!refcnt)
}
}
/*
 * NOTE(review): truncated fragment.  Signature missing; the body that
 * survives removes a request from the library queue under the aio
 * lock (presumably a helper such as _aio_remove(aio_req_t *reqp) --
 * confirm against the full source).
 */
void
{
_aio_lock();
(void) _aio_req_remove(reqp);
_aio_unlock();
}
/*
 * NOTE(review): truncated fragment -- presumably _aio_lio_alloc()
 * (forward-declared above): allocates a list-I/O head, taking one
 * from _lio_head_freelist when available, under __lio_mutex.  The
 * parameter list, the freelist manipulation, and the failure
 * condition that leads to the first return (-1) were all lost in
 * extraction; only the lock/unlock skeleton and return paths remain.
 */
int
{
(void) mutex_lock(&__lio_mutex);
if (_lio_head_freelist == NULL) {
} else {
}
(void) mutex_unlock(&__lio_mutex);
return (-1);
}
(void) mutex_unlock(&__lio_mutex);
return (0);
}
/*
 * NOTE(review): truncated fragment -- presumably _aio_lio_free()
 * (forward-declared above): returns a list-I/O head to the freelist
 * under __lio_mutex.  The parameter list and the actual freelist
 * insertion were lost in extraction; only the lock/unlock pair
 * remains.
 */
void
{
(void) mutex_lock(&__lio_mutex);
(void) mutex_unlock(&__lio_mutex);
}
/*
* This function returns the number of asynchronous I/O requests submitted.
*/
/*
 * NOTE(review): truncated fragment.  Per the comment immediately above
 * in the file, this static helper returns the number of asynchronous
 * I/O (fsync) requests submitted: it loops over workerscnt workers,
 * stops early on a submission error, and returns the count submitted
 * so far.  The first part of the signature and the per-iteration
 * submission call that sets err were lost in extraction; confirm
 * against the full source.
 */
static int
int workerscnt)
{
int i;
int err;
for (i = 0; i < workerscnt; i++) {
if (err != 0) {
return (i);
}
}
return (i);
}
/*
* This function is called from aio_fsync(3RT).
*/
/*
 * NOTE(review): truncated fragment -- presumably aio_fsync() (the
 * file comment above says "called from aio_fsync(3RT)").  What
 * survives: when no read/write workers exist it falls back to plain
 * __fdsync(); otherwise it allocates a list head and fans fsync
 * requests out to every read worker and every write worker via the
 * barrier-submit helper, treating a partial submission as an error.
 * The signature, argument validation, and the actual submit calls
 * that set retval were lost in extraction; confirm against the full
 * source.
 */
int
{
int retval;
return (0);
}
return (-1);
}
return (-1);
/*
* The first asynchronous I/O request in the current process
* will create a bunch of workers.
* If the sum of workers (read + write) is zero then the
* number of pending asynchronous I/O requests is zero.
* In such a case only execute the standard fsync(3C) or
* fdatasync(3RT) as appropriate (see flag of __fdsync()).
*/
if ((__wr_workerscnt + __rd_workerscnt) == 0) {
else
}
/*
* re-use aio_offset as the op field.
* O_DSYNC - fdatasync()
* O_SYNC - fsync()
*/
/*
* create a list of fsync requests. the worker
* that gets the last request will do the fsync
* request.
*/
(void) _aio_lio_alloc(&head);
return (-1);
}
/* insert an fsync request on every read workers' queue. */
if (retval != __rd_workerscnt) {
/*
* Less fsync requests than workers means that
* it was not possible to submit fsync requests to all
* workers.
* Actions:
* a) number of fsync requests submitted is 0:
* => free allocated memory (aio_lio_t).
* b) number of fsync requests submitted is > 0:
* => the last worker executing the fsync request
* will free the aio_lio_t struct.
*/
if (retval == 0)
return (-1);
}
/* insert an fsync request on every write workers' queue. */
if (retval != __wr_workerscnt)
return (-1);
return (0);
}
/*
 * NOTE(review): truncated fragment -- presumably aio_cancel(): the
 * AIO_ALLDONE / AIO_CANCELED / AIO_NOTCANCELED return values and the
 * final aiocancel_all(fd) fallback (cancel everything on the fd when
 * no specific aiocb is given) match that interface.  The signature,
 * the lookup of the request, and the cancellation call that sets
 * "canceled"/"done" were lost in extraction; confirm against the
 * full source.
 */
int
{
int done = 0;
int canceled = 0;
return (-1);
_aio_lock();
_aio_unlock();
return (AIO_ALLDONE);
} else {
&done);
_aio_unlock();
if (done)
return (AIO_ALLDONE);
else if (canceled)
return (AIO_CANCELED);
else
return (AIO_NOTCANCELED);
}
}
return (AIO_ALLDONE);
}
return (aiocancel_all(fd));
}
/*
* aio_waitn can be used to reap the results of several I/O operations that
* existing POSIX interfaces: lio_listio, aio_write or aio_read.
* completed is returned in "nwait". Otherwise aio_waitn might also
* return if the timeout expires. Additionally, aio_waitn returns 0 if
* successful or -1 if an error occurred.
*/
/*ARGSUSED*/
int
{
int err = 0;
int uerrno = 0;
int kerrno = 0; /* save errno from _kaio() call */
int timedwait = AIO_TIMEOUT_UNDEF;
#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64)
#endif
return (-1);
}
return (-1);
}
/*
* Only one running aio_waitn call per process allowed.
* Further calls will be blocked here until the running
* call finishes.
*/
(void) mutex_lock(&__aio_waitn_mutex);
while (_aio_flags & AIO_LIB_WAITN) {
(void) mutex_unlock(&__aio_waitn_mutex);
*nwait = 0;
return (0);
}
if (err != 0) {
(void) mutex_unlock(&__aio_waitn_mutex);
*nwait = 0;
return (-1);
}
}
(void) mutex_unlock(&__aio_waitn_mutex);
if (*nwait >= AIO_WAITN_MAXIOCBS) {
if (err) {
*nwait = 0;
return (-1);
}
if (timedwait != AIO_TIMEOUT_INDEF) {
}
}
/*
* _aio_lock() is not required at this time, but the
* condition is that "_aio_doneq_cnt" has to be updated
* before "_aio_outstand_cnt". Otherwise we could hit
* a zero value in both counters during the transition
* time (see _aiodone).
*
* If both counters are still set to zero, then only
*/
if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
for (;;) {
if (knentcnt > AIO_WAITN_MAXIOCBS)
if (err == 0) {
break;
if (timedwait == AIO_TIMEOUT_WAIT) {
if (err == -1) {
/* timer expired */
break;
}
}
continue;
}
if (dnwait > 0)
err = 0;
break;
}
break;
}
/* fatal error */
break;
}
/* check for pending aio_waitn() calls */
(void) mutex_lock(&__aio_waitn_mutex);
_aio_flags &= ~AIO_LIB_WAITN;
if (_aio_flags & AIO_LIB_WAITN_PENDING) {
(void) cond_signal(&_aio_waitn_cv);
}
(void) mutex_unlock(&__aio_waitn_mutex);
return (err);
}
if (timedwait == AIO_TIMEOUT_UNDEF) {
if (err) {
*nwait = 0;
return (-1);
}
if (timedwait != AIO_TIMEOUT_INDEF) {
}
}
for (;;) {
/*
* Calculate sum of active non RAW-IO requests (sum_reqs).
* If the expected amount of completed requests (*nwait) is
* greater than the calculated sum (sum_reqs) then
* use _kaio to check pending RAW-IO requests.
*/
(void) mutex_lock(&__aio_mutex);
(void) mutex_unlock(&__aio_mutex);
if (kwaitcnt != 0) {
if (knentcnt > AIO_WAITN_MAXIOCBS)
(void) mutex_lock(&__aio_waitn_mutex);
(void) mutex_unlock(&__aio_waitn_mutex);
(void) mutex_lock(&__aio_waitn_mutex);
(void) mutex_unlock(&__aio_waitn_mutex);
if (err == 0) {
} else {
switch (errno) {
case EINVAL:
case EAGAIN:
kerrno = 0; /* ignore _kaio() errno */
(void) mutex_lock(&__aio_mutex);
*nwait = _aio_doneq_cnt +
(void) mutex_unlock(&__aio_mutex);
err = 0;
break;
case EINTR:
case ETIME:
err = 0;
break;
default:
break;
}
}
if (err)
break; /* fatal kernel error */
}
/* check completed FS requests in the "done" queue */
(void) mutex_lock(&__aio_mutex);
/* get done requests */
#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64)
if (largefile) {
} else
#endif
}
}
(void) mutex_unlock(&__aio_mutex);
break;
}
if (timedwait == AIO_TIMEOUT_WAIT) {
/* timer expired */
(void) mutex_unlock(&__aio_mutex);
break;
}
}
/*
* then sleep here.
* _aiodone() will wakeup this thread as soon as the
*/
/*
* _aiodone() will wake up this thread as soon as
* - _aio_waitncnt -requests are completed or
* - _aio_outstand_cnt becomes zero.
* cond_reltimedwait() could also return with
* timeout error (ETIME).
*/
if (*nwait < _aio_outstand_cnt)
_aio_waitncnt = *nwait;
else
(void) mutex_lock(&__aio_waitn_mutex);
(void) mutex_unlock(&__aio_waitn_mutex);
if (wait)
&__aio_mutex, wait);
else
&__aio_mutex);
(void) mutex_lock(&__aio_waitn_mutex);
_aio_flags &= ~AIO_IO_WAITING;
(void) mutex_unlock(&__aio_waitn_mutex);
(void) mutex_unlock(&__aio_mutex);
continue;
}
if (uerrno != 0)
}
(void) mutex_unlock(&__aio_mutex);
if (timedwait == AIO_TIMEOUT_POLL) {
/* polling or timer expired */
break;
}
}
/* check for pending aio_waitn() calls */
(void) mutex_lock(&__aio_waitn_mutex);
_aio_flags &= ~AIO_LIB_WAITN;
if (_aio_flags & AIO_LIB_WAITN_PENDING) {
(void) cond_signal(&_aio_waitn_cv);
}
(void) mutex_unlock(&__aio_waitn_mutex);
if (errno)
err = -1;
else
err = 0;
return (err);
}
/*
* timedwait values :
* AIO_TIMEOUT_POLL : polling
* AIO_TIMEOUT_WAIT : timeout
* AIO_TIMEOUT_INDEF : wait indefinitely
*/
/*
 * NOTE(review): truncated fragment.  Per the comment block above in
 * the file, this helper classifies a timeout argument into
 * AIO_TIMEOUT_POLL / AIO_TIMEOUT_WAIT / AIO_TIMEOUT_INDEF via the
 * *timedwait out-parameter, returning 0 on success and -1 (after
 * waking any pending aio_waitn() caller) on invalid timer values.
 * The first part of the signature and the actual validation /
 * classification assignments were lost in extraction; confirm
 * against the full source.
 */
int
int *timedwait)
{
if (utimo) {
/*
* invalid timer values => return EINVAL
* check for pending aio_waitn() calls
*/
(void) mutex_lock(&__aio_waitn_mutex);
_aio_flags &= ~AIO_LIB_WAITN;
if (_aio_flags & AIO_LIB_WAITN_PENDING) {
(void) cond_signal(&_aio_waitn_cv);
}
(void) mutex_unlock(&__aio_waitn_mutex);
return (-1);
}
}
} else {
/* polling */
}
} else {
}
return (0);
}
#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64)
/*
 * NOTE(review): truncated fragment inside the _LARGEFILE64_SOURCE
 * section -- presumably aio_read64(); the signature and body interior
 * were lost in extraction.  Only an error-return path survives.
 * Confirm against the complete source.
 */
int
{
return (-1);
}
}
/*
 * NOTE(review): truncated fragment -- presumably aio_write64() (the
 * large-file counterpart of aio_write); signature and body interior
 * lost in extraction.  Confirm against the complete source.
 */
int
{
return (-1);
}
}
int
{
int i, err;
int aio_ufs = 0;
int oerrno = 0;
int state = 0;
static long aio_list_max = 0;
int EIOflg = 0;
int rw;
int do_kaio = 0;
if (!_kaio_ok)
_kaio_init();
if (aio_list_max == 0)
return (-1);
}
switch (mode) {
case LIO_WAIT:
break;
case LIO_NOWAIT:
break;
default:
return (-1);
}
for (i = 0; i < nent; i++) {
if (list[i]) {
do_kaio++;
else
} else
}
}
if (do_kaio) {
return (0);
} else {
err = -1;
}
/*
* If LIO_WAIT, or signal required, allocate a list head.
*/
(void) _aio_lio_alloc(&head);
if (head) {
(sig->sigev_signo > 0)) {
} else
}
/*
*/
for (i = 0; i < nent; i++) {
if (list[i] &&
if (head)
continue;
}
list[i]->aio_fildes);
if (list[i]->aio_reqprio < 0) {
EIOflg = 1;
if (head)
continue;
}
/*
* submit an AIO request with flags AIO_NO_KAIO
* to avoid the kaio() syscall in _aio_rw()
*/
switch (list[i]->aio_lio_opcode) {
case LIO_READ:
rw = AIOAREAD64;
break;
case LIO_WRITE:
rw = AIOAWRITE64;
break;
}
nextworker, rw,
else
nextworker, rw,
if (err != 0) {
if (head)
EIOflg = 1;
} else
aio_ufs++;
} else {
if (head)
continue;
}
}
}
if (EIOflg) {
return (-1);
}
/*
* call kaio(AIOLIOWAIT) to get all outstanding
* kernel AIO requests
*/
}
while (head->lio_refcnt > 0) {
if (errno) {
return (-1);
}
}
for (i = 0; i < nent; i++) {
if (list[i] &&
return (-1);
}
}
}
return (0);
}
return (err);
}
/*
 * NOTE(review): truncated fragment -- presumably aio_error64(), the
 * large-file counterpart of aio_error() above: reports aio_errno and
 * removes a completed request from the library queue.  The signature
 * and several guard conditions are missing; confirm against the full
 * source.
 */
int
{
if (aio_errno == EINPROGRESS) {
/*
* Always do the kaio() call without using
* the KAIO_SUPPORTED()
* checks because it is not mandatory to
* have a valid fd
* set in the aiocb, only the resultp must be set.
*/
return (-1);
}
return (aio_errno);
}
_aio_lock();
(void) _aio_req_remove(reqp);
}
_aio_unlock();
}
return (aio_errno);
}
/*
 * NOTE(review): truncated fragment -- presumably aio_return64() (the
 * large-file counterpart of aio_return): marks a result as returned
 * and removes the request from the queue.  The entire signature line
 * is missing (block begins at a bare brace); confirm against the
 * full source.  As the surviving comment notes, an invalid control
 * block cannot be detected gracefully, hence the exit(-1).
 */
{
int ret;
/*
* graceful detection of an invalid cb is not possible. a
* SIGSEGV will be generated if it is invalid.
*/
exit(-1);
}
/*
* we use this condition to indicate that
* aio_return has been called before
*/
return (-1);
}
/*
* Before we return mark the result as being returned so that later
* calls to aio_return() will return the fact that the result has
* already been returned
*/
_aio_lock();
(void) _aio_req_remove(reqp);
}
_aio_unlock();
}
return (ret);
}
/*
 * NOTE(review): truncated fragment -- the large-file counterpart of
 * the fsync barrier-submit helper earlier in the file: loops over
 * workerscnt workers, stops early on a submission error, and returns
 * the number of fsync requests submitted.  The first part of the
 * signature and the per-iteration submission call that sets err were
 * lost in extraction; confirm against the full source.
 */
static int
int workerscnt)
{
int i;
int err;
for (i = 0; i < workerscnt; i++) {
if (err != 0) {
return (i);
}
}
return (i);
}
/*
 * NOTE(review): truncated fragment -- presumably aio_fsync64(), the
 * large-file counterpart of aio_fsync() above: falls back to plain
 * __fdsync() when no workers exist, otherwise allocates a list head
 * and fans fsync requests out to all read and write workers,
 * treating partial submission as an error.  The signature, argument
 * validation, and the submit calls that set retval were lost in
 * extraction; confirm against the full source.
 */
int
{
int retval;
return (0);
}
return (-1);
}
return (-1);
return (-1);
}
/*
* The first asynchronous I/O request in the current process
* will create a bunch of workers.
* If the sum of workers (read + write) is zero then the
* number of pending asynchronous I/O requests is zero.
* In such a case only execute the standard fsync(3C) or
* fdatasync(3RT) as appropriate (see flag of __fdsync()).
*/
if ((__wr_workerscnt + __rd_workerscnt) == 0) {
else
}
/*
* re-use aio_offset as the op field.
* O_DSYNC - fdatasync()
* O_SYNC - fsync()
*/
/*
* create a list of fsync requests. the worker
* that gets the last request will do the fsync
* request.
*/
(void) _aio_lio_alloc(&head);
return (-1);
}
/* insert an fsync request on every read workers' queue. */
if (retval != __rd_workerscnt) {
/*
* Less fsync requests than workers means that
* it was not possible to submit fsync requests to all
* workers.
* Actions:
* a) number of fsync requests submitted is 0:
* => free allocated memory (aio_lio_t).
* b) number of fsync requests submitted is > 0:
* => the last worker executing the fsync request
* will free the aio_lio_t struct.
*/
if (retval == 0)
return (-1);
}
/* insert an fsync request on every write workers' queue. */
if (retval != __wr_workerscnt)
return (-1);
return (0);
}
/*
 * NOTE(review): truncated fragment -- presumably aio_cancel64(), the
 * large-file counterpart of aio_cancel() above, returning
 * AIO_ALLDONE / AIO_CANCELED / AIO_NOTCANCELED and falling back to
 * aiocancel_all(fd) when no specific request is targeted.  The
 * signature, request lookup, and the cancellation call that sets
 * "canceled"/"done" were lost in extraction; confirm against the
 * full source.
 */
int
{
int done = 0;
int canceled = 0;
return (-1);
_aio_lock();
_aio_unlock();
return (AIO_ALLDONE);
} else {
&done);
_aio_unlock();
if (done)
return (AIO_ALLDONE);
else if (canceled)
return (AIO_CANCELED);
else
return (AIO_NOTCANCELED);
}
}
}
return (aiocancel_all(fd));
}
#endif /* (_LARGEFILE64_SOURCE) && !defined(_LP64) */