/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* posix_aio.c implements the POSIX async. I/O functions.
*
* aio_read
* aio_write
* aio_error
* aio_return
* aio_suspend
* lio_listio
* aio_fsync
* aio_cancel
*/
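/*
 * A minimal usage sketch (illustrative only, not part of the library
 * source): a caller submits a request with aio_read(), polls its status
 * with aio_error() until it leaves EINPROGRESS, and then reaps the
 * result exactly once with aio_return().  "fd" and the buffer size are
 * hypothetical; the usual <aio.h>, <errno.h>, <poll.h>, <string.h> and
 * <stdio.h> headers are assumed.
 *
 *     aiocb_t cb;
 *     char buf[512];
 *     ssize_t nread;
 *
 *     (void) memset(&cb, 0, sizeof (cb));
 *     cb.aio_fildes = fd;
 *     cb.aio_buf = buf;
 *     cb.aio_nbytes = sizeof (buf);
 *     cb.aio_offset = 0;
 *
 *     if (aio_read(&cb) == -1)
 *             perror("aio_read");
 *     while (aio_error(&cb) == EINPROGRESS)
 *             (void) poll(NULL, 0, 10);
 *     if ((nread = aio_return(&cb)) == -1)
 *             perror("aio_read completion");
 */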
#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/file.h>
#include <sys/port.h>
extern int __fdsync(int, int);
cond_t _aio_waitn_cv = DEFAULTCV; /* wait for end of aio_waitn */
static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);
/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define AIO_TIMEOUT_INDEF -1
#define AIO_TIMEOUT_POLL 0
#define AIO_TIMEOUT_WAIT 1
#define AIO_TIMEOUT_UNDEF 2
/*
* List I/O stuff
*/
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;
int
aio_read(aiocb_t *aiocbp)
{
if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
errno = EINVAL;
return (-1);
}
if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
errno = EBUSY;
return (-1);
}
if (_aio_sigev_thread(aiocbp) != 0)
return (-1);
aiocbp->aio_lio_opcode = LIO_READ;
return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
(AIO_KAIO | AIO_NO_DUPS)));
}
int
aio_write(aiocb_t *aiocbp)
{
if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
errno = EINVAL;
return (-1);
}
if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
errno = EBUSY;
return (-1);
}
if (_aio_sigev_thread(aiocbp) != 0)
return (-1);
aiocbp->aio_lio_opcode = LIO_WRITE;
return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
(AIO_KAIO | AIO_NO_DUPS)));
}
/*
* __lio_listio() cancellation handler.
*/
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
int freeit = 0;
ASSERT(MUTEX_HELD(&head->lio_mutex));
if (head->lio_refcnt == 0) {
ASSERT(head->lio_nent == 0);
freeit = 1;
}
head->lio_waiting = 0;
sig_mutex_unlock(&head->lio_mutex);
if (freeit)
_aio_lio_free(head);
}
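/*
 * A hedged lio_listio() usage sketch (illustrative only, not part of
 * the library source): with LIO_WAIT the call returns once every entry
 * in the list has completed, and per-request status is then collected
 * with aio_error()/aio_return().  "fd", "inbuf" and "outbuf" are
 * hypothetical.
 *
 *     aiocb_t rd, wr;
 *     aiocb_t *reqs[2] = { &rd, &wr };
 *     int i;
 *
 *     (void) memset(&rd, 0, sizeof (rd));
 *     rd.aio_fildes = fd;
 *     rd.aio_buf = inbuf;
 *     rd.aio_nbytes = sizeof (inbuf);
 *     rd.aio_lio_opcode = LIO_READ;
 *
 *     (void) memset(&wr, 0, sizeof (wr));
 *     wr.aio_fildes = fd;
 *     wr.aio_buf = outbuf;
 *     wr.aio_nbytes = sizeof (outbuf);
 *     wr.aio_offset = sizeof (inbuf);
 *     wr.aio_lio_opcode = LIO_WRITE;
 *
 *     if (lio_listio(LIO_WAIT, reqs, 2, NULL) == -1 && errno == EIO) {
 *             for (i = 0; i < 2; i++)
 *                     if (aio_error(reqs[i]) != 0)
 *                             (void) aio_return(reqs[i]);
 *     }
 */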
int
lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
int aio_ufs = 0;
int oerrno = 0;
aio_lio_t *head = NULL;
aiocb_t *aiocbp;
int state = 0;
int EIOflg = 0;
int rw;
int do_kaio = 0;
int error;
int i;
if (!_kaio_ok)
_kaio_init();
if (aio_list_max == 0)
aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);
if (nent <= 0 || nent > aio_list_max) {
errno = EINVAL;
return (-1);
}
switch (mode) {
case LIO_WAIT:
state = NOCHECK;
break;
case LIO_NOWAIT:
state = CHECK;
break;
default:
errno = EINVAL;
return (-1);
}
for (i = 0; i < nent; i++) {
if ((aiocbp = list[i]) == NULL)
continue;
if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
errno = EBUSY;
return (-1);
}
if (_aio_sigev_thread(aiocbp) != 0)
return (-1);
if (aiocbp->aio_lio_opcode == LIO_NOP)
aiocbp->aio_state = NOCHECK;
else {
aiocbp->aio_state = state;
if (KAIO_SUPPORTED(aiocbp->aio_fildes))
do_kaio++;
else
aiocbp->aio_resultp.aio_errno = ENOTSUP;
}
}
if (_aio_sigev_thread_init(sigevp) != 0)
return (-1);
if (do_kaio) {
error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
if (error == 0)
return (0);
oerrno = errno;
} else {
oerrno = errno = ENOTSUP;
error = -1;
}
if (error == -1 && errno == ENOTSUP) {
error = errno = 0;
/*
* If LIO_WAIT, or notification required, allocate a list head.
*/
if (mode == LIO_WAIT ||
(sigevp != NULL &&
(sigevp->sigev_notify == SIGEV_SIGNAL ||
sigevp->sigev_notify == SIGEV_THREAD ||
sigevp->sigev_notify == SIGEV_PORT)))
head = _aio_lio_alloc();
if (head) {
sig_mutex_lock(&head->lio_mutex);
head->lio_mode = mode;
head->lio_largefile = 0;
if (mode == LIO_NOWAIT && sigevp != NULL) {
if (sigevp->sigev_notify == SIGEV_THREAD) {
head->lio_port = sigevp->sigev_signo;
head->lio_event = AIOLIO;
head->lio_sigevent = sigevp;
head->lio_sigval.sival_ptr =
sigevp->sigev_value.sival_ptr;
} else if (sigevp->sigev_notify == SIGEV_PORT) {
port_notify_t *pn =
sigevp->sigev_value.sival_ptr;
head->lio_port = pn->portnfy_port;
head->lio_event = AIOLIO;
head->lio_sigevent = sigevp;
head->lio_sigval.sival_ptr =
pn->portnfy_user;
} else { /* SIGEV_SIGNAL */
head->lio_signo = sigevp->sigev_signo;
head->lio_sigval.sival_ptr =
sigevp->sigev_value.sival_ptr;
}
}
head->lio_nent = head->lio_refcnt = nent;
sig_mutex_unlock(&head->lio_mutex);
}
/*
* Find requests that kaio rejected (errno == ENOTSUP/EBADFD);
* they must be resubmitted through the user-level (UFS) path.
*/
for (i = 0; i < nent; i++) {
if ((aiocbp = list[i]) == NULL ||
aiocbp->aio_lio_opcode == LIO_NOP ||
(aiocbp->aio_resultp.aio_errno != ENOTSUP &&
aiocbp->aio_resultp.aio_errno != EBADFD)) {
if (head)
_lio_list_decr(head);
continue;
}
if (aiocbp->aio_resultp.aio_errno == EBADFD)
SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
if (aiocbp->aio_reqprio != 0) {
aiocbp->aio_resultp.aio_errno = EINVAL;
aiocbp->aio_resultp.aio_return = -1;
EIOflg = 1;
if (head)
_lio_list_decr(head);
continue;
}
/*
* submit an AIO request with flags AIO_NO_KAIO
* to avoid the kaio() syscall in _aio_rw()
*/
switch (aiocbp->aio_lio_opcode) {
case LIO_READ:
rw = AIOAREAD;
break;
case LIO_WRITE:
rw = AIOAWRITE;
break;
}
error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
(AIO_NO_KAIO | AIO_NO_DUPS));
if (error == 0)
aio_ufs++;
else {
if (head)
_lio_list_decr(head);
aiocbp->aio_resultp.aio_errno = error;
EIOflg = 1;
}
}
}
if (EIOflg) {
errno = EIO;
return (-1);
}
if (mode == LIO_WAIT && oerrno == ENOTSUP) {
/*
* call kaio(AIOLIOWAIT) to wait for all outstanding
* kernel AIO requests
*/
if ((nent - aio_ufs) > 0)
(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
if (head != NULL && head->lio_nent > 0) {
sig_mutex_lock(&head->lio_mutex);
while (head->lio_refcnt > 0) {
int err;
head->lio_waiting = 1;
pthread_cleanup_push(_lio_listio_cleanup, head);
err = sig_cond_wait(&head->lio_cond_cv,
&head->lio_mutex);
pthread_cleanup_pop(0);
head->lio_waiting = 0;
if (err && head->lio_nent > 0) {
sig_mutex_unlock(&head->lio_mutex);
errno = err;
return (-1);
}
}
sig_mutex_unlock(&head->lio_mutex);
ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
_aio_lio_free(head);
for (i = 0; i < nent; i++) {
if ((aiocbp = list[i]) != NULL &&
aiocbp->aio_resultp.aio_errno) {
errno = EIO;
return (-1);
}
}
}
return (0);
}
return (error);
}
static void
_lio_list_decr(aio_lio_t *head)
{
sig_mutex_lock(&head->lio_mutex);
head->lio_nent--;
head->lio_refcnt--;
sig_mutex_unlock(&head->lio_mutex);
}
/*
* __aio_suspend() cancellation handler.
*/
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
ASSERT(MUTEX_HELD(&__aio_mutex));
(*counter)--; /* _aio_kernel_suspend or _aio_suscv_cnt */
sig_mutex_unlock(&__aio_mutex);
}
static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
int cv_err; /* error code from cond_xxx() */
int kerr; /* error code from _kaio(AIOSUSPEND) */
int i;
timespec_t twait; /* copy of timo for internal calculations */
timespec_t *wait = NULL;
int timedwait;
int req_outstanding;
aiocb_t **listp;
aiocb_t *aiocbp;
#if !defined(_LP64)
aiocb64_t **listp64;
aiocb64_t *aiocbp64;
#endif
hrtime_t hrtstart;
hrtime_t hrtend;
hrtime_t hrtres;
#if defined(_LP64)
if (largefile)
aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif
if (nent <= 0) {
errno = EINVAL;
return (-1);
}
if (timo) {
if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
timo->tv_nsec >= NANOSEC) {
errno = EINVAL;
return (-1);
}
/* Initialize start time if time monitoring desired */
if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
timedwait = AIO_TIMEOUT_WAIT;
hrtstart = gethrtime();
} else {
/* content of timeout = 0 : polling */
timedwait = AIO_TIMEOUT_POLL;
}
} else {
/* timeout pointer = NULL : wait indefinitely */
timedwait = AIO_TIMEOUT_INDEF;
}
#if !defined(_LP64)
if (largefile) {
listp64 = (aiocb64_t **)list;
for (i = 0; i < nent; i++) {
if ((aiocbp64 = listp64[i]) != NULL &&
aiocbp64->aio_state == CHECK)
aiocbp64->aio_state = CHECKED;
}
} else
#endif /* !_LP64 */
{
listp = (aiocb_t **)list;
for (i = 0; i < nent; i++) {
if ((aiocbp = listp[i]) != NULL &&
aiocbp->aio_state == CHECK)
aiocbp->aio_state = CHECKED;
}
}
sig_mutex_lock(&__aio_mutex);
/*
* The next "if -case" is required to accelerate the
* access to completed RAW-IO requests.
*/
if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
/* Only kernel requests pending */
/*
* _aio_kernel_suspend is used to detect completed non RAW-IO
* requests.
* As long as this thread resides in the kernel (_kaio), further
* asynchronous non RAW-IO requests can be submitted.
*/
_aio_kernel_suspend++;
/*
* Always do the kaio() call without using the KAIO_SUPPORTED()
* checks because it is not mandatory to have a valid fd
* set in the list entries, only the resultp must be set.
*
* _kaio(AIOSUSPEND ...) return values:
* 0: everything ok, a completed request was found
* -1: error
* 1: no error: _aiodone woke up the _kaio(AIOSUSPEND,,)
* system call using _kaio(AIONOTIFY), meaning that some
* non RAW-IOs completed in the meantime.
*/
pthread_cleanup_push(_aio_suspend_cleanup,
&_aio_kernel_suspend);
pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
sig_mutex_unlock(&__aio_mutex);
_cancel_prologue();
kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
list, nent, timo, -1);
_cancel_epilogue();
pthread_cleanup_pop(1); /* sig_mutex_lock(&__aio_mutex) */
pthread_cleanup_pop(0);
_aio_kernel_suspend--;
if (!kerr) {
sig_mutex_unlock(&__aio_mutex);
return (0);
}
} else {
kerr = 1; /* simulation: _kaio detected AIONOTIFY */
}
/*
* Return kernel error code if no other IOs are outstanding.
*/
req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;
sig_mutex_unlock(&__aio_mutex);
if (req_outstanding == 0) {
/* no IOs outstanding in the thread pool */
if (kerr == 1)
/* return "no IOs completed" */
errno = EAGAIN;
return (-1);
}
/*
* IOs using the thread pool are outstanding.
*/
if (timedwait == AIO_TIMEOUT_WAIT) {
/* time monitoring */
hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
(hrtime_t)timo->tv_nsec;
hrtres = hrtend - gethrtime();
if (hrtres <= 0)
hrtres = 1;
twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
wait = &twait;
} else if (timedwait == AIO_TIMEOUT_POLL) {
twait = *timo; /* content of timo = 0 : polling */
wait = &twait;
}
for (;;) {
int error;
int inprogress;
/* first scan file system requests */
inprogress = 0;
for (i = 0; i < nent; i++) {
#if !defined(_LP64)
if (largefile) {
if ((aiocbp64 = listp64[i]) == NULL)
continue;
error = aiocbp64->aio_resultp.aio_errno;
} else
#endif
{
if ((aiocbp = listp[i]) == NULL)
continue;
error = aiocbp->aio_resultp.aio_errno;
}
if (error == EINPROGRESS)
inprogress = 1;
else if (error != ECANCELED) {
errno = 0;
return (0);
}
}
sig_mutex_lock(&__aio_mutex);
/*
* If there are no outstanding I/Os in the thread pool, then
* we have to return here, provided that all kernel RAW-IOs
* have also completed.
* If the kernel was notified to return, then we have to check
* for possibly pending RAW-IOs.
*/
if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
sig_mutex_unlock(&__aio_mutex);
errno = EAGAIN;
break;
}
/*
* There are outstanding IOs in the thread pool or the kernel
* was notified to return.
* Check pending RAW-IOs first.
*/
if (kerr == 1) {
/*
* _aiodone just notified the kernel about
* completed non RAW-IOs (AIONOTIFY was detected).
*/
if (timedwait == AIO_TIMEOUT_WAIT) {
/* Update remaining timeout for the kernel */
hrtres = hrtend - gethrtime();
if (hrtres <= 0) {
/* timer expired */
sig_mutex_unlock(&__aio_mutex);
errno = EAGAIN;
break;
}
wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
}
_aio_kernel_suspend++;
pthread_cleanup_push(_aio_suspend_cleanup,
&_aio_kernel_suspend);
pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
sig_mutex_unlock(&__aio_mutex);
_cancel_prologue();
kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
list, nent, wait, -1);
_cancel_epilogue();
pthread_cleanup_pop(1);
pthread_cleanup_pop(0);
_aio_kernel_suspend--;
if (!kerr) {
sig_mutex_unlock(&__aio_mutex);
return (0);
}
}
if (timedwait == AIO_TIMEOUT_POLL) {
sig_mutex_unlock(&__aio_mutex);
errno = EAGAIN;
break;
}
if (timedwait == AIO_TIMEOUT_WAIT) {
/* Update remaining timeout */
hrtres = hrtend - gethrtime();
if (hrtres <= 0) {
/* timer expired */
sig_mutex_unlock(&__aio_mutex);
errno = EAGAIN;
break;
}
wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
}
if (_aio_outstand_cnt == 0) {
sig_mutex_unlock(&__aio_mutex);
continue;
}
_aio_suscv_cnt++; /* ID for _aiodone (wake up) */
pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
if (timedwait == AIO_TIMEOUT_WAIT) {
cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
&__aio_mutex, wait);
if (cv_err == ETIME)
cv_err = EAGAIN;
} else {
/* wait indefinitely */
cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
}
/* this decrements _aio_suscv_cnt and drops __aio_mutex */
pthread_cleanup_pop(1);
if (cv_err) {
errno = cv_err;
break;
}
}
return (-1);
}
int
aio_suspend(const aiocb_t * const list[], int nent,
const timespec_t *timeout)
{
return (__aio_suspend((void **)list, nent, timeout, 0));
}
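/*
 * A hedged aio_suspend() usage sketch (illustrative only): wait up to
 * one second for at least one of the requests in "reqs" (declared as in
 * the lio_listio() sketch above) to complete.  EAGAIN means the timeout
 * expired, EINTR means a signal interrupted the wait.
 *
 *     struct timespec ts = { 1, 0 };
 *
 *     if (aio_suspend((const aiocb_t * const *)reqs, 2, &ts) == -1) {
 *             if (errno == EAGAIN)
 *                     (void) fprintf(stderr, "nothing completed yet\n");
 *             else if (errno == EINTR)
 *                     (void) fprintf(stderr, "interrupted by a signal\n");
 *     }
 */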
int
aio_error(const aiocb_t *aiocbp)
{
const aio_result_t *resultp = &aiocbp->aio_resultp;
aio_req_t *reqp;
int error;
if ((error = resultp->aio_errno) == EINPROGRESS) {
if (aiocbp->aio_state == CHECK) {
/*
* Always do the kaio() call without using the
* KAIO_SUPPORTED() checks because it is not
* mandatory to have a valid fd set in the
* aiocb, only the resultp must be set.
*/
if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
errno = EINVAL;
return (-1);
}
error = resultp->aio_errno;
} else if (aiocbp->aio_state == CHECKED) {
((aiocb_t *)aiocbp)->aio_state = CHECK;
}
} else if (aiocbp->aio_state == USERAIO) {
sig_mutex_lock(&__aio_mutex);
if ((reqp = _aio_hash_del((aio_result_t *)resultp)) == NULL) {
sig_mutex_unlock(&__aio_mutex);
((aiocb_t *)aiocbp)->aio_state = CHECKED;
} else {
((aiocb_t *)aiocbp)->aio_state = NOCHECK;
ASSERT(reqp->req_head == NULL);
(void) _aio_req_remove(reqp);
sig_mutex_unlock(&__aio_mutex);
_aio_req_free(reqp);
}
}
return (error);
}
ssize_t
aio_return(aiocb_t *aiocbp)
{
aio_result_t *resultp = &aiocbp->aio_resultp;
aio_req_t *reqp;
int error;
ssize_t retval;
/*
* The _aiodone() function stores resultp->aio_return before
* storing resultp->aio_errno (with a membar_producer() in
* between). We use membar_consumer() below to ensure proper
* memory ordering between _aiodone() and ourselves.
*/
error = resultp->aio_errno;
membar_consumer();
retval = resultp->aio_return;
/*
* We use this condition to indicate either that
* aio_return() has already been called or that it
* should not have been called yet.
*/
if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
errno = error;
return (-1);
}
/*
* Before we return, mark the result as being returned so that later
* calls to aio_return() will return the fact that the result has
* already been returned.
*/
sig_mutex_lock(&__aio_mutex);
/* retest, in case more than one thread actually got in here */
if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
sig_mutex_unlock(&__aio_mutex);
errno = EINVAL;
return (-1);
}
resultp->aio_return = -1;
resultp->aio_errno = EINVAL;
if ((reqp = _aio_hash_del(resultp)) == NULL)
sig_mutex_unlock(&__aio_mutex);
else {
aiocbp->aio_state = NOCHECK;
ASSERT(reqp->req_head == NULL);
(void) _aio_req_remove(reqp);
sig_mutex_unlock(&__aio_mutex);
_aio_req_free(reqp);
}
if (retval == -1)
errno = error;
return (retval);
}
void
_lio_remove(aio_req_t *reqp)
{
aio_lio_t *head;
int refcnt;
if ((head = reqp->req_head) != NULL) {
sig_mutex_lock(&head->lio_mutex);
ASSERT(head->lio_refcnt == head->lio_nent);
refcnt = --head->lio_nent;
head->lio_refcnt--;
sig_mutex_unlock(&head->lio_mutex);
if (refcnt == 0)
_aio_lio_free(head);
reqp->req_head = NULL;
}
}
/*
* This function returns the number of asynchronous I/O requests submitted.
*/
static int
__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
int workerscnt)
{
int i;
int error;
aio_worker_t *next = aiowp;
for (i = 0; i < workerscnt; i++) {
error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
if (error != 0) {
sig_mutex_lock(&head->lio_mutex);
head->lio_mode = LIO_DESTROY; /* ignore fsync */
head->lio_nent -= workerscnt - i;
head->lio_refcnt -= workerscnt - i;
sig_mutex_unlock(&head->lio_mutex);
errno = EAGAIN;
return (i);
}
next = next->work_forw;
}
return (i);
}
int
aio_fsync(int op, aiocb_t *aiocbp)
{
aio_lio_t *head;
struct stat statb;
int fret;
if (aiocbp == NULL)
return (0);
if (op != O_DSYNC && op != O_SYNC) {
errno = EINVAL;
return (-1);
}
if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
errno = EBUSY;
return (-1);
}
if (fstat(aiocbp->aio_fildes, &statb) < 0)
return (-1);
if (_aio_sigev_thread(aiocbp) != 0)
return (-1);
/*
* Kernel aio_fsync() is not supported.
* We force user-level aio_fsync() just
* for the notification side-effect.
*/
if (!__uaio_ok && __uaio_init() == -1)
return (-1);
/*
* The first asynchronous I/O request in the current process will
* create a bunch of workers (via __uaio_init()). If the number
* of workers is zero then the number of pending asynchronous I/O
* requests is zero. In such a case only execute the standard
* fsync(3C) or fdatasync(3RT) as appropriate.
*/
if (__rw_workerscnt == 0) {
if (op == O_DSYNC)
return (__fdsync(aiocbp->aio_fildes, FDSYNC));
else
return (__fdsync(aiocbp->aio_fildes, FSYNC));
}
/*
* re-use aio_offset as the op field.
* O_DSYNC - fdatasync()
* O_SYNC - fsync()
*/
aiocbp->aio_offset = op;
aiocbp->aio_lio_opcode = AIOFSYNC;
/*
* Create a list of fsync requests. The worker that
* gets the last request will do the fsync request.
*/
head = _aio_lio_alloc();
if (head == NULL) {
errno = EAGAIN;
return (-1);
}
head->lio_mode = LIO_FSYNC;
head->lio_nent = head->lio_refcnt = __rw_workerscnt;
head->lio_largefile = 0;
/*
* Insert an fsync request on every worker's queue.
*/
fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
if (fret != __rw_workerscnt) {
/*
* Fewer fsync requests than workers means that it was
* not possible to submit fsync requests to all workers.
* Actions:
* a) number of fsync requests submitted is 0:
* => free allocated memory (aio_lio_t).
* b) number of fsync requests submitted is > 0:
* => the last worker executing the fsync request
* will free the aio_lio_t struct.
*/
if (fret == 0)
_aio_lio_free(head);
return (-1);
}
return (0);
}
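/*
 * A hedged aio_fsync() usage sketch (illustrative only): queue an
 * fdatasync-style barrier behind the requests already submitted on a
 * hypothetical "fd" and poll for its completion.  The zeroed sigevent
 * means this sketch requests no asynchronous notification; a real
 * caller may prefer to set sigev_notify explicitly.
 *
 *     aiocb_t sync_cb;
 *
 *     (void) memset(&sync_cb, 0, sizeof (sync_cb));
 *     sync_cb.aio_fildes = fd;
 *
 *     if (aio_fsync(O_DSYNC, &sync_cb) == -1)
 *             perror("aio_fsync");
 *     else {
 *             while (aio_error(&sync_cb) == EINPROGRESS)
 *                     (void) poll(NULL, 0, 10);
 *             (void) aio_return(&sync_cb);
 *     }
 */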
int
aio_cancel(int fd, aiocb_t *aiocbp)
{
aio_req_t *reqp;
aio_worker_t *aiowp;
int done = 0;
int canceled = 0;
struct stat buf;
if (fstat(fd, &buf) < 0)
return (-1);
if (aiocbp != NULL) {
if (fd != aiocbp->aio_fildes) {
errno = EINVAL;
return (-1);
}
if (aiocbp->aio_state == USERAIO) {
sig_mutex_lock(&__aio_mutex);
reqp = _aio_hash_find(&aiocbp->aio_resultp);
if (reqp == NULL) {
sig_mutex_unlock(&__aio_mutex);
return (AIO_ALLDONE);
}
aiowp = reqp->req_worker;
sig_mutex_lock(&aiowp->work_qlock1);
(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
sig_mutex_unlock(&aiowp->work_qlock1);
sig_mutex_unlock(&__aio_mutex);
if (done)
return (AIO_ALLDONE);
if (canceled)
return (AIO_CANCELED);
return (AIO_NOTCANCELED);
}
if (aiocbp->aio_state == USERAIO_DONE)
return (AIO_ALLDONE);
return ((int)_kaio(AIOCANCEL, fd, aiocbp));
}
return (aiocancel_all(fd));
}
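/*
 * A hedged aio_cancel() usage sketch (illustrative only): try to cancel
 * a single outstanding request "cb" and interpret the possible return
 * values.
 *
 *     switch (aio_cancel(cb.aio_fildes, &cb)) {
 *     case AIO_CANCELED:
 *     case AIO_ALLDONE:
 *             (void) aio_return(&cb);
 *             break;
 *     case AIO_NOTCANCELED:
 *             while (aio_error(&cb) == EINPROGRESS)
 *                     (void) poll(NULL, 0, 10);
 *             (void) aio_return(&cb);
 *             break;
 *     default:
 *             perror("aio_cancel");
 *             break;
 *     }
 */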
/*
* __aio_waitn() cancellation handler.
*/
/* ARGSUSED */
static void
_aio_waitn_cleanup(void *arg)
{
ASSERT(MUTEX_HELD(&__aio_mutex));
/* check for pending aio_waitn() calls */
_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
if (_aio_flags & AIO_LIB_WAITN_PENDING) {
_aio_flags &= ~AIO_LIB_WAITN_PENDING;
(void) cond_signal(&_aio_waitn_cv);
}
sig_mutex_unlock(&__aio_mutex);
}
/*
* aio_waitn can be used to reap the results of several I/O operations that
* were submitted asynchronously. The submission of I/Os can be done using
* existing POSIX interfaces: lio_listio, aio_write or aio_read.
* aio_waitn waits until "nwait" I/Os (supplied as a parameter) have
* completed and it returns the descriptors for these I/Os in "list". The
* maximum size of this list is given by "nent" and the actual number of I/Os
* completed is returned in "nwait". aio_waitn may also return early if
* the supplied timeout expires. It returns 0 on success or -1 if an
* error occurred.
*/
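/*
 * A hedged aio_waitn() usage sketch (illustrative only; "done",
 * "MAXREQS" and "submitted" are hypothetical): after submitting
 * "submitted" requests, wait until at least half of them have completed
 * and reap whatever is returned.  On return, "nwait" holds the number
 * of aiocb pointers actually stored in "done".
 *
 *     aiocb_t *done[MAXREQS];
 *     uint_t nwait = submitted / 2;
 *     uint_t i;
 *
 *     if (aio_waitn(done, MAXREQS, &nwait, NULL) == -1)
 *             perror("aio_waitn");
 *     for (i = 0; i < nwait; i++)
 *             (void) aio_return(done[i]);
 */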
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
int error = 0;
uint_t dnwait = 0; /* number of requests in the waitn-done list */
uint_t kwaitcnt; /* expected "done" requests from kernel */
uint_t knentcnt; /* max. expected "done" requests from kernel */
int uerrno = 0;
int kerrno = 0; /* save errno from _kaio() call */
int timedwait = AIO_TIMEOUT_UNDEF;
aio_req_t *reqp;
timespec_t end;
timespec_t twait; /* copy of utimo for internal calculations */
timespec_t *wait = NULL;
if (nent == 0 || *nwait == 0 || *nwait > nent) {
errno = EINVAL;
return (-1);
}
/*
* Only one running aio_waitn call per process is allowed.
* Further calls block here until the running
* call finishes.
*/
sig_mutex_lock(&__aio_mutex);
while (_aio_flags & AIO_LIB_WAITN) {
if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
sig_mutex_unlock(&__aio_mutex);
*nwait = 0;
return (0);
}
_aio_flags |= AIO_LIB_WAITN_PENDING;
pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
pthread_cleanup_pop(0);
if (error != 0) {
sig_mutex_unlock(&__aio_mutex);
*nwait = 0;
errno = error;
return (-1);
}
}
pthread_cleanup_push(_aio_waitn_cleanup, NULL);
_aio_flags |= AIO_LIB_WAITN;
if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
error = -1;
dnwait = 0;
goto out;
}
if (timedwait != AIO_TIMEOUT_INDEF) {
twait = *utimo;
wait = &twait;
}
/*
* If both counters are still set to zero, then only
* kernel requests are currently outstanding (raw-I/Os).
*/
if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
for (;;) {
kwaitcnt = *nwait - dnwait;
knentcnt = nent - dnwait;
if (knentcnt > AIO_WAITN_MAXIOCBS)
knentcnt = AIO_WAITN_MAXIOCBS;
kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;
pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
sig_mutex_unlock(&__aio_mutex);
_cancel_prologue();
error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
&kwaitcnt, wait);
_cancel_epilogue();
pthread_cleanup_pop(1);
if (error == 0) {
dnwait += kwaitcnt;
if (dnwait >= *nwait ||
*nwait < AIO_WAITN_MAXIOCBS)
break;
if (timedwait == AIO_TIMEOUT_WAIT) {
error = _aio_get_timedelta(&end, wait);
if (error == -1) {
/* timer expired */
errno = ETIME;
break;
}
}
continue;
}
if (errno == EAGAIN) {
if (dnwait > 0)
error = 0;
break;
}
if (errno == ETIME || errno == EINTR) {
dnwait += kwaitcnt;
break;
}
/* fatal error */
break;
}
goto out;
}
/* File system I/Os outstanding ... */
if (timedwait == AIO_TIMEOUT_UNDEF) {
if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
error = -1;
dnwait = 0;
goto out;
}
if (timedwait != AIO_TIMEOUT_INDEF) {
twait = *utimo;
wait = &twait;
}
}
for (;;) {
uint_t sum_reqs;
/*
* Calculate sum of active non RAW-IO requests (sum_reqs).
* If the expected number of completed requests (*nwait) is
* greater than the calculated sum (sum_reqs) then
* use _kaio to check pending RAW-IO requests.
*/
sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;
if (kwaitcnt != 0) {
/* possibly some kernel I/Os outstanding */
knentcnt = nent - dnwait;
if (knentcnt > AIO_WAITN_MAXIOCBS)
knentcnt = AIO_WAITN_MAXIOCBS;
kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;
_aio_flags |= AIO_WAIT_INPROGRESS;
pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
sig_mutex_unlock(&__aio_mutex);
_cancel_prologue();
error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
&kwaitcnt, wait);
_cancel_epilogue();
pthread_cleanup_pop(1);
_aio_flags &= ~AIO_WAIT_INPROGRESS;
if (error == 0) {
dnwait += kwaitcnt;
} else {
switch (errno) {
case EINVAL:
case EAGAIN:
/* don't wait for kernel I/Os */
kerrno = 0; /* ignore _kaio() errno */
*nwait = _aio_doneq_cnt +
_aio_outstand_cnt + dnwait;
error = 0;
break;
case EINTR:
case ETIME:
/* just scan for completed LIB I/Os */
dnwait += kwaitcnt;
timedwait = AIO_TIMEOUT_POLL;
kerrno = errno; /* save _kaio() errno */
error = 0;
break;
default:
kerrno = errno; /* save _kaio() errno */
break;
}
}
if (error)
break; /* fatal kernel error */
}
/* check completed FS requests in the "done" queue */
while (_aio_doneq_cnt && dnwait < nent) {
/* get done requests */
if ((reqp = _aio_req_remove(NULL)) != NULL) {
(void) _aio_hash_del(reqp->req_resultp);
list[dnwait++] = reqp->req_aiocbp;
_aio_req_mark_done(reqp);
_lio_remove(reqp);
_aio_req_free(reqp);
}
}
if (dnwait >= *nwait) {
/* minimum requested number of completed I/Os satisfied */
break;
}
if (timedwait == AIO_TIMEOUT_WAIT &&
(error = _aio_get_timedelta(&end, wait)) == -1) {
/* timer expired */
uerrno = ETIME;
break;
}
/*
* If some I/Os are outstanding and we have to wait for them,
* then sleep here. _aiodone() will call _aio_waitn_wakeup()
* to wake up this thread as soon as the required number of
* I/Os has completed.
*/
if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
/*
* _aio_waitn_wakeup() will wake up this thread when:
* - _aio_waitncnt requests are completed or
* - _aio_outstand_cnt becomes zero.
* sig_cond_reltimedwait() could also return with
* a timeout error (ETIME).
*/
if (*nwait < _aio_outstand_cnt)
_aio_waitncnt = *nwait;
else
_aio_waitncnt = _aio_outstand_cnt;
_aio_flags |= AIO_IO_WAITING;
if (wait)
uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
&__aio_mutex, wait);
else
uerrno = sig_cond_wait(&_aio_iowait_cv,
&__aio_mutex);
_aio_flags &= ~AIO_IO_WAITING;
if (uerrno == ETIME) {
timedwait = AIO_TIMEOUT_POLL;
continue;
}
if (uerrno != 0)
timedwait = AIO_TIMEOUT_POLL;
}
if (timedwait == AIO_TIMEOUT_POLL) {
/* polling or timer expired */
break;
}
}
errno = uerrno == 0 ? kerrno : uerrno;
if (errno)
error = -1;
else
error = 0;
out:
*nwait = dnwait;
pthread_cleanup_pop(1); /* drops __aio_mutex */
return (error);
}
int
aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
const timespec_t *timeout)
{
return (__aio_waitn((void **)list, nent, nwait, timeout));
}
void
_aio_waitn_wakeup(void)
{
/*
* __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
* it is waiting for completed I/Os. The number of required
* completed I/Os is stored into "_aio_waitncnt".
* aio_waitn() is woken up when
* - there are no further outstanding I/Os
* (_aio_outstand_cnt == 0) or
* - the expected number of I/Os has completed.
* Only one __aio_waitn() call waits for completed I/Os at
* a time.
*
* __aio_suspend() increments "_aio_suscv_cnt" to notify
* _aiodone() that at least one __aio_suspend() call is
* waiting for completed I/Os.
* There can be more than one __aio_suspend() call waiting
* for completed I/Os. Because each caller may be waiting
* for different I/Os, _aiodone() has to wake up all
* __aio_suspend() callers each time.
* Every __aio_suspend() caller then compares the recently
* completed I/O with its own list.
*/
ASSERT(MUTEX_HELD(&__aio_mutex));
if (_aio_flags & AIO_IO_WAITING) {
if (_aio_waitncnt > 0)
_aio_waitncnt--;
if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
_aio_suscv_cnt > 0)
(void) cond_broadcast(&_aio_iowait_cv);
} else {
/* Wake up waiting aio_suspend calls */
if (_aio_suscv_cnt > 0)
(void) cond_broadcast(&_aio_iowait_cv);
}
}
/*
* timedwait values :
* AIO_TIMEOUT_POLL : polling
* AIO_TIMEOUT_WAIT : timeout
* AIO_TIMEOUT_INDEF : wait indefinitely
*/
static int
_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
{
struct timeval curtime;
if (utimo) {
if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
utimo->tv_nsec >= NANOSEC) {
errno = EINVAL;
return (-1);
}
if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
(void) gettimeofday(&curtime, NULL);
end->tv_sec = utimo->tv_sec + curtime.tv_sec;
end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
if (end->tv_nsec >= NANOSEC) {
end->tv_nsec -= NANOSEC;
end->tv_sec += 1;
}
*timedwait = AIO_TIMEOUT_WAIT;
} else {
/* polling */
*timedwait = AIO_TIMEOUT_POLL;
}
} else {
*timedwait = AIO_TIMEOUT_INDEF; /* wait indefinitely */
}
return (0);
}
#if !defined(_LP64)
int
aio_read64(aiocb64_t *aiocbp)
{
if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
errno = EINVAL;
return (-1);
}
if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
errno = EBUSY;
return (-1);
}
if (_aio_sigev_thread64(aiocbp) != 0)
return (-1);
aiocbp->aio_lio_opcode = LIO_READ;
return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
(AIO_KAIO | AIO_NO_DUPS)));
}
int
aio_write64(aiocb64_t *aiocbp)
{
if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
errno = EINVAL;
return (-1);
}
if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
errno = EBUSY;
return (-1);
}
if (_aio_sigev_thread64(aiocbp) != 0)
return (-1);
aiocbp->aio_lio_opcode = LIO_WRITE;
return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
(AIO_KAIO | AIO_NO_DUPS)));
}
int
lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
int aio_ufs = 0;
int oerrno = 0;
aio_lio_t *head = NULL;
aiocb64_t *aiocbp;
int state = 0;
int EIOflg = 0;
int rw;
int do_kaio = 0;
int error;
int i;
if (!_kaio_ok)
_kaio_init();
if (aio_list_max == 0)
aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);
if (nent <= 0 || nent > aio_list_max) {
errno = EINVAL;
return (-1);
}
switch (mode) {
case LIO_WAIT:
state = NOCHECK;
break;
case LIO_NOWAIT:
state = CHECK;
break;
default:
errno = EINVAL;
return (-1);
}
for (i = 0; i < nent; i++) {
if ((aiocbp = list[i]) == NULL)
continue;
if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
errno = EBUSY;
return (-1);
}
if (_aio_sigev_thread64(aiocbp) != 0)
return (-1);
if (aiocbp->aio_lio_opcode == LIO_NOP)
aiocbp->aio_state = NOCHECK;
else {
aiocbp->aio_state = state;
if (KAIO_SUPPORTED(aiocbp->aio_fildes))
do_kaio++;
else
aiocbp->aio_resultp.aio_errno = ENOTSUP;
}
}
if (_aio_sigev_thread_init(sigevp) != 0)
return (-1);
if (do_kaio) {
error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
if (error == 0)
return (0);
oerrno = errno;
} else {
oerrno = errno = ENOTSUP;
error = -1;
}
if (error == -1 && errno == ENOTSUP) {
error = errno = 0;
/*
* If LIO_WAIT, or notification required, allocate a list head.
*/
if (mode == LIO_WAIT ||
(sigevp != NULL &&
(sigevp->sigev_notify == SIGEV_SIGNAL ||
sigevp->sigev_notify == SIGEV_THREAD ||
sigevp->sigev_notify == SIGEV_PORT)))
head = _aio_lio_alloc();
if (head) {
sig_mutex_lock(&head->lio_mutex);
head->lio_mode = mode;
head->lio_largefile = 1;
if (mode == LIO_NOWAIT && sigevp != NULL) {
if (sigevp->sigev_notify == SIGEV_THREAD) {
head->lio_port = sigevp->sigev_signo;
head->lio_event = AIOLIO64;
head->lio_sigevent = sigevp;
head->lio_sigval.sival_ptr =
sigevp->sigev_value.sival_ptr;
} else if (sigevp->sigev_notify == SIGEV_PORT) {
port_notify_t *pn =
sigevp->sigev_value.sival_ptr;
head->lio_port = pn->portnfy_port;
head->lio_event = AIOLIO64;
head->lio_sigevent = sigevp;
head->lio_sigval.sival_ptr =
pn->portnfy_user;
} else { /* SIGEV_SIGNAL */
head->lio_signo = sigevp->sigev_signo;
head->lio_sigval.sival_ptr =
sigevp->sigev_value.sival_ptr;
}
}
head->lio_nent = head->lio_refcnt = nent;
sig_mutex_unlock(&head->lio_mutex);
}
/*
* Find requests that kaio rejected (errno == ENOTSUP/EBADFD);
* they must be resubmitted through the user-level (UFS) path.
*/
for (i = 0; i < nent; i++) {
if ((aiocbp = list[i]) == NULL ||
aiocbp->aio_lio_opcode == LIO_NOP ||
(aiocbp->aio_resultp.aio_errno != ENOTSUP &&
aiocbp->aio_resultp.aio_errno != EBADFD)) {
if (head)
_lio_list_decr(head);
continue;
}
if (aiocbp->aio_resultp.aio_errno == EBADFD)
SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
if (aiocbp->aio_reqprio != 0) {
aiocbp->aio_resultp.aio_errno = EINVAL;
aiocbp->aio_resultp.aio_return = -1;
EIOflg = 1;
if (head)
_lio_list_decr(head);
continue;
}
/*
* submit an AIO request with flags AIO_NO_KAIO
* to avoid the kaio() syscall in _aio_rw()
*/
switch (aiocbp->aio_lio_opcode) {
case LIO_READ:
rw = AIOAREAD64;
break;
case LIO_WRITE:
rw = AIOAWRITE64;
break;
}
error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
(AIO_NO_KAIO | AIO_NO_DUPS));
if (error == 0)
aio_ufs++;
else {
if (head)
_lio_list_decr(head);
aiocbp->aio_resultp.aio_errno = error;
EIOflg = 1;
}
}
}
if (EIOflg) {
errno = EIO;
return (-1);
}
if (mode == LIO_WAIT && oerrno == ENOTSUP) {
/*
* call kaio(AIOLIOWAIT) to wait for all outstanding
* kernel AIO requests
*/
if ((nent - aio_ufs) > 0)
(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
if (head != NULL && head->lio_nent > 0) {
sig_mutex_lock(&head->lio_mutex);
while (head->lio_refcnt > 0) {
int err;
head->lio_waiting = 1;
pthread_cleanup_push(_lio_listio_cleanup, head);
err = sig_cond_wait(&head->lio_cond_cv,
&head->lio_mutex);
pthread_cleanup_pop(0);
head->lio_waiting = 0;
if (err && head->lio_nent > 0) {
sig_mutex_unlock(&head->lio_mutex);
errno = err;
return (-1);
}
}
sig_mutex_unlock(&head->lio_mutex);
ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
_aio_lio_free(head);
for (i = 0; i < nent; i++) {
if ((aiocbp = list[i]) != NULL &&
aiocbp->aio_resultp.aio_errno) {
errno = EIO;
return (-1);
}
}
}
return (0);
}
return (error);
}
int
aio_suspend64(const aiocb64_t * const list[], int nent,
const timespec_t *timeout)
{
return (__aio_suspend((void **)list, nent, timeout, 1));
}
int
aio_error64(const aiocb64_t *aiocbp)
{
const aio_result_t *resultp = &aiocbp->aio_resultp;
int error;
if ((error = resultp->aio_errno) == EINPROGRESS) {
if (aiocbp->aio_state == CHECK) {
/*
* Always do the kaio() call without using the
* KAIO_SUPPORTED() checks because it is not
* mandatory to have a valid fd set in the
* aiocb, only the resultp must be set.
*/
if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
errno = EINVAL;
return (-1);
}
error = resultp->aio_errno;
} else if (aiocbp->aio_state == CHECKED) {
((aiocb64_t *)aiocbp)->aio_state = CHECK;
}
}
return (error);
}
ssize_t
aio_return64(aiocb64_t *aiocbp)
{
aio_result_t *resultp = &aiocbp->aio_resultp;
aio_req_t *reqp;
int error;
ssize_t retval;
/*
* The _aiodone() function stores resultp->aio_return before
* storing resultp->aio_errno (with a membar_producer() in
* between). We use membar_consumer() below to ensure proper
* memory ordering between _aiodone() and ourselves.
*/
error = resultp->aio_errno;
membar_consumer();
retval = resultp->aio_return;
/*
* We use this condition to indicate either that
* aio_return() has already been called or that it
* should not have been called yet.
*/
if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
errno = error;
return (-1);
}
/*
* Before we return, mark the result as being returned so that later
* calls to aio_return() will return the fact that the result has
* already been returned.
*/
sig_mutex_lock(&__aio_mutex);
/* retest, in case more than one thread actually got in here */
if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
sig_mutex_unlock(&__aio_mutex);
errno = EINVAL;
return (-1);
}
resultp->aio_return = -1;
resultp->aio_errno = EINVAL;
if ((reqp = _aio_hash_del(resultp)) == NULL)
sig_mutex_unlock(&__aio_mutex);
else {
aiocbp->aio_state = NOCHECK;
ASSERT(reqp->req_head == NULL);
(void) _aio_req_remove(reqp);
sig_mutex_unlock(&__aio_mutex);
_aio_req_free(reqp);
}
if (retval == -1)
errno = error;
return (retval);
}
static int
__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
int workerscnt)
{
int i;
int error;
aio_worker_t *next = aiowp;
for (i = 0; i < workerscnt; i++) {
error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
if (error != 0) {
sig_mutex_lock(&head->lio_mutex);
head->lio_mode = LIO_DESTROY; /* ignore fsync */
head->lio_nent -= workerscnt - i;
head->lio_refcnt -= workerscnt - i;
sig_mutex_unlock(&head->lio_mutex);
errno = EAGAIN;
return (i);
}
next = next->work_forw;
}
return (i);
}
int
aio_fsync64(int op, aiocb64_t *aiocbp)
{
aio_lio_t *head;
struct stat64 statb;
int fret;
if (aiocbp == NULL)
return (0);
if (op != O_DSYNC && op != O_SYNC) {
errno = EINVAL;
return (-1);
}
if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
errno = EBUSY;
return (-1);
}
if (fstat64(aiocbp->aio_fildes, &statb) < 0)
return (-1);
if (_aio_sigev_thread64(aiocbp) != 0)
return (-1);
/*
* Kernel aio_fsync() is not supported.
* We force user-level aio_fsync() just
* for the notification side-effect.
*/
if (!__uaio_ok && __uaio_init() == -1)
return (-1);
/*
* The first asynchronous I/O request in the current process will
* create a bunch of workers (via __uaio_init()). If the number
* of workers is zero then the number of pending asynchronous I/O
* requests is zero. In such a case only execute the standard
* fsync(3C) or fdatasync(3RT) as appropriate.
*/
if (__rw_workerscnt == 0) {
if (op == O_DSYNC)
return (__fdsync(aiocbp->aio_fildes, FDSYNC));
else
return (__fdsync(aiocbp->aio_fildes, FSYNC));
}
/*
* re-use aio_offset as the op field.
* O_DSYNC - fdatasync()
* O_SYNC - fsync()
*/
aiocbp->aio_offset = op;
aiocbp->aio_lio_opcode = AIOFSYNC;
/*
* Create a list of fsync requests. The worker that
* gets the last request will do the fsync request.
*/
head = _aio_lio_alloc();
if (head == NULL) {
errno = EAGAIN;
return (-1);
}
head->lio_mode = LIO_FSYNC;
head->lio_nent = head->lio_refcnt = __rw_workerscnt;
head->lio_largefile = 1;
/*
* Insert an fsync request on every worker's queue.
*/
fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
if (fret != __rw_workerscnt) {
/*
* Fewer fsync requests than workers means that it was
* not possible to submit fsync requests to all workers.
* Actions:
* a) number of fsync requests submitted is 0:
* => free allocated memory (aio_lio_t).
* b) number of fsync requests submitted is > 0:
* => the last worker executing the fsync request
* will free the aio_lio_t struct.
*/
if (fret == 0)
_aio_lio_free(head);
return (-1);
}
return (0);
}
int
aio_cancel64(int fd, aiocb64_t *aiocbp)
{
aio_req_t *reqp;
aio_worker_t *aiowp;
int done = 0;
int canceled = 0;
struct stat64 buf;
if (fstat64(fd, &buf) < 0)
return (-1);
if (aiocbp != NULL) {
if (fd != aiocbp->aio_fildes) {
errno = EINVAL;
return (-1);
}
if (aiocbp->aio_state == USERAIO) {
sig_mutex_lock(&__aio_mutex);
reqp = _aio_hash_find(&aiocbp->aio_resultp);
if (reqp == NULL) {
sig_mutex_unlock(&__aio_mutex);
return (AIO_ALLDONE);
}
aiowp = reqp->req_worker;
sig_mutex_lock(&aiowp->work_qlock1);
(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
sig_mutex_unlock(&aiowp->work_qlock1);
sig_mutex_unlock(&__aio_mutex);
if (done)
return (AIO_ALLDONE);
if (canceled)
return (AIO_CANCELED);
return (AIO_NOTCANCELED);
}
if (aiocbp->aio_state == USERAIO_DONE)
return (AIO_ALLDONE);
return ((int)_kaio(AIOCANCEL, fd, aiocbp));
}
return (aiocancel_all(fd));
}
int
aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
const timespec_t *timeout)
{
return (__aio_waitn((void **)list, nent, nwait, timeout));
}
#endif /* !defined(_LP64) */