/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * posix_aio.c implements the POSIX async. I/O functions.
 *
 *	aio_read
 *	aio_write
 *	aio_error
 *	aio_return
 *	aio_suspend
 *	lio_listio
 *	aio_fsync
 *	aio_cancel
 */
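
/*
 * Illustrative usage sketch (not part of this library): the typical
 * submit/poll/reap cycle an application performs against these interfaces.
 * The descriptor "fd", the buffer "buf" and its length "buflen" are assumed
 * to be set up by the caller.
 *
 *	struct aiocb cb;
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = buflen;
 *	cb.aio_offset = 0;
 *
 *	if (aio_read(&cb) != 0)
 *		return (-1);			(submission failed)
 *	while (aio_error(&cb) == EINPROGRESS)
 *		(void) poll(NULL, 0, 10);	(or block in aio_suspend())
 *	ssize_t nbytes = aio_return(&cb);	(reap exactly once)
 *
 * aio_error() reports the request's current error status; aio_return()
 * must be called exactly once per request to retrieve the byte count and
 * release the library's bookkeeping for it.
 */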

#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/file.h>
#include <sys/port.h>

extern int __fdsync(int, int);

cond_t _aio_waitn_cv = DEFAULTCV;	/* wait for end of aio_waitn */

static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);

/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define	AIO_TIMEOUT_INDEF	-1
#define	AIO_TIMEOUT_POLL	0
#define	AIO_TIMEOUT_WAIT	1
#define	AIO_TIMEOUT_UNDEF	2

/* value in aio_returned to indicate that aio_return() has *not* been called */
#define	AIO_NOTRETURNED	0x6f

/*
 * List I/O stuff
 */
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;

int
aio_read(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_returned = AIO_NOTRETURNED;
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_returned = AIO_NOTRETURNED;
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

/*
 * __lio_listio() cancellation handler.
 */
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
	int freeit = 0;

	ASSERT(MUTEX_HELD(&head->lio_mutex));
	if (head->lio_refcnt == 0) {
		ASSERT(head->lio_nent == 0);
		freeit = 1;
	}
	head->lio_waiting = 0;
	sig_mutex_unlock(&head->lio_mutex);
	if (freeit)
		_aio_lio_free(head);
}

int
lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread(aiocbp) != 0)
			return (-1);
		aiocbp->aio_returned = AIO_NOTRETURNED;
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 0;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE;
				break;
			}
			error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}
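
/*
 * Illustrative sketch (not part of this library) of a blocking batch
 * submission through lio_listio(); "fd", "buf1", "buf2", "len1", "len2"
 * and "off2" are assumed to be set up by the caller.
 *
 *	struct aiocb rd, wr;
 *	struct aiocb *batch[2];
 *
 *	(void) memset(&rd, 0, sizeof (rd));
 *	rd.aio_fildes = fd;
 *	rd.aio_buf = buf1;
 *	rd.aio_nbytes = len1;
 *	rd.aio_lio_opcode = LIO_READ;
 *
 *	(void) memset(&wr, 0, sizeof (wr));
 *	wr.aio_fildes = fd;
 *	wr.aio_buf = buf2;
 *	wr.aio_nbytes = len2;
 *	wr.aio_offset = off2;
 *	wr.aio_lio_opcode = LIO_WRITE;
 *
 *	batch[0] = &rd;
 *	batch[1] = &wr;
 *	if (lio_listio(LIO_WAIT, batch, 2, NULL) != 0) {
 *		EIO means one or more individual requests failed;
 *		check each element with aio_error()/aio_return().
 *	}
 *
 * With LIO_NOWAIT the call returns once the requests are queued, and the
 * optional sigevent describes how completion of the whole list is reported.
 */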

static void
_lio_list_decr(aio_lio_t *head)
{
	sig_mutex_lock(&head->lio_mutex);
	head->lio_nent--;
	head->lio_refcnt--;
	sig_mutex_unlock(&head->lio_mutex);
}

/*
 * __aio_suspend() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));
	(*counter)--;		/* _aio_kernel_suspend or _aio_suscv_cnt */
	sig_mutex_unlock(&__aio_mutex);
}

static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
	int		cv_err;	/* error code from cond_xxx() */
	int		kerr;	/* error code from _kaio(AIOSUSPEND) */
	int		i;
	timespec_t	twait;	/* copy of timo for internal calculations */
	timespec_t	*wait = NULL;
	int		timedwait;
	int		req_outstanding;
	aiocb_t		**listp;
	aiocb_t		*aiocbp;
#if !defined(_LP64)
	aiocb64_t	**listp64;
	aiocb64_t	*aiocbp64;
#endif
	hrtime_t	hrtstart;
	hrtime_t	hrtend;
	hrtime_t	hrtres;

#if defined(_LP64)
	if (largefile)
		aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif

	if (nent <= 0) {
		errno = EINVAL;
		return (-1);
	}

	if (timo) {
		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
		    timo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		/* Initialize start time if time monitoring desired */
		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
			timedwait = AIO_TIMEOUT_WAIT;
			hrtstart = gethrtime();
		} else {
			/* content of timeout = 0 : polling */
			timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		/* timeout pointer = NULL : wait indefinitely */
		timedwait = AIO_TIMEOUT_INDEF;
	}

#if !defined(_LP64)
	if (largefile) {
		listp64 = (aiocb64_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp64 = listp64[i]) != NULL &&
			    aiocbp64->aio_state == CHECK)
				aiocbp64->aio_state = CHECKED;
		}
	} else
#endif	/* !_LP64 */
	{
		listp = (aiocb_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp = listp[i]) != NULL &&
			    aiocbp->aio_state == CHECK)
				aiocbp->aio_state = CHECKED;
		}
	}

	sig_mutex_lock(&__aio_mutex);

	/*
	 * The next "if" case is required to accelerate access
	 * to completed RAW-IO requests.
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		/* Only kernel requests pending */

		/*
		 * _aio_kernel_suspend is used to detect completed non RAW-IO
		 * requests.
		 * As long as this thread resides in the kernel (_kaio) further
		 * asynchronous non RAW-IO requests can be submitted.
		 */
		_aio_kernel_suspend++;

		/*
		 * Always do the kaio() call without using the KAIO_SUPPORTED()
		 * checks because it is not mandatory to have a valid fd
		 * set in the list entries, only the resultp must be set.
		 *
		 * _kaio(AIOSUSPEND ...) return values:
		 * 0:  everything ok, completed request found
		 * -1: error
		 * 1:  no error: _aiodone() woke up the _kaio(AIOSUSPEND,,)
		 *	system call using _kaio(AIONOTIFY), which means that
		 *	some non RAW-IOs completed in between.
		 */

		pthread_cleanup_push(_aio_suspend_cleanup,
		    &_aio_kernel_suspend);
		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
		sig_mutex_unlock(&__aio_mutex);
		_cancel_prologue();
		kerr = (int)_kaio(largefile ? AIOSUSPEND64 : AIOSUSPEND,
		    list, nent, timo, -1);
		_cancel_epilogue();
		pthread_cleanup_pop(1);	/* sig_mutex_lock(&__aio_mutex) */
		pthread_cleanup_pop(0);

		_aio_kernel_suspend--;

		if (!kerr) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
	} else {
		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
	}

	/*
	 * Return kernel error code if no other IOs are outstanding.
	 */
	req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;

	sig_mutex_unlock(&__aio_mutex);

	if (req_outstanding == 0) {
		/* no IOs outstanding in the thread pool */
		if (kerr == 1)
			/* return "no IOs completed" */
			errno = EAGAIN;
		return (-1);
	}

	/*
	 * IOs using the thread pool are outstanding.
	 */
	if (timedwait == AIO_TIMEOUT_WAIT) {
		/* time monitoring */
		hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
		    (hrtime_t)timo->tv_nsec;
		hrtres = hrtend - gethrtime();
		if (hrtres <= 0)
			hrtres = 1;
		twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
		twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
		wait = &twait;
	} else if (timedwait == AIO_TIMEOUT_POLL) {
		twait = *timo;	/* content of timo = 0 : polling */
		wait = &twait;
	}

	for (;;) {
		int	error;
		int	inprogress;

		/* first scan file system requests */
		inprogress = 0;
		for (i = 0; i < nent; i++) {
#if !defined(_LP64)
			if (largefile) {
				if ((aiocbp64 = listp64[i]) == NULL)
					continue;
				error = aiocbp64->aio_resultp.aio_errno;
			} else
#endif
			{
				if ((aiocbp = listp[i]) == NULL)
					continue;
				error = aiocbp->aio_resultp.aio_errno;
			}
			if (error == EINPROGRESS)
				inprogress = 1;
			else if (error != ECANCELED) {
				errno = 0;
				return (0);
			}
		}

		sig_mutex_lock(&__aio_mutex);

		/*
		 * If there aren't outstanding I/Os in the thread pool then
		 * we have to return here, provided that all kernel RAW-IOs
		 * also completed.
		 * If the kernel was notified to return, then we have to check
		 * possible pending RAW-IOs.
		 */
		if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		/*
		 * There are outstanding IOs in the thread pool or the kernel
		 * was notified to return.
		 * Check pending RAW-IOs first.
		 */
		if (kerr == 1) {
			/*
			 * _aiodone just notified the kernel about
			 * completed non RAW-IOs (AIONOTIFY was detected).
			 */
			if (timedwait == AIO_TIMEOUT_WAIT) {
				/* Update remaining timeout for the kernel */
				hrtres = hrtend - gethrtime();
				if (hrtres <= 0) {
					/* timer expired */
					sig_mutex_unlock(&__aio_mutex);
					errno = EAGAIN;
					break;
				}
				wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
				wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
			}
			_aio_kernel_suspend++;

			pthread_cleanup_push(_aio_suspend_cleanup,
			    &_aio_kernel_suspend);
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			kerr = (int)_kaio(largefile ? AIOSUSPEND64 : AIOSUSPEND,
			    list, nent, wait, -1);
			_cancel_epilogue();
			pthread_cleanup_pop(1);
			pthread_cleanup_pop(0);

			_aio_kernel_suspend--;

			if (!kerr) {
				sig_mutex_unlock(&__aio_mutex);
				return (0);
			}
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		if (timedwait == AIO_TIMEOUT_WAIT) {
			/* Update remaining timeout */
			hrtres = hrtend - gethrtime();
			if (hrtres <= 0) {
				/* timer expired */
				sig_mutex_unlock(&__aio_mutex);
				errno = EAGAIN;
				break;
			}
			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
		}

		if (_aio_outstand_cnt == 0) {
			sig_mutex_unlock(&__aio_mutex);
			continue;
		}

		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */

		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
		if (timedwait == AIO_TIMEOUT_WAIT) {
			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
			    &__aio_mutex, wait);
			if (cv_err == ETIME)
				cv_err = EAGAIN;
		} else {
			/* wait indefinitely */
			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
		}
		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
		pthread_cleanup_pop(1);

		if (cv_err) {
			errno = cv_err;
			break;
		}
	}
	return (-1);
}

int
aio_suspend(const aiocb_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 0));
}
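
/*
 * Illustrative sketch (not part of this library) of waiting with a bound
 * on the blocking time; "cb1" and "cb2" are assumed to have been submitted
 * already with aio_read()/aio_write().
 *
 *	const aiocb_t * const waitlist[2] = { &cb1, &cb2 };
 *	timespec_t ts;
 *
 *	ts.tv_sec = 5;
 *	ts.tv_nsec = 0;
 *	if (aio_suspend(waitlist, 2, &ts) != 0) {
 *		EAGAIN: the five seconds elapsed with nothing completed;
 *		EINTR:  a signal interrupted the wait.
 *	}
 *
 * A timeout of {0, 0} makes the call poll once (AIO_TIMEOUT_POLL above);
 * a NULL timeout waits indefinitely (AIO_TIMEOUT_INDEF).
 */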

int
aio_error(const aiocb_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb_t *)aiocbp)->aio_state = CHECK;
		}
		if (error == EINPROGRESS)
			return (EINPROGRESS);
	}
	sig_mutex_lock(&__aio_mutex);
	if (aiocbp->aio_returned != AIO_NOTRETURNED) {
		/* aio_return() was called or aiocb is uninitialized */
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	if ((reqp = _aio_hash_del((aio_result_t *)resultp)) == NULL) {
		((aiocb_t *)aiocbp)->aio_state = CHECKED;
		sig_mutex_unlock(&__aio_mutex);
	} else {
		((aiocb_t *)aiocbp)->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}
	return (error);
}

ssize_t
aio_return(aiocb_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	if (error == EINPROGRESS) {
		errno = EINPROGRESS;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() or aio_error() will return the fact that
	 * the result has already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	if (aiocbp->aio_returned != AIO_NOTRETURNED) {
		/* aio_return() already called or aiocb is uninitialized */
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	aiocbp->aio_returned = 0;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

void
_lio_remove(aio_req_t *reqp)
{
	aio_lio_t *head;
	int refcnt;

	if ((head = reqp->req_head) != NULL) {
		sig_mutex_lock(&head->lio_mutex);
		ASSERT(head->lio_refcnt == head->lio_nent);
		refcnt = --head->lio_nent;
		head->lio_refcnt--;
		sig_mutex_unlock(&head->lio_mutex);
		if (refcnt == 0)
			_aio_lio_free(head);
		reqp->req_head = NULL;
	}
}

/*
 * This function returns the number of asynchronous I/O requests submitted.
 */
static int
__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync(int op, aiocb_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_returned = AIO_NOTRETURNED;

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 0;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}
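
/*
 * Illustrative sketch (not part of this library): queueing a data-sync
 * barrier behind previously submitted writes on "fd" and waiting for it;
 * "fd" is assumed to be set up by the caller.
 *
 *	struct aiocb sync_cb;
 *
 *	(void) memset(&sync_cb, 0, sizeof (sync_cb));
 *	sync_cb.aio_fildes = fd;
 *	if (aio_fsync(O_DSYNC, &sync_cb) != 0)
 *		return (-1);			(could not queue the sync)
 *	while (aio_error(&sync_cb) == EINPROGRESS)
 *		(void) poll(NULL, 0, 10);
 *	if (aio_return(&sync_cb) == -1)
 *		return (-1);			(the fdatasync() failed)
 *
 * Per POSIX, the sync applies to the operations queued on aio_fildes at the
 * time of the call; requests submitted afterwards are not covered by it.
 */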

int
aio_cancel(int fd, aiocb_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

/*
 * __aio_waitn() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_waitn_cleanup(void *arg)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));

	/* check for pending aio_waitn() calls */
	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
		(void) cond_signal(&_aio_waitn_cv);
	}

	sig_mutex_unlock(&__aio_mutex);
}
/*
 * aio_waitn() can be used to reap the results of several I/O operations that
 * were submitted asynchronously. The I/Os can be submitted using the
 * existing POSIX interfaces: lio_listio(), aio_write() or aio_read().
 * aio_waitn() waits until "nwait" I/Os (supplied as a parameter) have
 * completed and returns pointers to the control blocks of these I/Os in
 * "list". The maximum size of this list is given by "nent" and the actual
 * number of I/Os completed is returned in "nwait". aio_waitn() may also
 * return early if the timeout expires. It returns 0 on success or -1 if an
 * error occurred.
 */
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
	int error = 0;
	uint_t dnwait = 0;	/* number of requests in the waitn-done list */
	uint_t kwaitcnt;	/* expected "done" requests from kernel */
	uint_t knentcnt;	/* max. expected "done" requests from kernel */
	int uerrno = 0;
	int kerrno = 0;		/* save errno from _kaio() call */
	int timedwait = AIO_TIMEOUT_UNDEF;
	aio_req_t *reqp;
	timespec_t end;
	timespec_t twait;	/* copy of utimo for internal calculations */
	timespec_t *wait = NULL;

	if (nent == 0 || *nwait == 0 || *nwait > nent) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Only one running aio_waitn() call per process is allowed.
	 * Further calls block here until the running call finishes.
	 */

	sig_mutex_lock(&__aio_mutex);

	while (_aio_flags & AIO_LIB_WAITN) {
		if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			return (0);
		}
		_aio_flags |= AIO_LIB_WAITN_PENDING;
		pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
		error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
		pthread_cleanup_pop(0);
		if (error != 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			errno = error;
			return (-1);
		}
	}

	pthread_cleanup_push(_aio_waitn_cleanup, NULL);

	_aio_flags |= AIO_LIB_WAITN;

	if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
		error = -1;
		dnwait = 0;
		goto out;
	}
	if (timedwait != AIO_TIMEOUT_INDEF) {
		twait = *utimo;
		wait = &twait;
	}

	/*
	 * If both counters are still set to zero, then only
	 * kernel requests are currently outstanding (raw-I/Os).
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		for (;;) {
			kwaitcnt = *nwait - dnwait;
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			if (error == 0) {
				dnwait += kwaitcnt;
				if (dnwait >= *nwait ||
				    *nwait < AIO_WAITN_MAXIOCBS)
					break;
				if (timedwait == AIO_TIMEOUT_WAIT) {
					error = _aio_get_timedelta(&end, wait);
					if (error == -1) {
						/* timer expired */
						errno = ETIME;
						break;
					}
				}
				continue;
			}
			if (errno == EAGAIN) {
				if (dnwait > 0)
					error = 0;
				break;
			}
			if (errno == ETIME || errno == EINTR) {
				dnwait += kwaitcnt;
				break;
			}
			/* fatal error */
			break;
		}

		goto out;
	}

	/* File system I/Os outstanding ... */

	if (timedwait == AIO_TIMEOUT_UNDEF) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

	for (;;) {
		uint_t	sum_reqs;

		/*
		 * Calculate the sum of active non RAW-IO requests (sum_reqs).
		 * If the expected number of completed requests (*nwait) is
		 * greater than the calculated sum (sum_reqs) then
		 * use _kaio to check pending RAW-IO requests.
		 */
		sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
		kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;

		if (kwaitcnt != 0) {
			/* possibly some kernel I/Os outstanding */
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			_aio_flags |= AIO_WAIT_INPROGRESS;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			_aio_flags &= ~AIO_WAIT_INPROGRESS;

			if (error == 0) {
				dnwait += kwaitcnt;
			} else {
				switch (errno) {
				case EINVAL:
				case EAGAIN:
					/* don't wait for kernel I/Os */
					kerrno = 0; /* ignore _kaio() errno */
					*nwait = _aio_doneq_cnt +
					    _aio_outstand_cnt + dnwait;
					error = 0;
					break;
				case EINTR:
				case ETIME:
					/* just scan for completed LIB I/Os */
					dnwait += kwaitcnt;
					timedwait = AIO_TIMEOUT_POLL;
					kerrno = errno;	/* save _kaio() errno */
					error = 0;
					break;
				default:
					kerrno = errno;	/* save _kaio() errno */
					break;
				}
			}
			if (error)
				break;		/* fatal kernel error */
		}

		/* check completed FS requests in the "done" queue */

		while (_aio_doneq_cnt && dnwait < nent) {
			/* get done requests */
			if ((reqp = _aio_req_remove(NULL)) != NULL) {
				(void) _aio_hash_del(reqp->req_resultp);
				list[dnwait++] = reqp->req_aiocbp;
				_aio_req_mark_done(reqp);
				_lio_remove(reqp);
				_aio_req_free(reqp);
			}
		}

		if (dnwait >= *nwait) {
			/* min. requested number of completed I/Os satisfied */
			break;
		}
		if (timedwait == AIO_TIMEOUT_WAIT &&
		    (error = _aio_get_timedelta(&end, wait)) == -1) {
			/* timer expired */
			uerrno = ETIME;
			break;
		}

		/*
		 * If some I/Os are outstanding and we have to wait for them,
		 * then sleep here.  _aiodone() will call _aio_waitn_wakeup()
		 * to wake up this thread as soon as the required number of
		 * I/Os has completed.
		 */
		if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
			/*
			 * _aio_waitn_wakeup() will wake up this thread when:
			 * - _aio_waitncnt requests are completed or
			 * - _aio_outstand_cnt becomes zero.
			 * sig_cond_reltimedwait() could also return with
			 * a timeout error (ETIME).
			 */
			if (*nwait < _aio_outstand_cnt)
				_aio_waitncnt = *nwait;
			else
				_aio_waitncnt = _aio_outstand_cnt;

			_aio_flags |= AIO_IO_WAITING;

			if (wait)
				uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
				    &__aio_mutex, wait);
			else
				uerrno = sig_cond_wait(&_aio_iowait_cv,
				    &__aio_mutex);

			_aio_flags &= ~AIO_IO_WAITING;

			if (uerrno == ETIME) {
				timedwait = AIO_TIMEOUT_POLL;
				continue;
			}
			if (uerrno != 0)
				timedwait = AIO_TIMEOUT_POLL;
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling or timer expired */
			break;
		}
	}

	errno = uerrno == 0 ? kerrno : uerrno;
	if (errno)
		error = -1;
	else
		error = 0;

out:
	*nwait = dnwait;

	pthread_cleanup_pop(1);		/* drops __aio_mutex */

	return (error);
}

int
aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}
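
/*
 * Illustrative sketch (not part of this library): reaping at least half of
 * NREQS previously submitted requests within one second; the requests are
 * assumed to have been queued already, e.g. with lio_listio(LIO_NOWAIT).
 *
 *	aiocb_t *done[NREQS];
 *	uint_t nwait = NREQS / 2;
 *	timespec_t ts;
 *	uint_t i;
 *
 *	ts.tv_sec = 1;
 *	ts.tv_nsec = 0;
 *	if (aio_waitn(done, NREQS, &nwait, &ts) != 0 && errno != ETIME)
 *		return (-1);
 *	for (i = 0; i < nwait; i++)
 *		(void) aio_return(done[i]);	(reap each completed request)
 *
 * On return "nwait" holds the number of completed requests actually placed
 * into "done", whether the call succeeded or timed out.
 */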

void
_aio_waitn_wakeup(void)
{
	/*
	 * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
	 * it is waiting for completed I/Os.  The number of required
	 * completed I/Os is stored into "_aio_waitncnt".
	 * aio_waitn() is woken up when
	 * - there are no further outstanding I/Os
	 *   (_aio_outstand_cnt == 0) or
	 * - the expected number of I/Os has completed.
	 * Only one __aio_waitn() function waits for completed I/Os at
	 * a time.
	 *
	 * __aio_suspend() increments "_aio_suscv_cnt" to notify
	 * _aiodone() that at least one __aio_suspend() call is
	 * waiting for completed I/Os.
	 * There could be more than one __aio_suspend() function
	 * waiting for completed I/Os.  Because every function should
	 * be waiting for different I/Os, _aiodone() has to wake up all
	 * __aio_suspend() functions each time.
	 * Every __aio_suspend() function will compare the recently
	 * completed I/O with its own list.
	 */
	ASSERT(MUTEX_HELD(&__aio_mutex));
	if (_aio_flags & AIO_IO_WAITING) {
		if (_aio_waitncnt > 0)
			_aio_waitncnt--;
		if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
		    _aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	} else {
		/* Wake up waiting aio_suspend calls */
		if (_aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	}
}

/*
 * timedwait values:
 * AIO_TIMEOUT_POLL : polling
 * AIO_TIMEOUT_WAIT : timeout
 * AIO_TIMEOUT_INDEF : wait indefinitely
 */
static int
_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
{
	struct timeval curtime;

	if (utimo) {
		if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
		    utimo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
			(void) gettimeofday(&curtime, NULL);
			end->tv_sec = utimo->tv_sec + curtime.tv_sec;
			end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
			if (end->tv_nsec >= NANOSEC) {
				end->tv_nsec -= NANOSEC;
				end->tv_sec += 1;
			}
			*timedwait = AIO_TIMEOUT_WAIT;
		} else {
			/* polling */
			*timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		*timedwait = AIO_TIMEOUT_INDEF;	/* wait indefinitely */
	}
	return (0);
}

#if !defined(_LP64)

int
aio_read64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_returned = AIO_NOTRETURNED;
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_returned = AIO_NOTRETURNED;
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb64_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread64(aiocbp) != 0)
			return (-1);
		aiocbp->aio_returned = AIO_NOTRETURNED;
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 1;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD64;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE64;
				break;
			}
			error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

int
aio_suspend64(const aiocb64_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 1));
}

int
aio_error64(const aiocb64_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb64_t *)aiocbp)->aio_state = CHECK;
		}
		if (error == EINPROGRESS)
			return (EINPROGRESS);
	}
	sig_mutex_lock(&__aio_mutex);
	if (aiocbp->aio_returned != AIO_NOTRETURNED) {
		/* aio_return() was called or aiocb is uninitialized */
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	if ((reqp = _aio_hash_del((aio_result_t *)resultp)) == NULL) {
		((aiocb64_t *)aiocbp)->aio_state = CHECKED;
		sig_mutex_unlock(&__aio_mutex);
	} else {
		((aiocb64_t *)aiocbp)->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}
	return (error);
}

ssize_t
aio_return64(aiocb64_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	if (error == EINPROGRESS) {
		errno = EINPROGRESS;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() or aio_error() will return the fact that
	 * the result has already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	if (aiocbp->aio_returned != AIO_NOTRETURNED) {
		/* aio_return() already called or aiocb is uninitialized */
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	aiocbp->aio_returned = 0;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

static int
__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync64(int op, aiocb64_t *aiocbp)
{
	aio_lio_t *head;
	struct stat64 statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat64(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_returned = AIO_NOTRETURNED;

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 1;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel64(int fd, aiocb64_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat64 buf;

	if (fstat64(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

int
aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

#endif	/* !defined(_LP64) */