aio.c revision 16660111facc11b9bca945ff5e29d7b6339decc3
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Kernel asynchronous I/O.
* This is only for raw devices now (as of Nov. 1993).
*/
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/aio_impl.h>
/*
* external entry point.
*/
#ifdef _LP64
#endif
#define AIO_64 0
#define AIO_32 1
#define AIO_LARGEFILE 2
/*
* implementation specific functions (private)
*/
#ifdef _LP64
#endif
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
cred_t *);
static aio_t *aio_aiop_alloc();
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
aio_result_t *, vnode_t *);
static int aio_cleanup_thread(aio_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiosuspend(void *, int, struct timespec *, int,
long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void *, long *, int);
static int aiorw(int, void *, int, int);
static int alioLF(int, void *, int, void *);
aio_result_t *, vnode_t *);
static int alio32(int, void *, int, void *);
#ifdef _SYSCALL32_IMPL
#endif /* _SYSCALL32_IMPL */
/*
* implementation specific functions (external)
*/
/*
* Event Port framework
*/
static int aio_port_callback(void *, int *, pid_t, int, void *);
/*
* This is the loadable module wrapper.
*/
#ifdef _LP64
static struct sysent kaio_sysent = {
6,
(int (*)())kaioc
};
#ifdef _SYSCALL32_IMPL
static struct sysent kaio_sysent32 = {
7,
};
#endif /* _SYSCALL32_IMPL */
#else /* _LP64 */
static struct sysent kaio_sysent = {
7,
};
#endif /* _LP64 */
/*
* Module linkage information for the kernel.
*/
"kernel Async I/O",
};
#ifdef _SYSCALL32_IMPL
"kernel Async I/O for 32 bit compatibility",
};
#endif /* _SYSCALL32_IMPL */
static struct modlinkage modlinkage = {
&modlsys,
#ifdef _SYSCALL32_IMPL
#endif
};
/*
* _init - part of the loadable module wrapper declared above.
*
* Takes no arguments and returns an int: either the value of retval
* or 0.
*
* NOTE(review): in this view retval is returned without any visible
* assignment, and the trailing return (0) cannot be reached. The
* statement that sets and tests retval is presumably on a line that
* is not visible here -- confirm against the full source.
*/
int
_init(void)
{
int retval;
return (retval);
return (0);
}
/*
* _fini - part of the loadable module wrapper declared above.
*
* Takes no arguments and returns the value of retval.
*
* NOTE(review): in this view retval is returned without any visible
* assignment. The statement that sets it is presumably on a line that
* is not visible here -- confirm against the full source.
*/
int
_fini(void)
{
int retval;
return (retval);
}
int
{
}
#ifdef _LP64
static int64_t
long a0,
long a1,
long a2,
long a3,
long a4,
long a5)
{
int error;
long rval = 0;
switch ((int)a0 & ~AIO_POLL_BIT) {
case AIOREAD:
break;
case AIOWRITE:
break;
case AIOWAIT:
break;
case AIOWAITN:
(timespec_t *)a4);
break;
case AIONOTIFY:
break;
case AIOINIT:
break;
case AIOSTART:
break;
case AIOLIO:
break;
case AIOLIOWAIT:
break;
case AIOSUSPEND:
break;
case AIOERROR:
break;
case AIOAREAD:
break;
case AIOAWRITE:
break;
case AIOCANCEL:
break;
/*
* The large file related stuff is valid only for
* 32 bit kernel and not for 64 bit kernel
* On 64 bit kernel we convert large file calls
* to regular 64bit calls.
*/
default:
}
if (error)
return (rval);
}
#endif
/*
* kaio - dispatch a kernel aio request on its opcode.
*
* The opcode is read from uap[0]; AIO_POLL_BIT is masked off before
* the switch so that polled and non-polled variants of an opcode share
* one case label. AIONOTIFY, AIOINIT and AIOSTART return the result of
* aionotify(), aioinit() and aiostart() directly. An opcode that
* matches no case label returns EINVAL. Cases that leave the switch
* with break return the value of error.
*
* NOTE(review): the parameter list and the call made under most case
* labels are not visible in this view, so only the dispatch structure
* is described here -- confirm details against the full source.
*/
static int
kaio(
{
long rval = 0;
int error = 0;
#if defined(_LITTLE_ENDIAN)
#else
#endif
/* strip the poll bit so it does not affect opcode matching */
switch (uap[0] & ~AIO_POLL_BIT) {
/*
* It must be the 32 bit system call on 64 bit kernel
*/
case AIOREAD:
case AIOWRITE:
case AIOWAIT:
&rval);
break;
case AIOWAITN:
break;
case AIONOTIFY:
return (aionotify());
case AIOINIT:
return (aioinit());
case AIOSTART:
return (aiostart());
case AIOLIO:
(void *)uap[4]));
case AIOLIOWAIT:
case AIOSUSPEND:
break;
case AIOERROR:
case AIOAREAD:
case AIOAWRITE:
case AIOCANCEL:
AIO_32));
break;
/* the *64 opcodes below are handled with run mode AIO_LARGEFILE */
case AIOLIO64:
case AIOLIOWAIT64:
case AIOSUSPEND64:
break;
case AIOERROR64:
case AIOAREAD64:
case AIOAWRITE64:
case AIOCANCEL64:
&rval, AIO_LARGEFILE));
break;
default:
/* unknown opcode */
return (EINVAL);
}
return (error);
}
/*
* aionotify - wake up LWPs in this process that are sleeping in
* aiowait().
*
* Takes no arguments and returns 0 on every visible path. The visible
* body increments aiop->aio_notifycnt; the wait loops elsewhere in
* this file test that counter for a value greater than zero and
* decrement it before returning to their caller.
*/
static int
aionotify(void)
{
/*
* NOTE(review): as shown here this return is unconditional, which
* would make the increment below unreachable. The guarding test and
* the declaration of aiop are presumably on lines not visible in this
* view -- confirm against the full source.
*/
return (0);
aiop->aio_notifycnt++;
return (0);
}
static int
{
#ifdef _SYSCALL32_IMPL
struct timeval32 wait_time_32;
#endif
*blocking = 1;
return (0);
}
/*
* Need to correctly compare with the -1 passed in for a user
* address pointer, with both 32 bit and 64 bit apps.
*/
if (model == DATAMODEL_NATIVE) {
*blocking = 0;
return (0);
}
return (EFAULT);
}
#ifdef _SYSCALL32_IMPL
else {
/*
* -1 from a 32bit app. It will not get sign extended.
* don't wait if -1.
*/
*blocking = 0;
return (0);
}
return (EFAULT);
}
#endif /* _SYSCALL32_IMPL */
*blocking = 0;
return (0);
}
return (EINVAL);
*blocking = 1;
return (0);
}
static int
{
#ifdef _SYSCALL32_IMPL
#endif
*blocking = 1;
return (0);
}
if (model == DATAMODEL_NATIVE) {
return (EFAULT);
}
#ifdef _SYSCALL32_IMPL
else {
return (EFAULT);
}
#endif /* _SYSCALL32_IMPL */
*blocking = 0;
return (0);
}
return (EINVAL);
*blocking = 1;
return (0);
}
/*ARGSUSED*/
static int
int dontblockflg,
long *rval)
{
int error;
int blocking;
int timecheck;
return (EINVAL);
/*
* Establish the absolute future time for the timeout.
*/
if (error)
return (error);
if (rqtp) {
gethrestime(&now);
}
for (;;) {
/* process requests on poll queue */
aio_cleanup(0);
}
break;
}
/* user-level done queue might not be empty */
if (aiop->aio_notifycnt > 0) {
aiop->aio_notifycnt--;
*rval = 1;
break;
}
/* don't block if no outstanding aio */
break;
}
if (blocking) {
if (status > 0) /* check done queue again */
continue;
if (status == 0) { /* interrupted by a signal */
*rval = -1;
} else { /* timer expired */
}
}
break;
}
if (reqp) {
}
return (error);
}
/*
* aiowaitn can be used to reap completed asynchronous requests submitted with
* lio_listio, aio_read or aio_write.
*/
/*ARGSUSED*/
static int
{
int error = 0;
int iocb_index = 0;
int blocking = 1;
int timecheck;
return (EINVAL);
if (aiop->aio_outstanding == 0)
return (EAGAIN);
return (EFAULT);
/* set *nwait to zero, if we must return prematurely */
return (EFAULT);
if (waitcnt == 0) {
blocking = 0;
} else {
if (error)
return (error);
}
if (model == DATAMODEL_NATIVE)
#ifdef _SYSCALL32_IMPL
else
#endif /* _SYSCALL32_IMPL */
/*
* Only one aio_waitn call is allowed at a time.
* The active aio_waitn will collect all requests
* out of the "done" list and if necessary it will wait
* for some/all pending requests to fulfill the nwait
* parameter.
* A second or further aio_waitn calls will sleep here
* until the active aio_waitn finishes and leaves the kernel
* If the second call does not block (poll), then return
* immediately with the error code : EAGAIN.
* If the second call should block, then sleep here, but
* do not touch the timeout. The timeout starts when this
* aio_waitn-call becomes active.
*/
if (blocking == 0) {
return (EAGAIN);
}
/* block, no timeout */
return (EINTR);
}
}
/*
* Establish the absolute future time for the timeout.
*/
if (rqtp) {
gethrestime(&now);
}
}
return (ENOMEM);
}
} else {
}
for (;;) {
/* push requests on poll queue to done queue */
aio_cleanup(0);
}
/* check for requests on done queue */
}
/* user-level done queue might not be empty */
if (aiop->aio_notifycnt > 0) {
aiop->aio_notifycnt--;
error = 0;
break;
}
/*
* if we are here second time as a result of timer
* expiration, we reset error if there are enough
* aiocb's to satisfy request.
* We return also if all requests are already done
* and we picked up the whole done queue.
*/
error = 0;
break;
}
if (rval > 0)
continue;
if (rval < 0) {
blocking = 0;
continue;
}
}
break;
}
if (cnt > 0) {
if (model == DATAMODEL_NATIVE)
#ifdef _SYSCALL32_IMPL
else
#endif /* _SYSCALL32_IMPL */
}
/* check if there is another thread waiting for execution */
}
return (error);
}
/*
* aio_unlock_requests
* copies out the result of the request as well as the return value.
* It builds the list of completed asynchronous requests,
* unlocks the allocated memory ranges and
* puts the aio request structure back into the free list.
*/
static int
int iocb_index,
{
if (model == DATAMODEL_NATIVE) {
}
}
#ifdef _SYSCALL32_IMPL
else {
}
}
#endif /* _SYSCALL32_IMPL */
return (iocb_index);
}
/*
* aio_reqlist_concat
* moves "max" elements from the done queue to the reqlist queue and removes
* the AIO_DONEQ flag.
* - reqlist queue is a simple linked list
* - done queue is a doubly linked list
*/
static int
{
int count = 0;
while (max-- > 0) {
count++;
break;
}
/* all elements revised */
} else {
/*
* max < elements in the doneq
* detach only the required amount of elements
* out of the doneq
*/
}
return (count);
}
/*ARGSUSED*/
static int
void *aiocb,
int nent,
int flag,
long *rval,
int run_mode)
{
int error;
#ifdef _SYSCALL32_IMPL
#endif /* _SYSCALL32_IMPL */
int rv;
int i;
int blocking;
int timecheck;
return (EINVAL);
/*
* Establish the absolute future time for the timeout.
*/
if (error)
return (error);
if (rqtp) {
gethrestime(&now);
}
/*
* If we are not blocking and there's no IO complete
* skip aiocb copyin.
*/
return (EAGAIN);
}
if (model == DATAMODEL_NATIVE)
#ifdef _SYSCALL32_IMPL
else
#endif /* _SYSCALL32_IMPL */
return (ENOMEM);
goto done;
}
/*
* we need to get the aio_cleanupq_mutex since we call
* aio_req_done().
*/
for (;;) {
/* push requests on poll queue to done queue */
aio_cleanup(0);
}
/* check for requests on done queue */
if (model == DATAMODEL_NATIVE)
#ifdef _SYSCALL32_IMPL
else
#endif /* _SYSCALL32_IMPL */
for (i = 0; i < nent; i++) {
if (model == DATAMODEL_NATIVE) {
continue;
if (run_mode != AIO_LARGEFILE)
reqp = aio_req_done(
&cbp->aio_resultp);
else {
reqp = aio_req_done(
&cbp64->aio_resultp);
}
}
#ifdef _SYSCALL32_IMPL
else {
if ((cbp32 =
continue;
reqp = aio_req_done(
&cbp32->aio_resultp);
} else if (run_mode == AIO_LARGEFILE) {
if ((cbp64 =
(aiocb64_32_t *)(uintptr_t)
continue;
reqp = aio_req_done(
&cbp64->aio_resultp);
}
}
#endif /* _SYSCALL32_IMPL */
if (reqp) {
}
break;
}
if (found)
break;
}
if (aiop->aio_notifycnt > 0) {
/*
* nothing on the kernel's queue. the user
* has notified the kernel that it has items
* on a user-level queue.
*/
aiop->aio_notifycnt--;
*rval = 1;
error = 0;
break;
}
/* don't block if nothing is outstanding */
if (aiop->aio_outstanding == 0) {
break;
}
if (blocking) {
/*
* drop the aio_cleanupq_mutex as we are
* going to block.
*/
/*
* we have to drop aio_mutex and
* grab it in the right order.
*/
if (rv > 0) /* check done queue again */
continue;
if (rv == 0) /* interrupted by a signal */
else /* timer expired */
} else {
}
break;
}
}
done:
return (error);
}
/*
* aioinit - initialize aio by allocating an aio_t struct for this
* process.
*
* Takes no arguments. The visible code calls aio_aiop_alloc() between
* mutex_enter() and mutex_exit() on p->p_lock and stores the result in
* aiop. Returns ENOMEM or 0.
*
* NOTE(review): the declarations of p and aiop and the conditions
* that guard the allocation and the ENOMEM return are not visible in
* this view; presumably ENOMEM is returned only when the allocation
* yields no aio_t -- confirm against the full source.
*/
static int
aioinit(void)
{
mutex_enter(&p->p_lock);
aiop = aio_aiop_alloc();
}
mutex_exit(&p->p_lock);
return (ENOMEM);
return (0);
}
/*
* aiostart - start a special thread that will clean up after aio
* requests that are preventing a segment from being unmapped.
* as_unmap() blocks until all physio to this segment is completed.
* This doesn't happen until all the pages in this segment are not
* SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
* requests still outstanding. This special thread will make sure
* that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
*
* This function will return an error if the process has only
* one LWP. The assumption is that the caller is a separate LWP
* that remains blocked in the kernel for the life of this process.
*
* Returns EDEADLK when p->p_lwpcnt is 1. Otherwise the visible paths
* return either the result of aio_cleanup_thread(aiop) or the value
* of error.
*
* NOTE(review): the declarations of p, aiop and error, and the
* condition paired with the "else" below, are not visible in this
* view -- confirm against the full source.
*/
static int
aiostart(void)
{
/* a single-LWP process has no other LWP left to run once this one blocks */
if (p->p_lwpcnt == 1)
return (EDEADLK);
mutex_enter(&p->p_lock);
else {
}
mutex_exit(&p->p_lock);
return (aio_cleanup_thread(aiop));
/* should return only to exit */
}
return (error);
}
/*
* Associate an aiocb with a port.
* This function is used by aiorw() to associate a transaction with a port.
* Allocate an event port structure (port_alloc_event()) and store the
* delivered user pointer (portnfy_user) in the portkev_user field of the
* port_kevent_t structure.
* The aio_req_portkev pointer in the aio_req_t structure was added to identify
* the port association.
*/
static int
{
int error;
PORT_SOURCE_AIO, &pkevp);
if (error) {
else
} else {
}
return (error);
}
#ifdef _LP64
/*
* Asynchronous list IO. A chain of aiocb's is copied in
* one at a time. If the aiocb is invalid, it is skipped.
* For each aiocb, the appropriate driver entry point is
* called. Optimize for the common case where the list
* of requests is to the same file descriptor.
*
* One possible optimization is to define a new driver entry
* point that supports a list of IO requests. Whether this
* improves performance depends somewhat on the driver's
* locking strategy. Processing a list could adversely impact
* the driver's interrupt latency.
*/
static int
alio(
int mode_arg,
int nent,
{
int prev_mode = -1;
int (*aio_func)();
int mode;
int error = 0;
int aio_errors = 0;
int i;
int deadhead = 0;
int aio_notsupported = 0;
int lio_head_port;
int aio_port;
int aio_thread;
int portused = 0;
int event;
return (EINVAL);
return (EFAULT);
}
/* Event Ports */
if (sigev &&
return (EFAULT);
}
if (error) {
else
return (error);
}
portused = 1;
}
/*
* a list head should be allocated if notification is
* enabled for this list.
*/
if (error)
goto done;
deadhead = 1;
goto done;
}
} else {
}
if (pkevtp) {
/*
* Prepare data to send when list of aiocb's
* has completed.
*/
}
}
/* skip entry if it can't be copied. */
if (head) {
head->lio_refcnt--;
}
continue;
}
/* skip if opcode for aiocb is LIO_NOP */
if (head) {
head->lio_refcnt--;
}
continue;
}
/* increment file descriptor's ref count. */
if (head) {
head->lio_refcnt--;
}
aio_errors++;
continue;
}
/*
* check the permission of the partition
*/
if (head) {
head->lio_refcnt--;
}
aio_errors++;
continue;
}
/*
* common case where requests are to the same fd
* for the same r/w operation.
* for UFS, need to set EBADFD
*/
if (head) {
head->lio_refcnt--;
}
continue;
} else {
}
}
if (error) {
if (head) {
head->lio_refcnt--;
}
aio_errors++;
continue;
}
deadhead = 0;
/*
* Set the errno field now before sending the request to
* the driver to avoid a race condition
*/
if (aio_port | aio_thread) {
/*
* Prepare data to send with each aiocb completed.
*/
if (aio_port) {
void *paddr =
} else { /* aio_thread */
}
if (error)
/* EMPTY */;
else
&lpkevp);
if (error == 0) {
}
}
/*
* send the request to driver.
*/
if (error == 0) {
if (aiocb->aio_nbytes == 0) {
continue;
}
CRED());
}
/*
* the fd's ref count is not decremented until the IO has
* completed unless there was an error.
*/
if (error) {
if (head) {
head->lio_refcnt--;
}
else
aio_errors++;
} else {
}
}
if (aio_notsupported) {
} else if (aio_errors) {
/*
* return EIO if any request failed
*/
}
while (head->lio_refcnt > 0) {
goto done;
}
}
}
done:
if (deadhead) {
if (head->lio_portkev)
}
return (error);
}
#endif /* _LP64 */
/*
* Asynchronous list IO.
* If list I/O is called with LIO_WAIT it can still return
* before all the I/O's are completed if a signal is caught
* or if the list includes UFS I/O requests. If this happens,
* libaio will call aliowait() to wait for the I/O's to
* complete.
*/
/*ARGSUSED*/
static int
int mode,
void *aiocb,
int nent,
void *sigev,
int run_mode)
{
#ifdef _SYSCALL32_IMPL
#endif
int error = 0;
int i;
return (EINVAL);
if (model == DATAMODEL_NATIVE)
#ifdef _SYSCALL32_IMPL
else
#endif /* _SYSCALL32_IMPL */
if (ssize == 0)
return (EINVAL);
if (model == DATAMODEL_NATIVE)
#ifdef _SYSCALL32_IMPL
else
#endif /* _SYSCALL32_IMPL */
goto done;
}
/*
* To find the list head, we go through the
* list of aiocb structs, find the request
* it's for, then get the list head that reqp
* points to.
*/
for (i = 0; i < nent; i++) {
if (model == DATAMODEL_NATIVE) {
/*
* Since we are only checking for a NULL pointer
* Following should work on both native data sizes
* as well as for largefile aiocb.
*/
continue;
if (run_mode != AIO_LARGEFILE)
break;
else {
/*
* This is a case when largefile call is
* made on 32 bit kernel.
* Treat each pointer as pointer to
* aiocb64_32
*/
break;
}
}
#ifdef _SYSCALL32_IMPL
else {
if (run_mode == AIO_LARGEFILE) {
if ((cbp64 = (aiocb64_32_t *)
continue;
&cbp64->aio_resultp))
break;
continue;
&cbp32->aio_resultp))
break;
}
}
#endif /* _SYSCALL32_IMPL */
}
goto done;
}
while (head->lio_refcnt > 0) {
goto done;
}
}
done:
return (error);
}
{
long index;
return (NULL);
if (resultp) {
return (head);
}
}
}
return (NULL);
}
static void
{
/*
* the resultp field is a pointer to where the
* error should be written out to the user's
* aiocb.
*
*/
if (get_udatamodel() == DATAMODEL_NATIVE) {
(ssize_t)-1);
}
#ifdef _SYSCALL32_IMPL
else {
(uint_t)-1);
}
#endif /* _SYSCALL32_IMPL */
}
/*
* do cleanup completion for all requests in list. memory for
* each request is also freed.
*/
static void
{
int i;
for (i = 0; i < nent; i++) {
if (get_udatamodel() == DATAMODEL_NATIVE) {
continue;
if (run_mode == AIO_LARGEFILE) {
resultp = (aio_result_t *)
} else
}
#ifdef _SYSCALL32_IMPL
else {
continue;
} else if (run_mode == AIO_LARGEFILE) {
}
}
#endif /* _SYSCALL32_IMPL */
/*
* we need to get the aio_cleanupq_mutex since we call
* aio_req_done().
*/
}
}
}
/*
* Write out the results for an aio request that is done.
*/
static int
{
int retval;
return (EINVAL);
if (get_udatamodel() == DATAMODEL_NATIVE) {
if (run_mode == AIO_LARGEFILE)
else
}
#ifdef _SYSCALL32_IMPL
else {
if (run_mode == AIO_LARGEFILE)
}
#endif /* _SYSCALL32_IMPL */
/*
* we need to get the aio_cleanupq_mutex since we call
* aio_req_find().
*/
if (retval == 0) {
return (0);
} else if (retval == 1)
return (EINPROGRESS);
else if (retval == 2)
return (EINVAL);
return (0);
}
/*
* aio_cancel - if no requests outstanding,
* return AIO_ALLDONE
* else
* return AIO_NOTCANCELED
*/
static int
int fildes,
void *cb,
long *rval,
int run_mode)
{
void *resultp;
int index;
/*
* Verify valid file descriptor
*/
return (EBADF);
}
return (EINVAL);
if (aiop->aio_outstanding == 0) {
*rval = AIO_ALLDONE;
return (0);
}
if (get_udatamodel() == DATAMODEL_NATIVE) {
if (run_mode == AIO_LARGEFILE)
->aio_resultp;
else
}
#ifdef _SYSCALL32_IMPL
else {
if (run_mode == AIO_LARGEFILE)
->aio_resultp;
->aio_resultp;
}
#endif /* _SYSCALL32_IMPL */
*rval = AIO_ALLDONE;
return (0);
}
*rval = AIO_NOTCANCELED;
return (0);
}
}
*rval = AIO_ALLDONE;
return (0);
}
*rval = AIO_NOTCANCELED;
return (0);
}
}
}
}
*rval = AIO_ALLDONE;
return (0);
}
/*
* arw - solaris version of asynchronous read and write.
*
* Parameters as declared: opcode, fdes, bufp, bufsize and mode. In
* the visible code, opcode is tested for AIO_POLL_BIT and a bufsize
* of zero makes the function return 0 before the request is sent to
* the driver. The other parameters are not referenced on any visible
* line.
*
* Returns 0 on success. The visible failure returns are EINVAL,
* EBADF, EBADFD and the value of error; when error is non-zero after
* the request has been sent to the driver, aiop->aio_pending is
* decremented before returning.
*
* NOTE(review): most of the conditions guarding the early returns,
* the request set-up call and the driver call itself are not visible
* in this view -- confirm against the full source.
*/
static int
arw(
int opcode,
int fdes,
char *bufp,
int bufsize,
int mode)
{
int error;
int (*aio_func)();
#ifdef _LP64
#else
#endif
return (EINVAL);
return (EBADF);
}
/*
* check the permission of the partition
*/
return (EBADF);
}
return (EBADFD);
}
#ifdef _LP64
#else
#endif
if (error) {
return (error);
}
/*
* enable polling on this request if the opcode has
* the AIO poll bit set
*/
if (opcode & AIO_POLL_BIT)
/*
* NOTE(review): the statement controlled by the poll-bit test above
* is not visible here; as shown, the test would govern the bufsize
* check below -- confirm against the full source.
*/
if (bufsize == 0) {
return (0);
}
/*
* send the request to driver.
*/
/*
* the fd is stored in the aio_req_t by aio_req_setup(), and
* is released by the aio_cleanup_thread() when the IO has
* completed.
*/
if (error) {
/* undo the pending count for a request that failed */
aiop->aio_pending--;
return (error);
}
return (0);
}
/*
* posix version of asynchronous read and write
*/
static int
int opcode,
void *aiocb_arg,
int mode,
int run_mode)
{
#ifdef _SYSCALL32_IMPL
struct sigevent32 *sigev32;
#endif
int (*aio_func)();
int aio_use_port = 0;
model = get_udatamodel();
return (EINVAL);
if (model == DATAMODEL_NATIVE) {
if (run_mode != AIO_LARGEFILE) {
return (EFAULT);
return (EBADF);
}
} else {
/*
* We come here only when we make largefile
* call on 32 bit kernel using 32 bit library.
*/
return (EFAULT);
->aio_resultp);
return (EBADF);
}
&pntfy, sizeof (port_notify_t))) {
return (EFAULT);
}
aio_use_port = 1;
aio_use_port = 1;
}
}
#ifdef _SYSCALL32_IMPL
else {
/* 32 bit system call is being made on 64 bit kernel */
return (EFAULT);
return (EBADF);
}
} else if (run_mode == AIO_LARGEFILE) {
/*
* We come here only when we make largefile
* call on 64 bit kernel using 32 bit library.
*/
return (EFAULT);
->aio_resultp);
return (EBADF);
}
if (copyin(
&pntfy32, sizeof (port_notify32_t))) {
return (EFAULT);
}
aio_use_port = 1;
aio_use_port = 1;
}
}
#endif /* _SYSCALL32_IMPL */
/*
* check the permission of the partition
*/
return (EBADF);
}
return (EBADFD);
}
if (run_mode == AIO_LARGEFILE)
else
if (error) {
return (error);
}
/*
* enable polling on this request if the opcode has
* the AIO poll bit set
*/
if (opcode & AIO_POLL_BIT)
if (model == DATAMODEL_NATIVE)
#ifdef _SYSCALL32_IMPL
else
#endif
if (aio_use_port) {
}
/*
* send the request to driver.
*/
if (error == 0) {
if (bufsize == 0) {
return (0);
}
}
/*
* the fd is stored in the aio_req_t by aio_req_setup(), and
* is released by the aio_cleanup_thread() when the IO has
* completed.
*/
if (error) {
if (aio_use_port)
aiop->aio_pending--;
return (error);
}
return (0);
}
/*
* set error for a list IO entry that failed.
*/
static void
{
return;
if (portused)
aiop->aio_pending--;
/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
/*
* Need to free the request now as it's never
* going to get on the done queue
*
* Note: aio_outstanding is decremented in
* aio_req_free()
*/
}
/*
* aio_req_done - check if a specified request is done, and remove it
* from the done queue. Otherwise remove anybody from the done queue
* if NULL is specified.
*
* resultp: user-level result pointer identifying the request, or
* NULL to take any request.
*
* Returns the aio_req_t handed back by aio_req_remove(), or NULL.
* When resultp is non-NULL the visible paths return either
* aio_req_remove(ent) or NULL; the final NULL return is the "no
* match" case. When resultp is NULL the function returns
* aio_req_remove(NULL).
*
* NOTE(review): the lookup that produces ent and index, and the
* conditions separating the returns below, are not visible in this
* view -- confirm against the full source.
*/
static aio_req_t *
aio_req_done(void *resultp)
{
long index;
if (resultp) {
return (aio_req_remove(ent));
}
return (NULL);
}
}
/* no match, resultp is invalid */
return (NULL);
}
/* no specific request asked for: let aio_req_remove() pick one */
return (aio_req_remove(NULL));
}
/*
* determine if a user-level resultp pointer is associated with an
* active IO request. Zero is returned when the request is done,
* and the request is removed from the done queue. Only when the
* return value is zero, is the "reqp" pointer valid. One is returned
* when the request is in progress. Two is returned when the request
* is invalid.
*/
static int
{
long index;
return (0);
}
return (1);
}
}
/* no match, resultp is invalid */
return (2);
}
/*
* remove a request from the done queue.
*/
static aio_req_t *
{
/* only one request on queue */
} else {
}
} else {
/*
* The request can be either on the aio_doneq or the
* aio_cleanupq
*/
}
/* only one request on queue */
} else {
}
}
return (reqp);
}
static int
{
int error;
return (EAGAIN);
}
if (sqp)
return (EIO);
}
/*
* get an aio_reqp from the free list or allocate one
* from dynamic memory.
*/
if (sqp)
return (error);
}
aiop->aio_pending++;
aiop->aio_outstanding++;
/*
* initialize aio request.
*/
return (0);
}
/*
* aio_aiop_alloc - allocate p_aio struct.
*
* Takes no arguments and returns aiop. Further set-up is performed
* only when aiop is non-NULL, so a NULL aiop is returned to the
* caller unchanged.
*
* NOTE(review): the allocation call that produces aiop and the
* initialization call that ends in "NULL);" are not visible in this
* view -- confirm against the full source.
*/
static aio_t *
aio_aiop_alloc(void)
{
if (aiop) {
NULL);
}
return (aiop);
}
/*
* Allocate an aio_req struct.
*/
static int
{
} else {
/*
* Check whether memory is getting tight.
* This is a temporary mechanism to avoid memory
* exhaustion by a single process until we come up
* with a per process solution such as setrlimit().
*/
return (EAGAIN);
return (EAGAIN);
}
return (EINVAL);
}
return (0);
}
/*
* Allocate an aio_lio_t struct.
*/
static int
{
} else {
/*
* Check whether memory is getting tight.
* This is a temporary mechanism to avoid memory
* exhaustion by a single process until we come up
* with a per process solution such as setrlimit().
*/
return (EAGAIN);
return (EAGAIN);
}
return (0);
}
/*
* this is a special per-process thread that is only activated if
* the process is unmapping a segment with outstanding aio. normally,
* the process will have completed the aio before unmapping the
* segment. If the process does unmap a segment with outstanding aio,
* this special thread will guarantee that the locked pages due to
* aphysio() are released, thereby permitting the segment to be
* unmapped. In addition to this, the cleanup thread is woken up
* during DR operations to release the locked pages.
*/
static int
{
int poked = 0;
int exit_flag = 0;
int rqclnup = 0;
for (;;) {
/*
* if a segment is being unmapped, and the current
* process's done queue is not empty, then every request
* on the doneq with locked resources should be forced
* to release their locks. By moving the doneq request
* to the cleanupq, aio_cleanup() will process the cleanupq,
* and place requests back onto the doneq. All requests
* processed by aio_cleanup() will have their physical
* resources unlocked.
*/
if (aiop->aio_rqclnup) {
aiop->aio_rqclnup = 0;
rqclnup = 1;
}
}
}
/*
* thread should block on the cleanupcv while
* AIO_CLEANUP is set.
*/
continue;
}
/*
* AIO_CLEANUP determines when the cleanup thread
* should be active. This flag is set when
* the cleanup thread is awakened by as_unmap() or
* due to DR operations.
* The flag is cleared when the blocking as_unmap()
* that originally awakened us is allowed to
* complete. as_unmap() blocks when trying to
* unmap a segment that has SOFTLOCKed pages. when
* the segment's pages are all SOFTUNLOCKed,
* as->a_flags & AS_UNMAPWAIT should be zero.
*
* In case of cleanup request by DR, the flag is cleared
* once all the pending aio requests have been processed.
*
* The flag shouldn't be cleared right away if the
* cleanup thread was interrupted because the process
* is doing forkall(). This happens when cv_wait_sig()
* returns zero, because it was awakened by a pokelwps().
* If the process is not exiting, it must be doing forkall().
*/
if ((poked == 0) &&
(aiop->aio_pending == 0))) {
rqclnup = 0;
}
if (poked) {
/*
* immediately without waiting for pending I/O's
* and releasing the page locks.
*/
/*
* If exit_flag is set, then it is
* safe to exit because we have released
* page locks of completed I/O's.
*/
if (exit_flag)
break;
/*
* Wait for all the pending aio to complete.
*/
while (aiop->aio_pending != 0)
exit_flag = 1;
continue;
} else if (p->p_flag &
/*
* hold LWP until it
* is continued.
*/
mutex_enter(&p->p_lock);
mutex_exit(&p->p_lock);
poked = 0;
continue;
}
} else {
/*
* When started this thread will sleep on as->a_cv.
* as_unmap will awake this thread if the
* segment has SOFTLOCKed pages (poked = 0).
* 1. pokelwps() awakes this thread =>
* break the loop to check SEXITLWPS, SHOLDFORK, etc
* 2. as_unmap awakes this thread =>
* to break the loop it is necessary that
* - AS_UNMAPWAIT is set (as_unmap is waiting for
* memory to be unlocked)
* - AIO_CLEANUP is not set
* (if AIO_CLEANUP is set we have to wait for
* pending requests. aio_done will send a signal
* for every request which completes to continue
* unmapping the corresponding address range)
* 3. A cleanup request will wake this thread up, ex.
* by the DR operations. The aio_rqclnup flag will
* be set.
*/
while (poked == 0) {
/*
* The clean up requests that came in
* after we had just cleaned up, couldn't
* be causing the unmap thread to block - as
* unmap event happened first.
* Let aio_done() wake us up if it sees a need.
*/
if (aiop->aio_rqclnup &&
break;
if (AS_ISUNMAPWAIT(as) == 0)
if (aiop->aio_outstanding != 0)
break;
}
}
}
exit:
return (0);
}
/*
* save a reference to a user's outstanding aio in a hash list.
*/
static int
{
long index;
return (DUPLICATE);
}
return (0);
}
static int
cred_t *)
{
int (*aio_func)();
/*
* return NULL for requests to files and STREAMs so
* that libaio takes care of them.
*/
/* no stream device for kaio */
if (STREAMSTAB(major)) {
return (NULL);
}
} else {
return (NULL);
}
/*
* Check old drivers which do not have async I/O entry points.
*/
return (NULL);
return (NULL);
/*
* Check whether this device is a block device.
* Kaio is not supported for devices like tty.
*/
return (NULL);
/*
* Clustering: If vnode is a PXFS vnode, then the device may be remote.
* We cannot call the driver directly. Instead return the
* PXFS functions.
*/
return (clpxfs_aio_read);
else
return (clpxfs_aio_write);
}
else
/*
* Do we need this ?
* nodev returns ENXIO anyway.
*/
return (NULL);
return (aio_func);
}
/*
* Clustering: We want check_vp to return a function prototyped
* correctly that will be common to both PXFS and regular case.
* We define this intermediate function that will do the right
* thing for driver cases.
*/
static int
{
}
/*
* Clustering: We want check_vp to return a function prototyped
* correctly that will be common to both PXFS and regular case.
* We define this intermediate function that will do the right
* thing for driver cases.
*/
static int
{
}
/*
* This routine is called when a largefile call is made by a 32bit
* process on an ILP32 or LP64 kernel. All 64bit processes are large
* file by definition and will call alio() instead.
*/
static int
int mode_arg,
void *aiocb_arg,
int nent,
void *sigev)
{
int prev_mode = -1;
#ifdef _LP64
#endif
struct sigevent32 sigevk;
int (*aio_func)();
int mode;
int error = 0;
int aio_errors = 0;
int i;
int deadhead = 0;
int aio_notsupported = 0;
int lio_head_port;
int aio_port;
int aio_thread;
int portused = 0;
int event;
return (EINVAL);
return (EFAULT);
}
/* Event Ports */
if (sigev &&
} else if (copyin(
return (EFAULT);
}
if (error) {
else
return (error);
}
portused = 1;
}
/*
* a list head should be allocated if notification is
* enabled for this list.
*/
if (error)
goto done;
deadhead = 1;
goto done;
}
} else {
}
if (pkevtp) {
/*
* Prepare data to send when list of aiocb's
* has completed.
*/
}
}
/* skip entry if it can't be copied. */
if (head) {
head->lio_refcnt--;
}
continue;
}
/* skip if opcode for aiocb is LIO_NOP */
if (head) {
head->lio_refcnt--;
}
continue;
}
/* increment file descriptor's ref count. */
if (head) {
head->lio_refcnt--;
}
aio_errors++;
continue;
}
/*
* check the permission of the partition
*/
if (head) {
head->lio_refcnt--;
}
aio_errors++;
continue;
}
/*
* common case where requests are to the same fd
* for the same r/w operation
* for UFS, need to set EBADFD
*/
if (head) {
head->lio_refcnt--;
}
continue;
} else {
}
}
#ifdef _LP64
#else
#endif /* _LP64 */
if (error) {
if (head) {
head->lio_refcnt--;
}
aio_errors++;
continue;
}
deadhead = 0;
/*
* Set the errno field now before sending the request to
* the driver to avoid a race condition
*/
if (aio_port | aio_thread) {
/*
* Prepare data to send with each aiocb completed.
*/
if (aio_port) {
} else { /* aio_thread */
}
if (error)
/* EMPTY */;
else
&lpkevp);
if (error == 0) {
}
}
/*
* send the request to driver.
*/
if (error == 0) {
if (aiocb->aio_nbytes == 0) {
continue;
}
CRED());
}
/*
* the fd's ref count is not decremented until the IO has
* completed unless there was an error.
*/
if (error) {
if (head) {
head->lio_refcnt--;
}
else
aio_errors++;
} else {
}
}
if (aio_notsupported) {
} else if (aio_errors) {
/*
* return EIO if any request failed
*/
}
while (head->lio_refcnt > 0) {
goto done;
}
}
}
done:
if (deadhead) {
if (head->lio_portkev)
}
return (error);
}
#ifdef _SYSCALL32_IMPL
static void
{
/*
* See comment in sigqueue32() on handling of 32-bit
* sigvals in a 64-bit kernel.
*/
}
#endif
/*
* This function is used only for largefile calls made by
* 32 bit applications.
*/
static int
{
struct sigevent32 *sigev;
int error;
return (EAGAIN);
}
if (sqp)
return (EIO);
}
/*
* get an aio_reqp from the free list or allocate one
* from dynamic memory.
*/
if (sqp)
return (error);
}
aiop->aio_pending++;
aiop->aio_outstanding++;
/*
* initialize aio request.
*/
return (0);
}
/*
* This routine is called when a non largefile call is made by a 32bit
* process on an ILP32 or LP64 kernel.
*/
static int
int mode_arg,
void *aiocb_arg,
int nent,
void *sigev)
{
int prev_mode = -1;
#ifdef _LP64
struct sigevent32 sigevk;
#else
#endif
int (*aio_func)();
int mode;
int error = 0;
int aio_errors = 0;
int i;
int deadhead = 0;
int aio_notsupported = 0;
int lio_head_port;
int aio_port;
int aio_thread;
int portused = 0;
#ifdef _LP64
#else
#endif
int event;
return (EINVAL);
#ifdef _LP64
#else
#endif
return (EFAULT);
}
/* Event Ports */
if (sigev &&
} else if (copyin(
return (EFAULT);
}
if (error) {
else
return (error);
}
portused = 1;
}
/*
* a list head should be allocated if notification is
* enabled for this list.
*/
if (error)
goto done;
deadhead = 1;
goto done;
}
} else {
}
if (pkevtp) {
/*
* Prepare data to send when list of aiocb's has
* completed.
*/
}
}
/* skip entry if it can't be copied. */
#ifdef _LP64
#else
#endif
{
if (head) {
head->lio_refcnt--;
}
continue;
}
#ifdef _LP64
/*
* copy 32 bit structure into 64 bit structure
*/
#endif /* _LP64 */
/* skip if opcode for aiocb is LIO_NOP */
if (head) {
head->lio_refcnt--;
}
continue;
}
/* increment file descriptor's ref count. */
if (head) {
head->lio_refcnt--;
}
aio_errors++;
continue;
}
/*
* check the permission of the partition
*/
if (head) {
head->lio_refcnt--;
}
aio_errors++;
continue;
}
/*
* common case where requests are to the same fd
* for the same r/w operation
* for UFS, need to set EBADFD
*/
if (head) {
head->lio_refcnt--;
}
continue;
} else {
}
}
if (error) {
if (head) {
head->lio_refcnt--;
}
aio_errors++;
continue;
}
deadhead = 0;
/*
* Set the errno field now before sending the request to
* the driver to avoid a race condition
*/
if (aio_port | aio_thread) {
/*
* Prepare data to send with each aiocb completed.
*/
#ifdef _LP64
if (aio_port) {
} else { /* aio_thread */
}
#else
if (aio_port) {
void *paddr =
} else { /* aio_thread */
}
#endif
if (error)
/* EMPTY */;
else
&lpkevp);
if (error == 0) {
}
}
/*
* send the request to driver.
*/
if (error == 0) {
if (aiocb->aio_nbytes == 0) {
continue;
}
CRED());
}
/*
* the fd's ref count is not decremented until the IO has
* completed unless there was an error.
*/
if (error) {
if (head) {
head->lio_refcnt--;
}
else
aio_errors++;
} else {
}
}
if (aio_notsupported) {
} else if (aio_errors) {
/*
* return EIO if any request failed
*/
}
while (head->lio_refcnt > 0) {
goto done;
}
}
}
done:
if (deadhead) {
if (head->lio_portkev)
}
return (error);
}
#ifdef _SYSCALL32_IMPL
void
{
/*
* See comment in sigqueue32() on handling of 32-bit
* sigvals in a 64-bit kernel.
*/
}
#endif /* _SYSCALL32_IMPL */
/*
* aio_port_callback() is called just before the event is retrieved from the
* port. The task of this callback function is to finish the work of the
* transaction for the application, which means:
* - copyout transaction data to the application
* (this thread is running in the right process context)
* - keep track of the transaction (update of counters).
* - free allocated buffers
* The aiocb pointer is the object element of the port_kevent_t structure.
*
* flag :
* PORT_CALLBACK_DEFAULT : do copyout and free resources
* PORT_CALLBACK_CLOSE : don't do copyout, free resources
*/
/*ARGSUSED*/
int
{
void *resultp;
/* wrong proc !!, can not deliver data here ... */
return (EACCES);
}
return (0);
}
if (flag == PORT_CALLBACK_DEFAULT)
return (0);
}