fileaio-posix.cpp revision 571c90a734400801da973f986190fac9fc5efd0d
/* $Id$ */
/** @file
* IPRT - File async I/O, native implementation for POSIX compliant host platforms.
*/
/*
* Copyright (C) 2006-2007 Sun Microsystems, Inc.
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
* Clara, CA 95054 USA or visit http://www.sun.com if you need
* additional information or have any questions.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#define LOG_GROUP RTLOGGROUP_DIR
#include <iprt/semaphore.h>
#if defined(RT_OS_DARWIN)
#endif
#include <aio.h>
#include <errno.h>
#include <time.h>
/*
* Linux does not define this value.
* Just define it with a really big
* value.
*/
#ifndef AIO_LISTIO_MAX
# define AIO_LISTIO_MAX UINT32_MAX
#endif
/*******************************************************************************
* Structures and Typedefs *
*******************************************************************************/
/**
* Async I/O request state.
*
* Requests are chained through pNext/pPrev and refer back to the
* completion context they are assigned to through pCtxInt.
*/
typedef struct RTFILEAIOREQINTERNAL
{
/** The aio control block. FIRST ELEMENT! */
/** Next element in the chain. */
struct RTFILEAIOREQINTERNAL *pNext;
/** Previous element in the chain. */
struct RTFILEAIOREQINTERNAL *pPrev;
/** Current state the request is in. */
/** Flag indicating whether this is a flush request. */
bool fFlush;
/** Flag indicating whether the request was canceled. */
volatile bool fCanceled;
/** Opaque user data. */
void *pvUser;
/** Number of bytes actually transferred. */
/** Status code. */
int Rc;
/** Completion context we are assigned to. */
struct RTFILEAIOCTXINTERNAL *pCtxInt;
/** Entry in the waiting list the request is in. */
unsigned iWaitingList;
/** Magic value (RTFILEAIOREQ_MAGIC). */
/**
* Async I/O completion context state.
*/
typedef struct RTFILEAIOCTXINTERNAL
{
/** Current number of requests active on this context. */
/** Maximum number of requests this context can handle. */
/** Handle of the thread which is currently waiting for requests. */
volatile RTTHREAD hThreadWait;
/** Flag whether the thread was woken up. */
volatile bool fWokenUp;
/** Flag whether the thread is currently waiting in the syscall. */
volatile bool fWaiting;
/** Magic value (RTFILEAIOCTX_MAGIC). */
/** Flag whether the thread was woken up due to an internal event. */
volatile bool fWokenUpInternal;
/** List of new requests which need to be inserted into apReqs by the
* waiting thread. */
/** Special entry for requests which are canceled. Because only one
* request can be canceled at a time and the thread canceling the request
* has to wait, we need only one entry. */
volatile PRTFILEAIOREQINTERNAL pReqToCancel;
/** Event semaphore the canceling thread waits on until the operation
* has completed. */
/** Number of elements in the waiting list. */
unsigned cReqsWait;
/** First free slot in the waiting list. */
unsigned iFirstFree;
/** List of requests we are currently waiting on.
* Size depends on cMaxRequests. */
/**
* Internal worker for waking up the waiting thread.
*/
{
/*
* Read the thread handle before the status flag.
* If we read the handle after the flag we might
* end up with an invalid handle because the thread
* waiting in RTFileAioCtxWakeup() might get scheduled
* before we read the flag and return.
* (NOTE(review): the waiter presumably sits in RTFileAioCtxWait(),
* not RTFileAioCtxWakeup() - confirm.)
* We can ensure that the handle is valid if fWaiting is true
* when reading the handle before the status flag.
*/
if (fWaiting)
{
/*
* If a thread waits the handle must be valid.
* It is possible that the thread returns from
* aio_suspend() before the signal is sent.
* This is no problem because we already set fWokenUp
* to true which will let the thread return VERR_INTERRUPTED
* and the next call to RTFileAioCtxWait() will not
* return VERR_INTERRUPTED because signals are not saved
* and will simply vanish if the destination thread can't
* receive them.
*/
}
}
/**
* Internal worker processing events and inserting new requests into the waiting list.
*
* @returns rc, which starts out as VINF_SUCCESS.
*/
{
int rc = VINF_SUCCESS;
/* Process new requests first. */
if (fWokenUp)
{
{
/* Take over the pending list of this slot by exchanging its head with NULL. */
PRTFILEAIOREQINTERNAL pReqHead = (PRTFILEAIOREQINTERNAL)ASMAtomicXchgPtr((void* volatile*)&pCtxInt->apReqsNewHead[iSlot],
NULL);
while (pReqHead)
{
/* Clear pointer to next and previous element just for safety. */
/* Advance the first-free index of the waiting list. */
pCtxInt->iFirstFree++;
}
}
/* Check if a request needs to be canceled. */
PRTFILEAIOREQINTERNAL pReqToCancel = (PRTFILEAIOREQINTERNAL)ASMAtomicReadPtr((void* volatile*)&pCtxInt->pReqToCancel);
if (pReqToCancel)
{
/* Take it out of the waiting list. */
}
}
else
{
}
return rc;
}
{
int rcBSD = 0;
#if defined(RT_OS_DARWIN)
/*
* Darwin only. NOTE(review): the argument list below (pointer for the old
* value, its size, no new value) presumably belongs to a sysctl-style
* query whose call line is not visible here - confirm.
*/
int cReqsOutstandingMax = 0;
size_t cbParameter = sizeof(int);
&cReqsOutstandingMax, /* Where to store the old value. */
&cbParameter, /* Size of the memory pointed to. */
NULL, /* Where the new value is located. */
NULL); /* Where the size of the new value is stored. */
/* The query failed: translate errno into an IPRT status code. */
if (rcBSD == -1)
return RTErrConvertFromErrno(errno);
pAioLimits->cbBufferAlignment = 0;
#else
/* All other hosts: report a buffer alignment of 0 as well. */
pAioLimits->cbBufferAlignment = 0;
#endif
return VINF_SUCCESS;
}
{
/* pReqInt is NULL here when the request could not be allocated (the allocation itself is not visible). */
if (RT_UNLIKELY(!pReqInt))
return VERR_NO_MEMORY;
return VINF_SUCCESS;
}
{
/*
* Validate the handle and ignore nil.
* A nil handle is accepted and reported as success without doing anything.
*/
if (hReq == NIL_RTFILEAIOREQ)
return VINF_SUCCESS;
/*
* Trash the magic and free it.
*/
return VINF_SUCCESS;
}
/**
* Worker setting up the request.
*
* Only the last two parameters of the signature are visible here.
*
* @returns VINF_SUCCESS.
* @param uTransferDirection Transfer direction selector (NOTE(review):
* the accepted values are not visible here -
* confirm against the callers).
* @param pvUser Opaque user data.
*/
unsigned uTransferDirection,
void *pvUser)
{
/*
* Validate the input.
*/
/* Zero-sized transfers are rejected by assertion only. */
Assert(cbTransfer > 0);
return VINF_SUCCESS;
}
{
}
{
}
{
return VINF_SUCCESS;
}
{
}
{
/*
* Map the cancel result to an IPRT status code.
* NOTE(review): rcPosix presumably holds the return value of the POSIX
* cancel call, which is not visible here - confirm.
*/
if (rcPosix == AIO_CANCELED)
{
/*
* Notify the waiting thread that the request was canceled.
*/
("Invalid state. Request was canceled but wasn't submitted\n"));
/* Wait for acknowledge. */
return VINF_SUCCESS;
}
else if (rcPosix == AIO_ALLDONE)
/* The request had already completed, nothing left to cancel. */
return VERR_FILE_AIO_COMPLETED;
else if (rcPosix == AIO_NOTCANCELED)
/* The request could not be canceled and is still in progress. */
return VERR_FILE_AIO_IN_PROGRESS;
else
/* Any other result is treated as a failure described by errno. */
return RTErrConvertFromErrno(errno);
}
{
&& (pcbTransfered))
}
{
/* The size expression ends by adding one request pointer for each of the cAioReqsMax requests. */
+ cAioReqsMax * sizeof(PRTFILEAIOREQINTERNAL));
if (RT_UNLIKELY(!pCtxInt))
return VERR_NO_MEMORY;
/* Create event semaphore. */
if (RT_FAILURE(rc))
{
/* Hand the failure status back to the caller. */
return rc;
}
return VINF_SUCCESS;
}
{
return VERR_FILE_AIO_BUSY;
return VINF_SUCCESS;
}
{
/* A nil context handle yields RTFILEAIO_UNLIMITED_REQS. */
if (hAioCtx == NIL_RTFILEAIOCTX)
return RTFILEAIO_UNLIMITED_REQS;
else
/* Otherwise report the limit stored in the context. */
return pCtxInt->cMaxRequests;
}
{
return VINF_SUCCESS;
}
{
int rc = VINF_SUCCESS;
/* Parameter checks */
/* Check that we don't exceed the limit */
return VERR_FILE_AIO_LIMIT_EXCEEDED;
/*
* Work through the request array in rounds until cReqs drops to zero
* or a failure breaks out of the loop.
*/
do
{
int rcPosix = 0;
size_t cReqsSubmit = 0;
size_t i = 0;
/* Collect a batch of at most AIO_LISTIO_MAX requests. */
while ( (i < cReqs)
&& (i < AIO_LISTIO_MAX))
{
{
/* Undo everything and stop submitting. */
{
/* Unlink from the list again. */
if (pNext)
if (pPrev)
else
}
break;
}
/* Link them together. */
if (pHead)
break;
cReqsSubmit++;
i++;
}
if (cReqsSubmit)
{
if (RT_UNLIKELY(rcPosix < 0))
{
else
/* Check which ones were not submitted. */
for (i = 0; i < cReqs; i++)
{
if (rcPosix != EINPROGRESS)
{
{
/* Was not submitted. */
}
else
{
/* An error occurred. */
pReqInt->cbTransfered = 0;
}
/* Unlink from the list. */
if (pNext)
if (pPrev)
else
}
}
break;
}
/* Advance past the requests handled in this round. */
cReqs -= cReqsSubmit;
pahReqs += cReqsSubmit;
}
/* Check if we have a flush request now. */
if (cReqs)
{
/*
* lio_listio does not work with flush requests so
* we have to use aio_fsync directly.
*/
if (RT_UNLIKELY(rcPosix < 0))
{
pReqInt->cbTransfered = 0;
/* Unlink from the list. */
if (pNext)
if (pPrev)
else
break;
}
/* The flush request is consumed; move on to the next request. */
cReqs--;
pahReqs++;
}
} while (cReqs);
if (pHead)
{
/*
* Forward successfully submitted requests to the thread waiting for requests.
* We search for a free slot first and if we don't find one
* we will grab the first one and append our list to the existing entries.
*/
unsigned iSlot = 0;
iSlot++;
{
/* Nothing found. */
/* Take over whatever is queued in slot 0 by exchanging its head with NULL. */
PRTFILEAIOREQINTERNAL pOldHead = (PRTFILEAIOREQINTERNAL)ASMAtomicXchgPtr((void * volatile *)&pCtxInt->apReqsNewHead[0],
NULL);
/* Find the end of the current head and link the old list to the current. */
}
/* Set the internal wakeup flag and wake up the thread if possible. */
if (!fWokenUp)
}
return rc;
}
{
int rc = VINF_SUCCESS;
int cRequestsCompleted = 0;
uint64_t StartNanoTS = 0;
/* Check parameters. */
return VERR_FILE_AIO_NO_REQUEST;
/* Take a start timestamp when the caller passed a finite timeout. */
if (cMillisTimeout != RT_INDEFINITE_WAIT)
{
StartNanoTS = RTTimeNanoTS();
}
/* Wait for at least one. */
if (!cMinReqs)
cMinReqs = 1;
/* For the wakeup call. */
/* Update the waiting list once before we enter the loop. */
/* Keep going while requests are still wanted and no failure status was recorded. */
while ( cMinReqs
&& RT_SUCCESS_NP(rc))
{
if (rcPosix < 0)
{
/* Check that this is an external wakeup event. */
else
}
else
{
/* Requests finished. */
unsigned iReqCurr = 0;
int cDone = 0;
/* Remove completed requests from the waiting list. */
{
if (rcReq != EINPROGRESS)
{
/* Completed, store the return code. */
if (rcReq == 0)
{
/* Call aio_return() to free resources. */
}
else
/* Mark the request as finished. */
cDone++;
/*
* Move the last entry into the current position to avoid holes
* but only if it is not the last element already.
*/
{
}
else
pCtxInt->iFirstFree--;
/* Put the request into the completed list. */
}
else
/* Still in progress: look at the next waiting-list entry. */
iReqCurr++;
}
{
/* Recalculate the timeout. */
}
/* Check for new elements. */
}
}
return rc;
}
{
/** @todo r=bird: Define the protocol for how to resume work after calling
* this function. */
/*
* NOTE(review): fWokenUp is presumably the previous value of the context's
* wake-up flag; the statement guarded by this check is not visible here -
* confirm.
*/
if (!fWokenUp)
return VINF_SUCCESS;
}