PDMAsyncCompletionFile.cpp revision 9c745ce211e2da9aa21d3ab0021a9c9fd2876a0e
/* $Id$ */
/** @file
* PDM Async I/O - Transport data asynchronous in R3 using EMT.
*/
/*
* Copyright (C) 2006-2009 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#define RT_STRICT
#include "PDMInternal.h"
#include <iprt/critsect.h>
#include <iprt/semaphore.h>
# include <errno.h>
# include <fcntl.h>
# include <unistd.h>
#endif
#ifdef RT_OS_WINDOWS
# define _WIN32_WINNT 0x0500
# include <windows.h>
# include <winioctl.h>
#endif
#ifdef RT_OS_DARWIN
#endif /* RT_OS_DARWIN */
#ifdef RT_OS_SOLARIS
# include <stropts.h>
#endif /* RT_OS_SOLARIS */
#ifdef RT_OS_FREEBSD
#endif /* RT_OS_FREEBSD */
#include "PDMAsyncCompletionFileInternal.h"
/*******************************************************************************
* Internal Functions *
*******************************************************************************/
#ifdef VBOX_WITH_DEBUGGER
/* Handlers of the debugger commands; both are defined further down in this file. */
static DECLCALLBACK(int) pdmacEpFileErrorInject(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR pArgs, unsigned cArgs);
static DECLCALLBACK(int) pdmacEpFileDelayInject(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR pArgs, unsigned cArgs);
#endif
/*******************************************************************************
* Global Variables *
*******************************************************************************/
#ifdef VBOX_WITH_DEBUGGER
/** Argument descriptors referenced by the "injecterror" command descriptor. */
static const DBGCVARDESC g_aInjectErrorArgs[] =
{
/* cTimesMin, cTimesMax, enmCategory, fFlags, pszName, pszDescription */
};
/** Argument descriptors referenced by the "injectdelay" command descriptor. */
static const DBGCVARDESC g_aInjectDelayArgs[] =
{
/* cTimesMin, cTimesMax, enmCategory, fFlags, pszName, pszDescription */
};
# endif
/**
* Debugger command descriptors.
*
* Both commands take exactly three arguments (cArgsMin == cArgsMax == 3) and
* are handled by pdmacEpFileErrorInject and pdmacEpFileDelayInject respectively.
*/
{
/* pszCmd, cArgsMin, cArgsMax, paArgDesc, cArgDescs, fFlags, pfnHandler pszSyntax, ....pszDescription */
{ "injecterror", 3, 3, &g_aInjectErrorArgs[0], 3, 0, pdmacEpFileErrorInject, "", "Inject error into I/O subsystem." }
,{ "injectdelay", 3, 3, &g_aInjectDelayArgs[0], 3, 0, pdmacEpFileDelayInject, "", "Inject a delay of a request." }
# endif
};
#endif
/**
* Frees a task.
*
* The per endpoint cache is tried first; the else branch covers the case
* where the task is not put into that cache.
*
* @returns nothing.
* @param pEndpoint Pointer to the endpoint the task was for.
* @param pTask The task to free.
*/
{
/* Try the per endpoint cache first. */
{
/* Add it to the list. */
}
else
{
/* NOTE(review): presumably the task is released for real here - confirm. */
}
}
/**
* Allocates a task segment.
*
* Cached tasks are preferred: the small per endpoint cache is consulted first,
* then the bigger endpoint class cache, and only if both have nothing to
* offer a completely new task of sizeof(PDMACTASKFILE) bytes is allocated.
*
* @returns Pointer to the new task segment or NULL if the allocation failed.
* @param pEndpoint Pointer to the endpoint.
*/
{
/* Try the small per endpoint cache first. */
{
/* Try the bigger endpoint class cache. */
PPDMASYNCCOMPLETIONEPCLASSFILE pEndpointClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
/*
* Allocate completely new.
* If this fails we return NULL.
*/
sizeof(PDMACTASKFILE),
(void **)&pTask);
if (RT_FAILURE(rc))
}
else
{
/* Grab a free task from the head. */
/* The cached task counter must be positive whenever a task can be taken from the list. */
AssertMsg(pEndpoint->cTasksCached > 0, ("No tasks cached but list contains more than one element\n"));
}
return pTask;
}
{
/*
* Get pending tasks.
*/
/* Reverse the list to process in FIFO order. */
/* Nothing to reverse when no task was pending. */
if (pTasks)
{
/* Walk the fetched tasks one by one. */
while (pTask)
{
}
}
/* Hand the (reversed) list to the caller; the pointer is NULL if nothing was pending. */
return pTasks;
}
{
/* Only act when fWokenUp is false, i.e. no wake-up has been flagged yet. */
if (!fWokenUp)
{
int rc = VINF_SUCCESS;
/* NOTE(review): presumably the event semaphore is only signalled while the manager waits on it - confirm. */
if (fWaitingEventSem)
}
}
/**
* Wakes up the async I/O manager for a blocking event and waits until the
* event was processed.
*
* The blocking event of the manager is reset to
* PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID before returning.
*
* @returns VBox status code.
* @param pAioMgr The async I/O manager to notify.
* @param enmEvent The blocking event to wait for.
*/
static int pdmacFileAioMgrWaitForBlockingEvent(PPDMACEPFILEMGR pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT enmEvent)
{
int rc = VINF_SUCCESS;
/* Wakeup the async I/O manager */
/* Wait for completion. */
/* Mark the event slot as unused again; the write is atomic. */
ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID);
return rc;
}
{
/* Status code handed back to the caller. */
int rc;
/* Update the assigned I/O manager. */
return rc;
}
/**
* Removes an endpoint from an async I/O manager.
*
* NOTE(review): summary derived from the function name only - confirm.
*
* @returns VBox status code.
* @param pAioMgr The async I/O manager.
* @param pEndpoint The endpoint to remove.
*/
static int pdmacFileAioMgrRemoveEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
{
int rc;
return rc;
}
/**
* Closes an endpoint handled by an async I/O manager.
*
* NOTE(review): summary derived from the function name only - confirm.
*
* @returns VBox status code.
* @param pAioMgr The async I/O manager.
* @param pEndpoint The endpoint to close.
*/
static int pdmacFileAioMgrCloseEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
{
int rc;
return rc;
}
{
int rc;
return rc;
}
{
do
{
return VINF_SUCCESS;
}
{
{
}
else
{
/* The segment size has to fit into a non-negative 32-bit value. */
Assert((uint32_t)pTask->DataSeg.cbSeg == pTask->DataSeg.cbSeg && (int32_t)pTask->DataSeg.cbSeg >= 0);
/* The first error will be returned. */
if (RT_FAILURE(rc))
#ifdef VBOX_WITH_DEBUGGER
else
{
/* The owning endpoint carries the error injection state set up via the debugger. */
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pTaskFile->Core.pEndpoint;
/* Overwrite with injected error code. */
else
if (RT_FAILURE(rc))
}
#endif
{
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pTaskFile->Core.pEndpoint;
/* Check if we should delay completion of the request. */
{
/* Arm the delay. */
/* Completion is not reported now; the function leaves early once the delay is armed. */
return;
}
#endif
/* Check for an expired delay. */
{
}
#endif
}
}
}
{
}
{
/* Write transfers are among the accepted transfer directions. */
|| (enmTransfer == PDMACTASKFILETRANSFER_WRITE));
/* Every one of the cSegments segments is handled on its own. */
for (unsigned i = 0; i < cSegments; i++)
{
/* Send it off to the I/O manager. */
}
/* The request is reported as pending on this path. */
return VINF_AIO_TASK_PENDING;
}
/**
* Creates a new async I/O manager.
*
* The manager structure is allocated with MMR3HeapAllocZEx() using the VM of
* the endpoint class. Every later step only runs if the previous one
* succeeded, and the status of the first failing step is returned.
*
* @returns VBox status code.
* @param pEpClass Pointer to the endpoint class data.
* @param ppAioMgr Where to store the pointer to the new async I/O manager on success.
* @param enmMgrType Wanted manager type - can be overwritten by the global override.
*/
{
int rc = VINF_SUCCESS;
LogFlowFunc((": Entered\n"));
/* Allocate the manager structure (sizeof(PDMACEPFILEMGR) bytes) under the async completion tag. */
rc = MMR3HeapAllocZEx(pEpClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION, sizeof(PDMACEPFILEMGR), (void **)&pAioMgrNew);
if (RT_SUCCESS(rc))
{
else
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
/* Init the rest of the manager. */
if (RT_SUCCESS(rc))
{
0,
0,
? "F"
: "N");
if (RT_SUCCESS(rc))
{
/* Link it into the list. */
if (pEpClass->pAioMgrHead)
*ppAioMgr = pAioMgrNew;
return VINF_SUCCESS;
}
}
}
}
}
}
/* Only reached when one of the steps above failed. */
return rc;
}
/**
* Destroys an async I/O manager.
*
* The manager is unlinked from the list, the manager count of the endpoint
* class (cAioMgrs) is decremented and the resources are freed.
*
* @returns nothing.
* @param pEpClassFile The file endpoint class data owning the manager count.
* @param pAioMgr The async I/O manager to destroy.
*/
static void pdmacFileAioMgrDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile, PPDMACEPFILEMGR pAioMgr)
{
/* Unlink from the list. */
if (pPrev)
else
if (pNext)
pEpClassFile->cAioMgrs--;
/* Free the resources. */
}
{
int rc = VINF_SUCCESS;
else
return rc;
}
{
    /* Translate the manager type into its printable name; unknown types yield NULL. */
    switch (enmMgrType)
    {
        case PDMACEPFILEMGRTYPE_SIMPLE:
            return "Simple";
        case PDMACEPFILEMGRTYPE_ASYNC:
            return "Async";
        default:
            return NULL;
    }
}
{
int rc = VINF_SUCCESS;
else
return rc;
}
{
/* Printable backend names; NULL is the fallback when none of the earlier returns applies. */
return "Buffered";
return "NonBuffered";
return NULL;
}
/**
* Get the size of the given file.
* Works for block devices too.
*
* The block device handling is host specific: there are dedicated code paths
* for Windows, Darwin, Solaris and FreeBSD plus a generic fallback.
*
* @returns VBox status code.
* @param hFile The file handle.
* @param pcbSize Where to store the size of the file on success.
*/
{
int rc = VINF_SUCCESS;
else
{
#ifdef RT_OS_WINDOWS
/* Windows host. */
{
{
{
/* IOCTL_DISK_GET_LENGTH_INFO is supported -- override cbSize. */
}
/* This branch ends with a success status. */
rc = VINF_SUCCESS;
}
else
{
}
}
else
#elif defined(RT_OS_DARWIN)
/* Darwin host. */
{
{
else
}
else
}
else
#elif defined(RT_OS_SOLARIS)
/* Solaris host. */
{
else
}
else
#elif defined(RT_OS_FREEBSD)
/* FreeBSD host. */
{
{
}
else
}
else
#else
/* Could be a block device */
#endif
else if (RT_SUCCESS(rc))
}
return rc;
}
#ifdef VBOX_WITH_DEBUGGER
/**
* Error inject callback.
*
* Handler of the "injecterror" debugger command. The arguments select the
* transfer direction, the file of the endpoint and the status code to inject.
*
* @returns VBox status code; a failure reported through DBGCCmdHlpFail() if
* the status code is out of range.
* @param pCmd The command descriptor.
* @param pCmdHlp The command helper used for reporting failures.
* @param pVM The VM whose file endpoint class is searched.
* @param pArgs The command arguments: direction, filename, status code.
* @param cArgs Number of entries in pArgs.
*/
static DECLCALLBACK(int) pdmacEpFileErrorInject(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR pArgs, unsigned cArgs)
{
/*
* Validate input.
*/
pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pVM->pUVM->pdm.s.apAsyncCompletionEndpointClass[PDMASYNCCOMPLETIONEPCLASSTYPE_FILE];
/* Syntax is "read|write <filename> <status code>" */
bool fWrite;
fWrite = false;
fWrite = true;
else
/* The status code is the third argument, so that is the value to report (not the direction in pArgs[0]). */
return DBGCCmdHlpFail(pCmdHlp, pCmd, "The status code '%lld' is out of range", pArgs[2].u.u64Number);
/*
* Search for the matching endpoint.
*/
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpClassFile->Core.pEndpointsHead;
while (pEpFile)
{
break;
}
if (pEpFile)
{
/*
* Do the job.
*/
/* The transfer direction selects which injected status code is set. */
if (fWrite)
else
}
if (!pEpFile)
return VINF_SUCCESS;
}
/**
* Delay inject callback.
*
* Handler of the "injectdelay" debugger command.
*
* @returns VBox status code.
* @param pCmd The command descriptor.
* @param pCmdHlp The command helper.
* @param pVM The VM whose file endpoint class is searched.
* @param pArgs The command arguments.
* @param cArgs Number of entries in pArgs.
*/
static DECLCALLBACK(int) pdmacEpFileDelayInject(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR pArgs, unsigned cArgs)
{
/*
* Validate input.
*/
pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pVM->pUVM->pdm.s.apAsyncCompletionEndpointClass[PDMASYNCCOMPLETIONEPCLASSTYPE_FILE];
/*
* Syntax is "read|write <filename> <third argument>".
* NOTE(review): the third argument is presumably the delay to inject
* rather than a status code - confirm against g_aInjectDelayArgs.
*/
bool fWrite;
fWrite = false;
fWrite = true;
else
/*
* Search for the matching endpoint.
*/
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpClassFile->Core.pEndpointsHead;
while (pEpFile)
{
break;
}
if (pEpFile)
{
if (fXchg)
else
}
if (!pEpFile)
return VINF_SUCCESS;
}
# endif /* PDM_ASYNC_COMPLETION_FILE_WITH_DELAY */
#endif /* VBOX_WITH_DEBUGGER */
{
int rc = VINF_SUCCESS;
#ifdef DEBUG
#endif
/* A failure here is not fatal: it is logged and the simple manager is used instead. */
if (RT_FAILURE(rc))
{
LogRel(("AIO: Async I/O manager not supported (rc=%Rrc). Falling back to simple manager\n",
rc));
}
else
{
/*
* Mask clearing the bits below the required buffer alignment; all bits stay
* set if no alignment is required (cbBufferAlignment == 0).
* NOTE(review): assumes the alignment is a power of two - confirm.
*/
pEpClassFile->uBitmaskAlignment = AioLimits.cbBufferAlignment ? ~((RTR3UINTPTR)AioLimits.cbBufferAlignment - 1) : RTR3UINTPTR_MAX;
if (pCfgNode)
{
/* Query the default manager type */
if (RT_FAILURE(rc))
return rc;
LogRel(("AIOMgr: Default manager type is \"%s\"\n", pdmacFileMgrTypeToName(pEpClassFile->enmMgrTypeOverride)));
/* Query default backend type */
if (RT_FAILURE(rc))
return rc;
LogRel(("AIOMgr: Default file backend is \"%s\"\n", pdmacFileBackendTypeToName(pEpClassFile->enmEpBackendDefault)));
#ifdef RT_OS_LINUX
/* Linux only: the backend is switched to non buffered and the change is logged. */
{
LogRel(("AIOMgr: Linux does not support buffered async I/O, changing to non buffered\n"));
}
#endif
}
else
{
/* No configuration supplied, set defaults */
}
}
/* Init critical section. */
#ifdef VBOX_WITH_DEBUGGER
/* Install the error injection handler. */
if (RT_SUCCESS(rc))
{
}
#endif
return rc;
}
{
/* All endpoints should be closed at this point. */
/* Destroy all remaining async I/O managers, one per iteration until the list head is NULL. */
while (pEpClassFile->pAioMgrHead)
}
{
int rc = VINF_SUCCESS;
/* Only the read-only, don't-lock and host-cache-enabled flags are accepted; anything else is VERR_INVALID_PARAMETER. */
AssertMsgReturn((fFlags & ~(PDMACEP_FILE_FLAGS_READ_ONLY | PDMACEP_FILE_FLAGS_DONT_LOCK | PDMACEP_FILE_FLAGS_HOST_CACHE_ENABLED)) == 0,
("PDMAsyncCompletion: Invalid flag specified\n"), VERR_INVALID_PARAMETER);
/* Base open flags; more flags are added depending on the caller's flags and the backend. */
unsigned fFileFlags = RTFILE_O_OPEN;
/*
* Revert to the simple manager and the buffered backend if
* the host cache should be enabled.
*/
{
}
else
{
/*
* avoid the lock. Return an error in case caching is enabled
* because this can lead to data corruption.
*/
else
}
if (enmMgrType == PDMACEPFILEMGRTYPE_ASYNC)
{
/*
* We only disable the cache if the size of the file is a multiple of 512.
* Certain hosts like Windows, Linux and Solaris require that transfer sizes
* are aligned to the volume sector size.
* If not we just make sure that the data is written to disk with RTFILE_O_WRITE_THROUGH
* which will trash the host cache but ensures that the host cache will not
* contain dirty buffers.
*/
if (RT_SUCCESS(rc))
{
else
{
/* Downgrade to the buffered backend */
#ifdef RT_OS_LINUX
#endif
}
}
}
/* Open with final flags. */
{
LogRel(("pdmacFileEpInitialize: RTFileOpen %s / %08x failed with %Rrc\n",
/*
* Solaris doesn't support directio on ZFS so far. :-\
* Trying to enable it returns VERR_INVALID_FUNCTION
* (ENOTTY). Remove it and hope for the best.
* ZFS supports write throttling in case applications
* write more data than can be synced to the disk
* without blocking the whole application.
*
* On Linux we have the same problem with cifs.
* Have to disable async I/O here too because it requires O_DIRECT.
*/
#ifdef RT_OS_LINUX
#endif
/* Open again. */
if (RT_FAILURE(rc))
{
LogRel(("pdmacFileEpInitialize: RTFileOpen %s / %08x failed AGAIN(!) with %Rrc\n",
}
}
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
/* Initialize the segment cache */
sizeof(PDMACTASKFILE),
(void **)&pEpFile->pTasksFreeHead);
if (RT_SUCCESS(rc))
{
/* The task cache starts out empty. */
pEpFile->cTasksCached = 0;
/*
* Disable async flushes on Solaris for now.
* They cause weird hangs which need more investigation.
*/
#ifndef RT_OS_SOLARIS
pEpFile->fAsyncFlushSupported = true;
#else
pEpFile->fAsyncFlushSupported = false;
#endif
if (enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE)
{
/* Simple mode. Every file has its own async I/O manager. */
}
else
{
/* Check for an idling manager of the same type */
while (pAioMgr)
{
break;
}
/* No suitable manager was found in the list. */
if (!pAioMgr)
{
}
}
rc = VERR_NO_MEMORY;
else
{
/* Assign the endpoint to the thread. */
if (RT_FAILURE(rc))
{
}
}
}
}
if (RT_FAILURE(rc))
}
#ifdef VBOX_WITH_STATISTICS
/* Statistics builds only: set up the read and write timing counters of the endpoint. */
if (RT_SUCCESS(rc))
{
STAMUNIT_TICKS_PER_CALL, "Time taken to read from the endpoint",
STAMUNIT_TICKS_PER_CALL, "Time taken to write to the endpoint",
}
#endif
if (RT_SUCCESS(rc))
LogRel(("AIOMgr: Endpoint for file '%s' (flags %08x) created successfully\n", pszUri, pEpFile->fFlags));
return rc;
}
{
/* Reaching this code always trips the assertion: the tree is expected to be empty already. */
AssertMsgFailed(("The locked ranges tree should be empty at that point\n"));
return VINF_SUCCESS;
}
{
int rc = VINF_SUCCESS;
/* Make sure that all tasks finished for this endpoint. */
/*
* If the async I/O manager is in failsafe mode this is the only endpoint
* it processes and thus can be destroyed now.
*/
/* Free cached tasks. */
while (pTask)
{
}
/* Destroy the locked ranges tree now. */
#ifdef VBOX_WITH_STATISTICS
#endif
/* Success is reported unconditionally on this path, independent of rc. */
return VINF_SUCCESS;
}
{
int rc = VINF_SUCCESS;
/* Trace the request: task, endpoint, offset, segment array, segment count and number of bytes to read. */
LogFlowFunc(("pTask=%#p pEndpoint=%#p off=%RTfoff paSegments=%#p cSegments=%zu cbRead=%zu\n",
{
}
else
return rc;
}
{
int rc = VINF_SUCCESS;
/* NOTE(review): VERR_NOT_SUPPORTED presumably belongs to a guard whose condition is not visible here - confirm. */
return VERR_NOT_SUPPORTED;
return rc;
}
{
/* NOTE(review): VERR_NOT_SUPPORTED presumably belongs to a guard whose condition is not visible here - confirm. */
return VERR_NOT_SUPPORTED;
pdmacFileEpTaskInit(pTask, 0);
/* Without an I/O task the request cannot be queued. */
if (RT_UNLIKELY(!pIoTask))
return VERR_NO_MEMORY;
/* The request is reported as pending. */
return VINF_AIO_TASK_PENDING;
}
{
return VINF_SUCCESS;
}
{
}
{
/*
* Descriptor of the "File" endpoint class: the name, the sizes of the class,
* endpoint and task structures, and one slot per callback as labelled below.
*/
/* u32Version */
/* pcszName */
"File",
/* enmClassType */
/* cbEndpointClassGlobal */
sizeof(PDMASYNCCOMPLETIONEPCLASSFILE),
/* cbEndpoint */
sizeof(PDMASYNCCOMPLETIONENDPOINTFILE),
/* cbTask */
sizeof(PDMASYNCCOMPLETIONTASKFILE),
/* pfnInitialize */
/* pfnTerminate */
/* pfnEpInitialize. */
/* pfnEpClose */
/* pfnEpRead */
/* pfnEpWrite */
/* pfnEpFlush */
/* pfnEpGetSize */
/* pfnEpSetSize */
/* u32VersionEnd */
};