PDMAsyncCompletionFileInternal.h revision 22ec733a5e041fcdfe02fce2eafc9faf8b0077dd
/* $Id$ */
/** @file
* PDM Async I/O - Transport data asynchronous in R3 using EMT.
*/
/*
* Copyright (C) 2006-2008 Sun Microsystems, Inc.
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
* Clara, CA 95054 USA or visit http://www.sun.com if you need
* additional information or have any questions.
*/
#include <iprt/semaphore.h>
#include <iprt/critsect.h>
#include <iprt/spinlock.h>
#include <iprt/memcache.h>
#include "PDMAsyncCompletionInternal.h"
/** @todo: Revise the caching of tasks. We currently have four caches:
* Per endpoint task cache
* Per class cache
* Per endpoint task segment cache
* Per class task segment cache
*
* We could use the RT heap for this probably or extend MMR3Heap (uses RTMemAlloc
* instead of managing larger blocks) to have this global for the whole VM.
*/
/**
* A few forward declarations.
*/
/** Pointer to the data for a file endpoint. */
typedef struct PDMASYNCCOMPLETIONENDPOINTFILE *PPDMASYNCCOMPLETIONENDPOINTFILE;
/** Pointer to a request segment. */
typedef struct PDMACTASKFILE *PPDMACTASKFILE;
/** Pointer to the per task data. */
typedef struct PDMASYNCCOMPLETIONTASKFILE *PPDMASYNCCOMPLETIONTASKFILE;
/** Pointer to a cache LRU list. */
typedef struct PDMACFILELRULIST *PPDMACFILELRULIST;
/** Pointer to the global cache structure. */
typedef struct PDMACFILECACHEGLOBAL *PPDMACFILECACHEGLOBAL;
/** Pointer to a task segment. */
typedef struct PDMACFILETASKSEG *PPDMACFILETASKSEG;
/**
* Blocking event types.
*/
typedef enum PDMACEPFILEAIOMGRBLOCKINGEVENT
{
/** Invalid type */
/** An endpoint is added to the manager. */
/** An endpoint is removed from the manager. */
/** An endpoint is about to be closed. */
/** The manager is requested to terminate */
/** The manager is requested to suspend */
/** The manager is requested to resume */
/** 32bit hack */
PDMACEPFILEAIOMGRBLOCKINGEVENT_32BIT_HACK = 0x7fffffff
/**
* I/O manager type.
*/
typedef enum PDMACEPFILEMGRTYPE
{
/** Simple aka failsafe */
/** Async I/O with host cache enabled. */
/** 32bit hack */
PDMACEPFILEMGRTYPE_32BIT_HACK = 0x7fffffff
/** Pointer to an I/O manager type */
typedef PDMACEPFILEMGRTYPE *PPDMACEPFILEMGRTYPE;
/**
* States of the I/O manager.
*/
typedef enum PDMACEPFILEMGRSTATE
{
/** Invalid state. */
/** Normal running state accepting new requests
* and processing them.
*/
/** Fault state - not accepting new tasks for endpoints but waiting for
* remaining ones to finish.
*/
/** Suspending state - not accepting new tasks for endpoints but waiting
* for remaining ones to finish.
*/
/** Shutdown state - not accepting new tasks for endpoints but waiting
* for remaining ones to finish.
*/
/** 32bit hack */
PDMACEPFILEMGRSTATE_32BIT_HACK = 0x7fffffff
/**
* State of an async I/O manager.
*/
typedef struct PDMACEPFILEMGR
{
/** Next Aio manager in the list. */
/** Previous Aio manager in the list. */
/** Manager type */
/** Current state of the manager. */
/** Event semaphore the manager sleeps on when waiting for new requests. */
/** Flag whether the thread waits in the event semaphore. */
volatile bool fWaitingEventSem;
/** Thread data */
/** The async I/O context for this manager. */
/** Flag whether the I/O manager was woken up. */
volatile bool fWokenUp;
/** List of endpoints assigned to this manager. */
/** Number of endpoints assigned to the manager. */
unsigned cEndpoints;
/** Number of requests active currently. */
unsigned cRequestsActive;
/** Pointer to an array of free async I/O request handles. */
/** Next free position for a free request handle. */
unsigned iFreeEntryNext;
/** Position of the next free task handle */
unsigned iFreeReqNext;
/** Size of the array. */
unsigned cReqEntries;
/** Flag whether at least one endpoint reached its bandwidth limit. */
bool fBwLimitReached;
/** Memory cache for file range locks. */
/** Critical section protecting the blocking event handling. */
/** Event semaphore for blocking external events.
* The caller waits on it until the async I/O manager
* has finished processing the event. */
/** Flag whether a blocking event is pending and needs
* processing by the I/O manager. */
volatile bool fBlockingEventPending;
/** Blocking event type */
/** Event type data */
union
{
/** Add endpoint event. */
struct
{
/** The endpoint to be added */
volatile PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
} AddEndpoint;
/** Remove endpoint event. */
struct
{
/** The endpoint to be removed */
volatile PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
/** Close endpoint event. */
struct
{
/** The endpoint to be closed */
volatile PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
/** Pointer to an async I/O manager state. */
typedef PDMACEPFILEMGR *PPDMACEPFILEMGR;
/** Pointer to an async I/O manager state pointer. */
typedef PPDMACEPFILEMGR *PPPDMACEPFILEMGR;
/**
* Bandwidth control manager instance data
*/
typedef struct PDMACFILEBWMGR
{
/** Maximum number of bytes the VM is allowed to transfer (Max is 4GB/s) */
/** Number of bytes we start with */
/** Step after each update */
/** Number of bytes we are allowed to transfer till the next update.
* Reset by the refresh timer. */
volatile uint32_t cbVMTransferAllowed;
/** Timestamp of the last update */
volatile uint64_t tsUpdatedLast;
/** Reference counter - How many endpoints are associated with this manager. */
/** Pointer to a bandwidth control manager */
typedef PDMACFILEBWMGR *PPDMACFILEBWMGR;
/** Pointer to a bandwidth control manager pointer */
typedef PPDMACFILEBWMGR *PPPDMACFILEBWMGR;
/**
* A file access range lock.
*/
typedef struct PDMACFILERANGELOCK
{
/** AVL node in the locked range tree of the endpoint. */
/** How many tasks have locked this range. */
/** Flag whether this is a read or write lock. */
bool fReadLock;
/** List of tasks waiting for the range to be unlocked. */
/** List of tasks waiting for the range to be unlocked. */
/**
* Data for one request segment waiting for cache entry.
*/
typedef struct PDMACFILETASKSEG
{
/** Next task segment in the list. */
struct PDMACFILETASKSEG *pNext;
/** Task this segment is for. */
/** Offset into the cache entry buffer to start reading from. */
/** Number of bytes to transfer. */
/** Pointer to the buffer. */
void *pvBuf;
/** Flag whether this entry writes data to the cache. */
bool fWrite;
/**
* A cache entry
*/
typedef struct PDMACFILECACHEENTRY
{
/** The AVL entry data. */
/** Pointer to the previous element. Used in one of the LRU lists.*/
struct PDMACFILECACHEENTRY *pPrev;
/** Pointer to the next element. Used in one of the LRU lists.*/
struct PDMACFILECACHEENTRY *pNext;
/** Pointer to the list the entry is in. */
/** Pointer to the global cache structure. */
/** Endpoint the entry belongs to. */
/** Flags for this entry. Combinations of PDMACFILECACHE_* #defines */
/** Reference counter. Prevents eviction of the entry if > 0. */
/** Size of the entry. */
/** Pointer to the memory containing the data. */
/** Head of list of tasks waiting for this one to finish. */
/** Tail of list of tasks waiting for this one to finish. */
/** Node for dirty but not yet committed entries list per endpoint. */
/** I/O is still in progress for this entry. This entry is not evictable. */
#define PDMACFILECACHE_ENTRY_IO_IN_PROGRESS RT_BIT(0)
/** Entry is locked and thus not evictable. */
/** Entry is dirty */
/** Entry is not evictable. */
#define PDMACFILECACHE_NOT_EVICTABLE (PDMACFILECACHE_ENTRY_LOCKED | PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)
/**
* LRU list data
*/
typedef struct PDMACFILELRULIST
{
/** Head of the list. */
/** Tail of the list. */
/** Number of bytes cached in the list. */
/**
* Global cache data.
*/
typedef struct PDMACFILECACHEGLOBAL
{
/** Maximum size of the cache in bytes. */
/** Current size of the cache in bytes. */
/** Critical section protecting the cache. */
/** Maximum number of bytes cached. */
/** Maximum number of bytes in the paged out list. */
/** Recently used cache entries list */
/** Scorecard cache entry list. */
/** List of frequently used cache entries */
/** Commit timeout in milliseconds */
/** Number of dirty bytes needed to start a commit of the data to the disk. */
/** Current number of dirty bytes in the cache. */
/** Flag whether a commit is currently in progress. */
volatile bool fCommitInProgress;
/** Commit interval timer */
/** Number of endpoints using the cache. */
/** List of all endpoints using this cache. */
#ifdef VBOX_WITH_STATISTICS
/** Hit counter. */
/** Partial hit counter. */
/** Miss counter. */
/** Bytes read from cache. */
/** Bytes written to the cache. */
/** Time spent to get an entry in the AVL tree. */
/** Time spent to insert an entry in the AVL tree. */
/** Time spent to remove an entry in the AVL tree. */
/** Number of times a buffer could be reused. */
#endif
/**
* Per endpoint cache data.
*/
typedef struct PDMACFILEENDPOINTCACHE
{
/** AVL tree managing cache entries. */
/** R/W semaphore protecting cached entries for this endpoint. */
/** Pointer to the global cache data */
/** Number of writes outstanding. */
volatile uint32_t cWritesOutstanding;
/** Handle of the flush request if one is active */
volatile PPDMASYNCCOMPLETIONTASKFILE pTaskFlush;
/** Lock protecting the dirty entries list. */
/** List of dirty but not committed entries for this endpoint. */
/** Node of the cache endpoint list. */
#ifdef VBOX_WITH_STATISTICS
/** Number of times a write was deferred because the cache entry was still in progress */
#endif
/**
* Backend type for the endpoint.
*/
typedef enum PDMACFILEEPBACKEND
{
/** Non buffered. */
/** Buffered (i.e. host cache enabled) */
/** 32bit hack */
PDMACFILEEPBACKEND_32BIT_HACK = 0x7fffffff
/** Pointer to a backend type. */
typedef PDMACFILEEPBACKEND *PPDMACFILEEPBACKEND;
/**
* Global data for the file endpoint class.
*/
typedef struct PDMASYNCCOMPLETIONEPCLASSFILE
{
/** Common data. */
/** Override I/O manager type - set to SIMPLE after failure. */
/** Default backend type for the endpoint. */
/** Flag whether the file data cache is enabled. */
bool fCacheEnabled;
/** Critical section protecting the list of async I/O managers. */
/** Pointer to the head of the async I/O managers. */
/** Number of async I/O managers currently running. */
unsigned cAioMgrs;
/** Maximum number of segments to cache per endpoint */
unsigned cTasksCacheMax;
/** Maximum number of simultaneous outstanding requests. */
/** Bitmask for checking the alignment of a buffer. */
/** Global cache data. */
/** Flag whether the out of resources warning was printed already. */
/** The global bandwidth control manager */
/** Pointer to the endpoint class data. */
typedef enum PDMACEPFILEBLOCKINGEVENT
{
/** The invalid event type */
/** A task is about to be canceled */
/** Usual 32bit hack */
PDMACEPFILEBLOCKINGEVENT_32BIT_HACK = 0x7fffffff
/**
* States of the endpoint.
*/
typedef enum PDMASYNCCOMPLETIONENDPOINTFILESTATE
{
/** Invalid state. */
/** Normal running state accepting new requests
* and processing them.
*/
/** The endpoint is about to be closed - not accepting new tasks for endpoints but waiting for
* remaining ones to finish.
*/
/** Removing from current I/O manager state - not processing new tasks for endpoints but waiting
* for remaining ones to finish.
*/
/** The current endpoint will be migrated to another I/O manager. */
/** 32bit hack */
PDMASYNCCOMPLETIONENDPOINTFILESTATE_32BIT_HACK = 0x7fffffff
/**
* Data for the file endpoint.
*/
typedef struct PDMASYNCCOMPLETIONENDPOINTFILE
{
/** Common data. */
/** Current state of the endpoint. */
/** The backend to use for this endpoint. */
/** async I/O manager this endpoint is assigned to. */
/** Flags for opening the file. */
unsigned fFlags;
/** File handle. */
/** Size of the endpoint.
* Updated while data is appended even if it is
* only in the cache yet and not written to the file.
*/
volatile uint64_t cbEndpoint;
/**
* Real size of the file. Only updated if
* data is appended.
*/
/** Flag whether caching is enabled for this file. */
bool fCaching;
/** Flag whether the file was opened readonly. */
bool fReadonly;
/** List of new tasks. */
/** Head of the small cache for allocated task segments for exclusive
* use by this endpoint. */
/** Tail of the small cache for allocated task segments for exclusive
* use by this endpoint. */
/** Number of elements in the cache. */
volatile uint32_t cTasksCached;
/** Cache of endpoint data. */
/** Pointer to the associated bandwidth control manager */
/** Flag whether a flush request is currently active */
/** Event semaphore for blocking external events.
* The caller waits on it until the async I/O manager
* has finished processing the event. */
/** Flag whether a blocking event is pending and needs
* processing by the I/O manager. */
bool fBlockingEventPending;
/** Blocking event type */
#ifdef VBOX_WITH_STATISTICS
/** Time spent in a read. */
/** Time spent in a write. */
#endif
/** Additional data needed for the event types. */
union
{
/** Cancellation event. */
struct
{
/** The task to cancel. */
} Cancel;
/** Data for exclusive use by the assigned async I/O manager. */
struct
{
/** Pointer to the next endpoint assigned to the manager. */
/** Pointer to the previous endpoint assigned to the manager. */
/** List of pending requests (not submitted due to usage restrictions
* or a pending flush request) */
/** Tail of pending requests. */
/** Tree of currently locked ranges.
* If a write task is enqueued the range gets locked and any other
* task writing to that range has to wait until the task completes.
*/
/** Number of requests currently being processed for this endpoint
* (excluding flush requests). */
unsigned cRequestsActive;
/** Number of requests processed during the last second. */
unsigned cReqsPerSec;
/** Current number of processed requests for the current update period. */
unsigned cReqsProcessed;
/** Flag whether the endpoint is about to be moved to another manager. */
bool fMoving;
/** Destination I/O manager. */
} AioMgr;
/** Pointer to the endpoint class data. */
/** Request completion function */
/** Pointer to a request completion function. */
typedef FNPDMACTASKCOMPLETED *PFNPDMACTASKCOMPLETED;
/**
* Transfer type.
*/
typedef enum PDMACTASKFILETRANSFER
{
/** Invalid. */
/** Read transfer. */
/** Write transfer. */
/** Flush transfer. */
/**
* Data of a request.
*/
typedef struct PDMACTASKFILE
{
/** Pointer to the range lock we are waiting for */
/** Next task in the list. (Depending on the state) */
struct PDMACTASKFILE *pNext;
/** Endpoint */
/** Transfer type. */
/** Start offset */
/** Data segment. */
/** Flag whether this segment uses a bounce buffer
* because the provided buffer doesn't meet host requirements. */
bool fBounceBuffer;
/** Pointer to the used bounce buffer if any. */
void *pvBounceBuffer;
/** Start offset in the bounce buffer to copy from. */
/** Flag whether this is a prefetch request. */
bool fPrefetch;
/** Completion function to call on completion. */
/** User data */
void *pvUser;
/**
* Per task data.
*/
typedef struct PDMASYNCCOMPLETIONTASKFILE
{
/** Common data. */
/** Number of bytes to transfer until this task completes. */
volatile int32_t cbTransferLeft;
/** Flag whether the task completed. */
volatile bool fCompleted;
/** Return code. */
volatile int rc;
/**
* Async I/O manager creation.
*
* NOTE(review): presumably creates a new async I/O manager of the requested
* type and hands it back through ppAioMgr - confirm against the implementation.
*
* @returns int; presumably a VBox status code - TODO confirm.
* @param   pEpClass    The file endpoint class data.
* @param   ppAioMgr    Pointer to an async I/O manager state pointer.
* @param   enmMgrType  The I/O manager type.
*/
int pdmacFileAioMgrCreate(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass, PPPDMACEPFILEMGR ppAioMgr, PDMACEPFILEMGRTYPE enmMgrType);
/**
* Per endpoint cache initialisation.
*
* NOTE(review): presumably sets up the cache data of the given endpoint using
* the class data - confirm against the implementation.
*
* @returns int; presumably a VBox status code - TODO confirm.
* @param   pEndpoint   The file endpoint.
* @param   pClassFile  The file endpoint class data.
*/
int pdmacFileEpCacheInit(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile);
int pdmacFileEpCacheRead(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
int pdmacFileEpCacheWrite(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
/**
* Cache flush request for an endpoint.
*
* NOTE(review): presumably commits the cached data of the endpoint on behalf
* of the given task - confirm against the implementation.
*
* @returns int; presumably a VBox status code - TODO confirm.
* @param   pEndpoint  The file endpoint.
* @param   pTask      The per task data of the flush request.
*/
int pdmacFileEpCacheFlush(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask);
#endif