/* $Id$ */
/** @file
* PDM Block Cache.
*/
/*
* Copyright (C) 2006-2011 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*/
#ifndef ___PDMBlkCacheInternal_h
#define ___PDMBlkCacheInternal_h
#include <VBox/vmm/cfgm.h>
#include <VBox/vmm/stam.h>
#include <VBox/vmm/tm.h>
#include <VBox/vmm/pdmblkcache.h>
#include <iprt/types.h>
#include <iprt/file.h>
#include <iprt/thread.h>
#include <iprt/semaphore.h>
#include <iprt/critsect.h>
#include <iprt/avl.h>
#include <iprt/list.h>
#include <iprt/spinlock.h>
#include <iprt/memcache.h>
RT_C_DECLS_BEGIN
/**
* A few forward declarations.
*/
/** Pointer to a cache LRU list. */
typedef struct PDMBLKLRULIST *PPDMBLKLRULIST;
/** Pointer to the global cache structure. */
typedef struct PDMBLKCACHEGLOBAL *PPDMBLKCACHEGLOBAL;
/** Pointer to a cache entry waiter structure. */
typedef struct PDMBLKCACHEWAITER *PPDMBLKCACHEWAITER;
/**
* A cache entry
*/
typedef struct PDMBLKCACHEENTRY
{
/** The AVL entry data. */
AVLRU64NODECORE Core;
/** Pointer to the previous element. Used in one of the LRU lists.*/
struct PDMBLKCACHEENTRY *pPrev;
/** Pointer to the next element. Used in one of the LRU lists.*/
struct PDMBLKCACHEENTRY *pNext;
/** Pointer to the list the entry is in. */
PPDMBLKLRULIST pList;
/** Cache the entry belongs to. */
PPDMBLKCACHE pBlkCache;
/** Flags for this entry. Combinations of PDMACFILECACHE_* #defines */
volatile uint32_t fFlags;
/** Reference counter. Prevents eviction of the entry if > 0. */
volatile uint32_t cRefs;
/** Size of the entry. */
uint32_t cbData;
/** Pointer to the memory containing the data. */
uint8_t *pbData;
/** Head of list of tasks waiting for this one to finish. */
PPDMBLKCACHEWAITER pWaitingHead;
/** Tail of list of tasks waiting for this one to finish. */
PPDMBLKCACHEWAITER pWaitingTail;
/** Node for dirty but not yet committed entries list per endpoint. */
RTLISTNODE NodeNotCommitted;
} PDMBLKCACHEENTRY, *PPDMBLKCACHEENTRY;
/** I/O is still in progress for this entry. This entry is not evictable. */
#define PDMBLKCACHE_ENTRY_IO_IN_PROGRESS RT_BIT(0)
/** Entry is locked and thus not evictable. */
#define PDMBLKCACHE_ENTRY_LOCKED RT_BIT(1)
/** Entry is dirty */
#define PDMBLKCACHE_ENTRY_IS_DIRTY RT_BIT(2)
/** Entry is not evictable. */
#define PDMBLKCACHE_NOT_EVICTABLE (PDMBLKCACHE_ENTRY_LOCKED | PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)
/**
* LRU list data
*/
typedef struct PDMBLKLRULIST
{
/** Head of the list. */
PPDMBLKCACHEENTRY pHead;
/** Tail of the list. */
PPDMBLKCACHEENTRY pTail;
/** Number of bytes cached in the list. */
uint32_t cbCached;
} PDMBLKLRULIST;
/**
* Global cache data.
*/
typedef struct PDMBLKCACHEGLOBAL
{
/** Pointer to the owning VM instance. */
PVM pVM;
/** Maximum size of the cache in bytes. */
uint32_t cbMax;
/** Current size of the cache in bytes. */
uint32_t cbCached;
/** Critical section protecting the cache. */
RTCRITSECT CritSect;
/** Maximum number of bytes cached. */
uint32_t cbRecentlyUsedInMax;
/** Maximum number of bytes in the paged out list .*/
uint32_t cbRecentlyUsedOutMax;
/** Recently used cache entries list */
PDMBLKLRULIST LruRecentlyUsedIn;
/** Scorecard cache entry list. */
PDMBLKLRULIST LruRecentlyUsedOut;
/** List of frequently used cache entries */
PDMBLKLRULIST LruFrequentlyUsed;
/** Commit timeout in milli seconds */
uint32_t u32CommitTimeoutMs;
/** Number of dirty bytes needed to start a commit of the data to the disk. */
uint32_t cbCommitDirtyThreshold;
/** Current number of dirty bytes in the cache. */
volatile uint32_t cbDirty;
/** Flag whether the VM was suspended becaus of an I/O error. */
volatile bool fIoErrorVmSuspended;
/** Flag whether a commit is currently in progress. */
volatile bool fCommitInProgress;
/** Commit interval timer */
PTMTIMERR3 pTimerCommit;
/** Number of endpoints using the cache. */
uint32_t cRefs;
/** List of all users of this cache. */
RTLISTANCHOR ListUsers;
#ifdef VBOX_WITH_STATISTICS
/** Hit counter. */
STAMCOUNTER cHits;
/** Partial hit counter. */
STAMCOUNTER cPartialHits;
/** Miss counter. */
STAMCOUNTER cMisses;
/** Bytes read from cache. */
STAMCOUNTER StatRead;
/** Bytes written to the cache. */
STAMCOUNTER StatWritten;
/** Time spend to get an entry in the AVL tree. */
STAMPROFILEADV StatTreeGet;
/** Time spend to insert an entry in the AVL tree. */
STAMPROFILEADV StatTreeInsert;
/** Time spend to remove an entry in the AVL tree. */
STAMPROFILEADV StatTreeRemove;
/** Number of times a buffer could be reused. */
STAMCOUNTER StatBuffersReused;
#endif
} PDMBLKCACHEGLOBAL;
#ifdef VBOX_WITH_STATISTICS
AssertCompileMemberAlignment(PDMBLKCACHEGLOBAL, cHits, sizeof(uint64_t));
#endif
/**
* Block cache type.
*/
typedef enum PDMBLKCACHETYPE
{
/** Device . */
PDMBLKCACHETYPE_DEV = 1,
/** Driver consumer. */
PDMBLKCACHETYPE_DRV,
/** Internal consumer. */
PDMBLKCACHETYPE_INTERNAL,
/** Usb consumer. */
PDMBLKCACHETYPE_USB
} PDMBLKCACHETYPE;
/**
* Per user cache data.
*/
typedef struct PDMBLKCACHE
{
/** Pointer to the id for the cache. */
char *pszId;
/** AVL tree managing cache entries. */
PAVLRU64TREE pTree;
/** R/W semaphore protecting cached entries for this endpoint. */
RTSEMRW SemRWEntries;
/** Pointer to the gobal cache data */
PPDMBLKCACHEGLOBAL pCache;
/** Lock protecting the dirty entries list. */
RTSPINLOCK LockList;
/** List of dirty but not committed entries for this endpoint. */
RTLISTANCHOR ListDirtyNotCommitted;
/** Node of the cache user list. */
RTLISTNODE NodeCacheUser;
/** Block cache type. */
PDMBLKCACHETYPE enmType;
/** Type specific data. */
union
{
/** PDMASYNCCOMPLETIONTEMPLATETYPE_DEV */
struct
{
/** Pointer to the device instance owning the block cache. */
R3PTRTYPE(PPDMDEVINS) pDevIns;
/** Complete callback to the user. */
R3PTRTYPE(PFNPDMBLKCACHEXFERCOMPLETEDEV) pfnXferComplete;
/** I/O enqueue callback. */
R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDEV) pfnXferEnqueue;
/** Discard enqueue callback. */
R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV) pfnXferEnqueueDiscard;
} Dev;
/** PDMASYNCCOMPLETIONTEMPLATETYPE_DRV */
struct
{
/** Pointer to the driver instance owning the block cache. */
R3PTRTYPE(PPDMDRVINS) pDrvIns;
/** Complete callback to the user. */
R3PTRTYPE(PFNPDMBLKCACHEXFERCOMPLETEDRV) pfnXferComplete;
/** I/O enqueue callback. */
R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDRV) pfnXferEnqueue;
/** Discard enqueue callback. */
R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV) pfnXferEnqueueDiscard;
} Drv;
/** PDMASYNCCOMPLETIONTEMPLATETYPE_INTERNAL */
struct
{
/** Pointer to user data. */
R3PTRTYPE(void *) pvUser;
/** Complete callback to the user. */
R3PTRTYPE(PFNPDMBLKCACHEXFERCOMPLETEINT) pfnXferComplete;
/** I/O enqueue callback. */
R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEINT) pfnXferEnqueue;
/** Discard enqueue callback. */
R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDISCARDINT) pfnXferEnqueueDiscard;
} Int;
/** PDMASYNCCOMPLETIONTEMPLATETYPE_USB */
struct
{
/** Pointer to the usb instance owning the template. */
R3PTRTYPE(PPDMUSBINS) pUsbIns;
/** Complete callback to the user. */
R3PTRTYPE(PFNPDMBLKCACHEXFERCOMPLETEUSB) pfnXferComplete;
/** I/O enqueue callback. */
R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEUSB) pfnXferEnqueue;
/** Discard enqueue callback. */
R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB) pfnXferEnqueueDiscard;
} Usb;
} u;
#ifdef VBOX_WITH_STATISTICS
#if HC_ARCH_BITS == 64
uint32_t u32Alignment;
#endif
/** Number of times a write was deferred because the cache entry was still in progress */
STAMCOUNTER StatWriteDeferred;
/** Number appended cache entries. */
STAMCOUNTER StatAppendedWrites;
#endif
/** Flag whether the cache was suspended. */
volatile bool fSuspended;
} PDMBLKCACHE, *PPDMBLKCACHE;
#ifdef VBOX_WITH_STATISTICS
AssertCompileMemberAlignment(PDMBLKCACHE, StatWriteDeferred, sizeof(uint64_t));
#endif
/**
* I/O task.
*/
typedef struct PDMBLKCACHEREQ
{
/** Opaque user data returned on completion. */
void *pvUser;
/** Number of pending transfers (waiting for a cache entry and passed through). */
volatile uint32_t cXfersPending;
/** Status code. */
volatile int rcReq;
} PDMBLKCACHEREQ, *PPDMBLKCACHEREQ;
/**
* I/O transfer from the cache to the underlying medium.
*/
typedef struct PDMBLKCACHEIOXFER
{
/** Flag whether the I/O xfer updates a cache entry or updates the request directly. */
bool fIoCache;
/** Type dependent data. */
union
{
/** Pointer to the entry the transfer updates. */
PPDMBLKCACHEENTRY pEntry;
/** Pointer to the request the transfer updates. */
PPDMBLKCACHEREQ pReq;
};
/** Transfer direction. */
PDMBLKCACHEXFERDIR enmXferDir;
/** Segment used if a cache entry is updated. */
RTSGSEG SgSeg;
/** S/G buffer. */
RTSGBUF SgBuf;
} PDMBLKCACHEIOXFER;
/**
* Cache waiter
*/
typedef struct PDMBLKCACHEWAITER
{
/* Next waiter in the list. */
struct PDMBLKCACHEWAITER *pNext;
/** S/G buffer holding or receiving data. */
RTSGBUF SgBuf;
/** Offset into the cache entry to start the transfer. */
uint32_t offCacheEntry;
/** How many bytes to transfer. */
size_t cbTransfer;
/** Flag whether the task wants to read or write into the entry. */
bool fWrite;
/** Task the waiter is for. */
PPDMBLKCACHEREQ pReq;
} PDMBLKCACHEWAITER;
RT_C_DECLS_END
#endif