PGMAllGst.h revision 87fe4d73d7e6e53fbcec40dc6be2372479851cd4
/* $Id$ */
/** @file
* VBox - Page Manager, Guest Paging Template - All context code.
*/
/*
* Copyright (C) 2006-2007 Sun Microsystems, Inc.
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
* Clara, CA 95054 USA or visit http://www.sun.com if you need
* additional information or have any questions.
*/
/*******************************************************************************
* Internal Functions *
*******************************************************************************/
__BEGIN_DECLS
PGM_GST_DECL(int, GetPage)(PVMCPU pVCpu, RTGCPTR GCPtr, uint64_t *pfFlags, PRTGCPHYS pGCPhys);
PGM_GST_DECL(int, ModifyPage)(PVMCPU pVCpu, RTGCPTR GCPtr, size_t cb, uint64_t fFlags, uint64_t fMask);
PGM_GST_DECL(int, GetPDE)(PVMCPU pVCpu, RTGCPTR GCPtr, PX86PDEPAE pPDE);
PGM_GST_DECL(bool, HandlerVirtualUpdate)(PVM pVM, uint32_t cr4);
__END_DECLS
/**
* Gets effective Guest OS page information.
*
* When GCPtr is in a big page, the function will return as if it was a normal
* 4KB page. If the need for distinguishing between big and normal page becomes
* necessary at a later point, a PGMGstGetPage Ex() will be created for that
* purpose.
*
* @returns VBox status.
* @param pVCpu The VMCPU handle.
* @param GCPtr Guest Context virtual address of the page. Page aligned!
* @param pfFlags Where to store the flags. These are X86_PTE_*, even for big pages.
* @param pGCPhys Where to store the GC physical address of the page.
* This is page aligned. The fact that the
*/
PGM_GST_DECL(int, GetPage)(PVMCPU pVCpu, RTGCPTR GCPtr, uint64_t *pfFlags, PRTGCPHYS pGCPhys)
{
#if PGM_GST_TYPE == PGM_TYPE_REAL \
|| PGM_GST_TYPE == PGM_TYPE_PROT
/*
* Fake it.
*/
if (pfFlags)
*pfFlags = X86_PTE_P | X86_PTE_RW | X86_PTE_US;
if (pGCPhys)
*pGCPhys = GCPtr & PAGE_BASE_GC_MASK;
return VINF_SUCCESS;
#elif PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
PVM pVM = pVCpu->CTX_SUFF(pVM);
/*
* Get the PDE.
*/
# if PGM_GST_TYPE == PGM_TYPE_32BIT
X86PDE Pde = pgmGstGet32bitPDE(&pVCpu->pgm.s, GCPtr);
#elif PGM_GST_TYPE == PGM_TYPE_PAE
/* pgmGstGetPaePDE will return 0 if the PDPTE is marked as not present.
* All the other bits in the PDPTE are only valid in long mode (r/w, u/s, nx). */
X86PDEPAE Pde = pgmGstGetPaePDE(&pVCpu->pgm.s, GCPtr);
bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVCpu) & MSR_K6_EFER_NXE);
#elif PGM_GST_TYPE == PGM_TYPE_AMD64
PX86PML4E pPml4e;
X86PDPE Pdpe;
X86PDEPAE Pde = pgmGstGetLongModePDEEx(&pVCpu->pgm.s, GCPtr, &pPml4e, &Pdpe);
bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVCpu) & MSR_K6_EFER_NXE);
Assert(pPml4e);
if (!(pPml4e->n.u1Present & Pdpe.n.u1Present))
return VERR_PAGE_TABLE_NOT_PRESENT;
/* Merge accessed, write, user and no-execute bits into the PDE. */
Pde.n.u1Accessed &= pPml4e->n.u1Accessed & Pdpe.lm.u1Accessed;
Pde.n.u1Write &= pPml4e->n.u1Write & Pdpe.lm.u1Write;
Pde.n.u1User &= pPml4e->n.u1User & Pdpe.lm.u1User;
Pde.n.u1NoExecute &= pPml4e->n.u1NoExecute & Pdpe.lm.u1NoExecute;
# endif
/*
* Lookup the page.
*/
if (!Pde.n.u1Present)
return VERR_PAGE_TABLE_NOT_PRESENT;
if ( !Pde.b.u1Size
# if PGM_GST_TYPE != PGM_TYPE_AMD64
|| !(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE)
# endif
)
{
PGSTPT pPT;
int rc = PGM_GCPHYS_2_PTR(pVM, Pde.u & GST_PDE_PG_MASK, &pPT);
if (RT_FAILURE(rc))
return rc;
/*
* Get PT entry and check presence.
*/
const GSTPTE Pte = pPT->a[(GCPtr >> GST_PT_SHIFT) & GST_PT_MASK];
if (!Pte.n.u1Present)
return VERR_PAGE_NOT_PRESENT;
/*
* Store the result.
* RW and US flags depend on all levels (bitwise AND) - except for legacy PAE
* where the PDPE is simplified.
*/
if (pfFlags)
{
*pfFlags = (Pte.u & ~GST_PTE_PG_MASK)
& ((Pde.u & (X86_PTE_RW | X86_PTE_US)) | ~(uint64_t)(X86_PTE_RW | X86_PTE_US));
# if PGM_WITH_NX(PGM_GST_TYPE, PGM_GST_TYPE)
/* The NX bit is determined by a bitwise OR between the PT and PD */
if (fNoExecuteBitValid)
*pfFlags |= (Pte.u & Pde.u & X86_PTE_PAE_NX);
# endif
}
if (pGCPhys)
*pGCPhys = Pte.u & GST_PTE_PG_MASK;
}
else
{
/*
* Map big to 4k PTE and store the result
*/
if (pfFlags)
{
*pfFlags = (Pde.u & ~(GST_PTE_PG_MASK | X86_PTE_PAT))
| ((Pde.u & X86_PDE4M_PAT) >> X86_PDE4M_PAT_SHIFT);
# if PGM_WITH_NX(PGM_GST_TYPE, PGM_GST_TYPE)
/* The NX bit is determined by a bitwise OR between the PT and PD */
if (fNoExecuteBitValid)
*pfFlags |= (Pde.u & X86_PTE_PAE_NX);
# endif
}
if (pGCPhys)
*pGCPhys = GST_GET_PDE_BIG_PG_GCPHYS(Pde) | (GCPtr & (~GST_PDE_BIG_PG_MASK ^ ~GST_PTE_PG_MASK));
}
return VINF_SUCCESS;
#else
# error "shouldn't be here!"
/* something else... */
return VERR_NOT_SUPPORTED;
#endif
}
/**
* Modify page flags for a range of pages in the guest's tables
*
* The existing flags are ANDed with the fMask and ORed with the fFlags.
*
* @returns VBox status code.
* @param pVCpu The VMCPU handle.
* @param GCPtr Virtual address of the first page in the range. Page aligned!
* @param cb Size (in bytes) of the page range to apply the modification to. Page aligned!
* @param fFlags The OR mask - page flags X86_PTE_*, excluding the page mask of course.
* @param fMask The AND mask - page flags X86_PTE_*.
*/
PGM_GST_DECL(int, ModifyPage)(PVMCPU pVCpu, RTGCPTR GCPtr, size_t cb, uint64_t fFlags, uint64_t fMask)
{
#if PGM_GST_TYPE == PGM_TYPE_32BIT \
|| PGM_GST_TYPE == PGM_TYPE_PAE \
|| PGM_GST_TYPE == PGM_TYPE_AMD64
PVM pVM = pVCpu->CTX_SUFF(pVM);
for (;;)
{
/*
* Get the PD entry.
*/
# if PGM_GST_TYPE == PGM_TYPE_32BIT
PX86PDE pPde = pgmGstGet32bitPDEPtr(&pVCpu->pgm.s, GCPtr);
# elif PGM_GST_TYPE == PGM_TYPE_PAE
/* pgmGstGetPaePDEPtr will return 0 if the PDPTE is marked as not present
* All the other bits in the PDPTE are only valid in long mode (r/w, u/s, nx)
*/
PX86PDEPAE pPde = pgmGstGetPaePDEPtr(&pVCpu->pgm.s, GCPtr);
Assert(pPde);
if (!pPde)
return VERR_PAGE_TABLE_NOT_PRESENT;
# elif PGM_GST_TYPE == PGM_TYPE_AMD64
/** @todo Setting the r/w, u/s & nx bits might have no effect depending on the pdpte & pml4 values */
PX86PDEPAE pPde = pgmGstGetLongModePDEPtr(&pVCpu->pgm.s, GCPtr);
Assert(pPde);
if (!pPde)
return VERR_PAGE_TABLE_NOT_PRESENT;
# endif
GSTPDE Pde = *pPde;
Assert(Pde.n.u1Present);
if (!Pde.n.u1Present)
return VERR_PAGE_TABLE_NOT_PRESENT;
if ( !Pde.b.u1Size
# if PGM_GST_TYPE != PGM_TYPE_AMD64
|| !(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE)
# endif
)
{
/*
* 4KB Page table
*
* Walk page tables and pages till we're done.
*/
PGSTPT pPT;
int rc = PGM_GCPHYS_2_PTR(pVM, Pde.u & GST_PDE_PG_MASK, &pPT);
if (RT_FAILURE(rc))
return rc;
unsigned iPTE = (GCPtr >> GST_PT_SHIFT) & GST_PT_MASK;
while (iPTE < RT_ELEMENTS(pPT->a))
{
GSTPTE Pte = pPT->a[iPTE];
Pte.u = (Pte.u & (fMask | X86_PTE_PAE_PG_MASK))
| (fFlags & ~GST_PTE_PG_MASK);
pPT->a[iPTE] = Pte;
/* next page */
cb -= PAGE_SIZE;
if (!cb)
return VINF_SUCCESS;
GCPtr += PAGE_SIZE;
iPTE++;
}
}
else
{
/*
* 4MB Page table
*/
# if PGM_GST_TYPE == PGM_TYPE_32BIT
Pde.u = (Pde.u & (fMask | ((fMask & X86_PTE_PAT) << X86_PDE4M_PAT_SHIFT) | GST_PDE_BIG_PG_MASK | X86_PDE4M_PG_HIGH_MASK | X86_PDE4M_PS))
# else
Pde.u = (Pde.u & (fMask | ((fMask & X86_PTE_PAT) << X86_PDE4M_PAT_SHIFT) | GST_PDE_BIG_PG_MASK | X86_PDE4M_PS))
# endif
| (fFlags & ~GST_PTE_PG_MASK)
| ((fFlags & X86_PTE_PAT) << X86_PDE4M_PAT_SHIFT);
*pPde = Pde;
/* advance */
const unsigned cbDone = GST_BIG_PAGE_SIZE - (GCPtr & GST_BIG_PAGE_OFFSET_MASK);
if (cbDone >= cb)
return VINF_SUCCESS;
cb -= cbDone;
GCPtr += cbDone;
}
}
#else
/* real / protected mode: ignore. */
return VINF_SUCCESS;
#endif
}
/**
* Retrieve guest PDE information
*
* @returns VBox status code.
* @param pVCpu The VMCPU handle.
* @param GCPtr Guest context pointer
* @param pPDE Pointer to guest PDE structure
*/
PGM_GST_DECL(int, GetPDE)(PVMCPU pVCpu, RTGCPTR GCPtr, PX86PDEPAE pPDE)
{
#if PGM_GST_TYPE == PGM_TYPE_32BIT \
|| PGM_GST_TYPE == PGM_TYPE_PAE \
|| PGM_GST_TYPE == PGM_TYPE_AMD64
# if PGM_GST_TYPE == PGM_TYPE_32BIT
X86PDE Pde = pgmGstGet32bitPDE(&pVCpu->pgm.s, GCPtr);
# elif PGM_GST_TYPE == PGM_TYPE_PAE
X86PDEPAE Pde = pgmGstGetPaePDE(&pVCpu->pgm.s, GCPtr);
# elif PGM_GST_TYPE == PGM_TYPE_AMD64
X86PDEPAE Pde = pgmGstGetLongModePDE(&pVCpu->pgm.s, GCPtr);
# endif
pPDE->u = (X86PGPAEUINT)Pde.u;
return VINF_SUCCESS;
#else
AssertFailed();
return VERR_NOT_IMPLEMENTED;
#endif
}
#if PGM_GST_TYPE == PGM_TYPE_32BIT \
|| PGM_GST_TYPE == PGM_TYPE_PAE \
|| PGM_GST_TYPE == PGM_TYPE_AMD64
/**
* Updates one virtual handler range.
*
* @returns 0
* @param pNode Pointer to a PGMVIRTHANDLER.
* @param pvUser Pointer to a PGMVHUARGS structure (see PGM.cpp).
*/
static DECLCALLBACK(int) PGM_GST_NAME(VirtHandlerUpdateOne)(PAVLROGCPTRNODECORE pNode, void *pvUser)
{
PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)pNode;
PPGMHVUSTATE pState = (PPGMHVUSTATE)pvUser;
PVM pVM = pState->pVM;
PVMCPU pVCpu = pState->pVCpu;
Assert(pCur->enmType != PGMVIRTHANDLERTYPE_HYPERVISOR);
#if PGM_GST_TYPE == PGM_TYPE_32BIT
PX86PD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
#endif
RTGCPTR GCPtr = pCur->Core.Key;
#if PGM_GST_MODE != PGM_MODE_AMD64
/* skip all stuff above 4GB if not AMD64 mode. */
if (GCPtr >= _4GB)
return 0;
#endif
unsigned offPage = GCPtr & PAGE_OFFSET_MASK;
unsigned iPage = 0;
while (iPage < pCur->cPages)
{
#if PGM_GST_TYPE == PGM_TYPE_32BIT
X86PDE Pde = pPDSrc->a[GCPtr >> X86_PD_SHIFT];
#elif PGM_GST_TYPE == PGM_TYPE_PAE
X86PDEPAE Pde = pgmGstGetPaePDE(&pVCpu->pgm.s, GCPtr);
#elif PGM_GST_TYPE == PGM_TYPE_AMD64
X86PDEPAE Pde = pgmGstGetLongModePDE(&pVCpu->pgm.s, GCPtr);
#endif
if (Pde.n.u1Present)
{
if ( !Pde.b.u1Size
# if PGM_GST_TYPE != PGM_TYPE_AMD64
|| !(pState->cr4 & X86_CR4_PSE)
# endif
)
{
/*
* Normal page table.
*/
PGSTPT pPT;
int rc = PGM_GCPHYS_2_PTR(pVM, Pde.u & GST_PDE_PG_MASK, &pPT);
if (RT_SUCCESS(rc))
{
for (unsigned iPTE = (GCPtr >> GST_PT_SHIFT) & GST_PT_MASK;
iPTE < RT_ELEMENTS(pPT->a) && iPage < pCur->cPages;
iPTE++, iPage++, GCPtr += PAGE_SIZE, offPage = 0)
{
GSTPTE Pte = pPT->a[iPTE];
RTGCPHYS GCPhysNew;
if (Pte.n.u1Present)
GCPhysNew = (RTGCPHYS)(pPT->a[iPTE].u & GST_PTE_PG_MASK) + offPage;
else
GCPhysNew = NIL_RTGCPHYS;
if (pCur->aPhysToVirt[iPage].Core.Key != GCPhysNew)
{
if (pCur->aPhysToVirt[iPage].Core.Key != NIL_RTGCPHYS)
pgmHandlerVirtualClearPage(&pVM->pgm.s, pCur, iPage);
#ifdef VBOX_STRICT_PGM_HANDLER_VIRTUAL
AssertReleaseMsg(!pCur->aPhysToVirt[iPage].offNextAlias,
("{.Core.Key=%RGp, .Core.KeyLast=%RGp, .offVirtHandler=%#RX32, .offNextAlias=%#RX32} GCPhysNew=%RGp\n",
pCur->aPhysToVirt[iPage].Core.Key, pCur->aPhysToVirt[iPage].Core.KeyLast,
pCur->aPhysToVirt[iPage].offVirtHandler, pCur->aPhysToVirt[iPage].offNextAlias, GCPhysNew));
#endif
pCur->aPhysToVirt[iPage].Core.Key = GCPhysNew;
pState->fTodo |= PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL;
}
}
}
else
{
/* not-present. */
offPage = 0;
AssertRC(rc);
for (unsigned iPTE = (GCPtr >> GST_PT_SHIFT) & GST_PT_MASK;
iPTE < RT_ELEMENTS(pPT->a) && iPage < pCur->cPages;
iPTE++, iPage++, GCPtr += PAGE_SIZE)
{
if (pCur->aPhysToVirt[iPage].Core.Key != NIL_RTGCPHYS)
{
pgmHandlerVirtualClearPage(&pVM->pgm.s, pCur, iPage);
#ifdef VBOX_STRICT_PGM_HANDLER_VIRTUAL
AssertReleaseMsg(!pCur->aPhysToVirt[iPage].offNextAlias,
("{.Core.Key=%RGp, .Core.KeyLast=%RGp, .offVirtHandler=%#RX32, .offNextAlias=%#RX32}\n",
pCur->aPhysToVirt[iPage].Core.Key, pCur->aPhysToVirt[iPage].Core.KeyLast,
pCur->aPhysToVirt[iPage].offVirtHandler, pCur->aPhysToVirt[iPage].offNextAlias));
#endif
pCur->aPhysToVirt[iPage].Core.Key = NIL_RTGCPHYS;
pState->fTodo |= PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL;
}
}
}
}
else
{
/*
* 2/4MB page.
*/
RTGCPHYS GCPhys = (RTGCPHYS)(Pde.u & GST_PDE_PG_MASK);
for (unsigned i4KB = (GCPtr >> GST_PT_SHIFT) & GST_PT_MASK;
i4KB < PAGE_SIZE / sizeof(GSTPDE) && iPage < pCur->cPages;
i4KB++, iPage++, GCPtr += PAGE_SIZE, offPage = 0)
{
RTGCPHYS GCPhysNew = GCPhys + (i4KB << PAGE_SHIFT) + offPage;
if (pCur->aPhysToVirt[iPage].Core.Key != GCPhysNew)
{
if (pCur->aPhysToVirt[iPage].Core.Key != NIL_RTGCPHYS)
pgmHandlerVirtualClearPage(&pVM->pgm.s, pCur, iPage);
#ifdef VBOX_STRICT_PGM_HANDLER_VIRTUAL
AssertReleaseMsg(!pCur->aPhysToVirt[iPage].offNextAlias,
("{.Core.Key=%RGp, .Core.KeyLast=%RGp, .offVirtHandler=%#RX32, .offNextAlias=%#RX32} GCPhysNew=%RGp\n",
pCur->aPhysToVirt[iPage].Core.Key, pCur->aPhysToVirt[iPage].Core.KeyLast,
pCur->aPhysToVirt[iPage].offVirtHandler, pCur->aPhysToVirt[iPage].offNextAlias, GCPhysNew));
#endif
pCur->aPhysToVirt[iPage].Core.Key = GCPhysNew;
pState->fTodo |= PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL;
}
}
} /* pde type */
}
else
{
/* not-present. */
for (unsigned cPages = (GST_PT_MASK + 1) - ((GCPtr >> GST_PT_SHIFT) & GST_PT_MASK);
cPages && iPage < pCur->cPages;
iPage++, GCPtr += PAGE_SIZE)
{
if (pCur->aPhysToVirt[iPage].Core.Key != NIL_RTGCPHYS)
{
pgmHandlerVirtualClearPage(&pVM->pgm.s, pCur, iPage);
pCur->aPhysToVirt[iPage].Core.Key = NIL_RTGCPHYS;
pState->fTodo |= PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL;
}
}
offPage = 0;
}
} /* for pages in virtual mapping. */
return 0;
}
#endif /* 32BIT, PAE and AMD64 */
/**
* Updates the virtual page access handlers.
*
* @returns true if bits were flushed.
* @returns false if bits weren't flushed.
* @param pVM VM handle.
* @param pPDSrc The page directory.
* @param cr4 The cr4 register value.
*/
PGM_GST_DECL(bool, HandlerVirtualUpdate)(PVM pVM, uint32_t cr4)
{
#if PGM_GST_TYPE == PGM_TYPE_32BIT \
|| PGM_GST_TYPE == PGM_TYPE_PAE \
|| PGM_GST_TYPE == PGM_TYPE_AMD64
/** @todo
* In theory this is not sufficient: the guest can change a single page in a range with invlpg
*/
/*
* Resolve any virtual address based access handlers to GC physical addresses.
* This should be fairly quick.
*/
RTUINT fTodo = 0;
pgmLock(pVM);
STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3HandlerVirtualUpdate), a);
for (unsigned i=0;i<pVM->cCPUs;i++)
{
PGMHVUSTATE State;
PVMCPU pVCpu = &pVM->aCpus[i];
State.pVM = pVM;
State.pVCpu = pVCpu;
State.fTodo = pVCpu->pgm.s.fSyncFlags;
State.cr4 = cr4;
RTAvlroGCPtrDoWithAll(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, true, PGM_GST_NAME(VirtHandlerUpdateOne), &State);
fTodo |= State.fTodo;
}
STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3HandlerVirtualUpdate), a);
/*
* Set / reset bits?
*/
if (fTodo & PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL)
{
STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3HandlerVirtualReset), b);
Log(("HandlerVirtualUpdate: resets bits\n"));
RTAvlroGCPtrDoWithAll(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, true, pgmHandlerVirtualResetOne, pVM);
for (unsigned i=0;i<pVM->cCPUs;i++)
{
PVMCPU pVCpu = &pVM->aCpus[i];
pVCpu->pgm.s.fSyncFlags &= ~PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL;
}
STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3HandlerVirtualReset), b);
}
pgmUnlock(pVM);
return !!(fTodo & PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL);
#else /* real / protected */
return false;
#endif
}