PGMR0.cpp revision 67b4b089c50d0ab8ad847dddd8c0e0812fdadc9e
/* $Id$ */
/** @file
* PGM - Page Manager and Monitor, Ring-0.
*/
/*
* Copyright (C) 2007-2011 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM
#include "PGMInternal.h"
#include "PGMInline.h"
/*
*/
#include "PGMR0Bth.h"
#include "PGMR0Bth.h"
#include "PGMR0Bth.h"
#include "PGMR0Bth.h"
/**
* Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
*
* @returns The following VBox status codes.
* @retval VINF_SUCCESS on success. FF cleared.
* @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
*
* @param pVM Pointer to the VM.
* @param pVCpu Pointer to the VMCPU.
*
* @remarks Must be called from within the PGM critical section. The caller
* must clear the new pages.
*
* @note (review) The function signature is not visible at this point in the
* file. The return statements that are visible yield VERR_NO_MEMORY,
* VINF_SUCCESS or rc; confirm where the conversion to VINF_EM_NO_MEMORY
* and the force-flag (FF) handling described above take place.
*/
{
/*
* Check for error injection.
*/
/* NOTE(review): no guarding condition is visible here, so as shown this
   return is unconditional and everything below it is unreachable. Confirm
   that the error-injection check is present in the real file. */
return VERR_NO_MEMORY;
/*
* Try to allocate a full set of handy pages.
*/
/* NOTE(review): the declarations of iFirst and cPages are not visible. */
/* Nothing to allocate, so report success straight away. */
if (!cPages)
return VINF_SUCCESS;
/* Ask GMM for cPages pages (the same count is passed for both count
   arguments), using the descriptors starting at aHandyPages[iFirst]. */
int rc = GMMR0AllocateHandyPages(pVM, pVCpu->idCpu, cPages, cPages, &pVM->pgm.s.aHandyPages[iFirst]);
if (RT_SUCCESS(rc))
{
#ifdef VBOX_STRICT
/* NOTE(review): the strict-build checks that belong here are not visible. */
{
}
#endif
}
/* VERR_GMM_SEED_ME is deliberately kept out of the retry path below. */
else if (rc != VERR_GMM_SEED_ME)
{
/* Only retry with fewer pages when GMM reported that a limit was hit and
   iFirst is still below PGM_HANDY_PAGES_MIN. */
if ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
|| rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
&& iFirst < PGM_HANDY_PAGES_MIN)
{
#ifdef VBOX_STRICT
/* We're ASSUMING that GMM has updated all the entries before failing us. */
uint32_t i;
{
}
#endif
/*
* Reduce the number of pages until we hit the minimum limit.
*/
do
{
/* Halve the request on every pass. */
cPages >>= 1;
/* NOTE(review): the loop condition below is cut short in this view (the
   parentheses are unbalanced and there is no terminating ");"), and no
   repeated allocation call is visible inside the loop body. */
} while ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
|| rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
if (RT_SUCCESS(rc))
{
#ifdef VBOX_STRICT
/* NOTE(review): i is declared above without a visible initialiser; check
   that it is set before this countdown loop runs. */
while (i-- > 0)
{
}
{
}
#endif
}
}
{
}
}
return rc;
}
/**
* Worker function for PGMR3PhysAllocateLargeHandyPage
*
* @returns The following VBox status codes.
* @retval VINF_SUCCESS on success.
* @retval VINF_EM_NO_MEMORY if we're out of memory.
*
* @param pVM Pointer to the VM.
* @param pVCpu Pointer to the VMCPU.
*
* @remarks Must be called from within the PGM critical section. The caller
* must clear the new pages.
*
* @note (review) The function signature and most of the body are not visible
* at this point in the file.
*/
{
/* NOTE(review): rc is tested here, but neither its declaration nor the call
   that sets it is visible; as shown, a failure status falls off the end of
   the function without a return statement. */
if (RT_SUCCESS(rc))
return rc;
}
#ifdef VBOX_WITH_PCI_PASSTHROUGH
/* Interface sketch. The interface belongs to a global PCI pass-through
manager. It shall use the global VM handle, not the user VM handle to
store the per-VM info (domain) since that is all ring-0 stuff, thus
passing pGVM here. I've tentatively prefixed the functions 'GPciRawR0',
we can discuss the PciRaw code re-organization when I'm back from
vacation.
I've implemented the initial IOMMU set up below. For things to work
reliably, we will probably need to add a whole bunch of checks and
GPciRawR0GuestPageUpdate calls to the PGM code. For the present,
assuming nested paging (enforced) and prealloc (enforced), no
ballooning (check missing), page sharing (check missing) or live
migration (check missing), it might work fine. At least if some
VM power-off hook is present and can tear down the IOMMU page tables. */
/**
* Tells the global PCI pass-through manager that we are about to set up the
* guest page to host page mappings for the specified VM.
*
* The body visible here is a placeholder: it does no work and always reports
* success.
*
* @returns VBox status code (always VINF_SUCCESS in the visible body).
*
* @param pGVM The ring-0 VM structure.
*/
{
return VINF_SUCCESS;
}
/**
* Assigns a host page mapping for a guest page.
*
* This is only used when setting up the mappings, i.e. between
* GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
*
* The body visible here is a placeholder: it does no work and always reports
* success.
*
* @returns VBox status code (always VINF_SUCCESS in the visible body).
* @param pGVM The ring-0 VM structure.
* @param GCPhys The address of the guest page (page aligned).
* @param HCPhys The address of the host page (page aligned).
*/
{
/** @todo: what do we do on failure? */
return VINF_SUCCESS;
}
/**
* Indicates that the specified guest page either doesn't exist or doesn't
* have a host page mapping we trust PCI pass-through with.
*
* This is only used when setting up the mappings, i.e. between
* GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
*
* The body visible here is a placeholder: it does no work and always reports
* success.
*
* @returns VBox status code (always VINF_SUCCESS in the visible body).
* @param pGVM The ring-0 VM structure.
* @param GCPhys The address of the guest page (page aligned).
* @param HCPhys The address of the host page (page aligned).
*
* @note (review) A host page parameter looks out of place for a call that
* reports the absence of a trusted host mapping; the signature is not
* visible here, so confirm whether HCPhys is really a parameter.
*/
{
/** @todo: what do we do on failure? */
return VINF_SUCCESS;
}
/**
* Tells the global PCI pass-through manager that we have completed setting up
* the guest page to host page mappings for the specified VM.
*
* This complements GPciRawR0GuestPageBeginAssignments and will be called even
* if some page assignment failed.
*
* The body visible here is a placeholder: it does no work and always reports
* success.
*
* @returns VBox status code (always VINF_SUCCESS in the visible body).
*
* @param pGVM The ring-0 VM structure.
*/
{
return VINF_SUCCESS;
}
/**
* Tells the global PCI pass-through manager that a guest page mapping has
* changed after the initial setup.
*
* The body visible here is a placeholder: it does no work and always reports
* success.
*
* @returns VBox status code (always VINF_SUCCESS in the visible body).
* @param pGVM The ring-0 VM structure.
* @param GCPhys The address of the guest page (page aligned).
* @param HCPhys The new host page address or NIL_RTHCPHYS if
* now unassigned.
*/
{
return VINF_SUCCESS;
}
#endif /* VBOX_WITH_PCI_PASSTHROUGH */
/**
* Sets up the IOMMU when raw PCI device is enabled.
*
* @note This is a hack that will probably be remodelled and refined later!
*
* @note (review) The function signature is not visible at this point in the
* file, and several statements of the body are missing.
*
* @returns VBox status code.
*
* @param pVM Pointer to the VM.
*/
{
/* NOTE(review): rc is tested here, but neither its declaration nor the call
   that sets it is visible in this view. */
if (RT_FAILURE(rc))
return rc;
#ifdef VBOX_WITH_PCI_PASSTHROUGH
/* NOTE(review): the condition that opens this block is not visible; the
   matching "else" sits just before the #endif below. */
{
/*
* The Simplistic Approach - Enumerate all the pages and tell the
* IOMMU about each of them.
*/
if (RT_SUCCESS(rc))
{
{
/* cLeft is decremented once per iteration of the page walk. */
while (cLeft-- > 0)
{
/* Only expose pages that are 100% safe for now. */
/* NOTE(review): the "if" branch this "else" pairs with is not visible. */
else
/* next */
pPage++;
}
}
}
}
/* NOTE(review): as shown, when VBOX_WITH_PCI_PASSTHROUGH is defined this
   "else" binds to the "return rc;" after the #endif; the statement that
   presumably belonged to the else branch is not visible. */
else
#endif
return rc;
}
/**
* #PF Handler for nested paging.
*
* @returns VBox status code (appropriate for trap handling and GC return).
* @param pVM Pointer to the VM.
* @param pVCpu Pointer to the VMCPU.
* @param enmShwPagingMode Paging mode for the nested page tables.
* @param uErr The trap error code.
* @param pRegFrame Trap register frame.
* @param GCPhysFault The fault address.
*
* @note (review) The parameter list is cut short in this view (it ends after
* uErr), and several statements of the body are missing.
*/
VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PVM pVM, PVMCPU pVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
{
int rc;
LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip));
/* AMD uses the host's paging mode; Intel has a single mode (EPT). */
AssertMsg( enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
|| enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
("enmShwPagingMode=%d\n", enmShwPagingMode));
/* Reserved shouldn't end up here. */
#ifdef VBOX_WITH_STATISTICS
/*
* Error code stats.
*/
/* The fault is classified by its error-code bits: US (user vs. supervisor),
   P (present), RW (write), RSVD (reserved bit) and ID (instruction fetch).
   NOTE(review): the statements that form the bodies of these branches are
   not visible here; only the classification skeleton remains. */
if (uErr & X86_TRAP_PF_US)
{
if (!(uErr & X86_TRAP_PF_P))
{
if (uErr & X86_TRAP_PF_RW)
else
}
else if (uErr & X86_TRAP_PF_RW)
else if (uErr & X86_TRAP_PF_RSVD)
else if (uErr & X86_TRAP_PF_ID)
else
}
else
{ /* Supervisor */
if (!(uErr & X86_TRAP_PF_P))
{
if (uErr & X86_TRAP_PF_RW)
else
}
else if (uErr & X86_TRAP_PF_RW)
else if (uErr & X86_TRAP_PF_ID)
else if (uErr & X86_TRAP_PF_RSVD)
}
#endif
/*
* Call the worker.
*
* Note! We pretend the guest is in protected mode without paging, so we
* can use existing code to build the nested page tables.
*/
bool fLockTaken = false;
/* NOTE(review): none of the cases below assigns rc in the visible code, so
   the per-mode worker calls announced above are not shown. As written, rc
   would be read uninitialised after the switch. */
switch(enmShwPagingMode)
{
case PGMMODE_32_BIT:
break;
case PGMMODE_PAE:
case PGMMODE_PAE_NX:
break;
case PGMMODE_AMD64:
case PGMMODE_AMD64_NX:
break;
case PGMMODE_EPT:
break;
default:
AssertFailed();
break;
}
/* NOTE(review): the body of this block is empty in this view; whatever is
   meant to happen when fLockTaken is set is not visible. */
if (fLockTaken)
{
}
/* A modified-PDE status from the sync code is reported to the caller as
   plain success. */
if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
rc = VINF_SUCCESS;
/* Note: hack alert for difficult to reproduce problem. */
/* NOTE(review): the condition guarding this block is not visible; judging by
   the log message it is meant for the VERR_PAGE_TABLE_NOT_PRESENT status,
   which is then turned into VINF_SUCCESS. */
{
Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip));
/* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
single VCPU VMs though. */
rc = VINF_SUCCESS;
}
/* NOTE(review): the trailing "});" on the next line suggests it is the tail
   of a macro invocation whose opening is not visible. */
pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Misc; });
STAM_PROFILE_STOP_EX(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0e, pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution), a);
return rc;
}
/**
* #PF Handler for deliberate nested paging misconfiguration (/reserved bit)
* employed for MMIO pages.
*
* When PGM_WITH_MMIO_OPTIMIZATIONS is not defined, the function only returns
* VERR_PGM_NOT_USED_IN_MODE.
*
* @returns VBox status code (appropriate for trap handling and GC return).
* @param pVM Pointer to the VM.
* @param pVCpu Pointer to the VMCPU.
* @param enmShwPagingMode Paging mode for the nested page tables.
* @param pRegFrame Trap register frame.
* @param GCPhysFault The fault address.
* @param uErr The error code, UINT32_MAX if not available
* (VT-x).
*
* @note (review) The parameter list is cut short in this view (it ends after
* enmShwPagingMode), and several statements of the body are missing.
*/
VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PVM pVM, PVMCPU pVCpu, PGMMODE enmShwPagingMode,
{
#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
/*
* Try to look up the all access physical handler for the address.
*/
/* NOTE(review): the lookup itself and the condition that opens this block
   are not visible; pHandler, pPage and rc are used below without a visible
   declaration. */
{
/*
* If the handler has aliased pages or pages that have been temporarily
* disabled, we'll have to take a detour to make sure we resync them
* to avoid lots of unnecessary exits.
*/
if ( ( pHandler->cAliasedPages
|| pHandler->cTmpOffPages)
)
{
Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
}
else
{
/* NOTE(review): the condition that opens this inner block is not visible;
   its "else" branch further down is empty in this view. */
{
Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pfnHandler, uErr, GCPhysFault, pvUser));
/* When no error code is available (uErr == UINT32_MAX), RTGCPTR_MAX is
   passed in its place; GCPhysFault is passed for both address arguments. */
rc = pfnHandler(pVM, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame, GCPhysFault, GCPhysFault, pvUser);
#ifdef VBOX_WITH_STATISTICS
/* NOTE(review): the statement controlled by this "if" is not visible. */
if (pHandler)
#endif
}
else
{
}
}
}
else
{
/*
* Must be out of sync, so do a SyncPage and restart the instruction.
*
* ASSUMES that ALL handlers are page aligned and covers whole pages
* (assumption asserted in PGMHandlerPhysicalRegisterEx).
*/
/* NOTE(review): the SyncPage call described above is not visible here. */
}
return rc;
#else
return VERR_PGM_NOT_USED_IN_MODE;
#endif
}