/* HWVMXR0.cpp, revision a09945cf2df03bb8e033258b60c786b0009ca30c */
/** @file
 * HWACCM VMX - Host Context Ring 0.
 */
/*
 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */
/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
/*******************************************************************************
*   Defined Constants And Macros                                               *
*******************************************************************************/
#if defined(RT_ARCH_AMD64)
# define VMX_IS_64BIT_HOST_MODE()   (true)
#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
# define VMX_IS_64BIT_HOST_MODE()   (g_fVMXIs64bitHost != 0)
#else
# define VMX_IS_64BIT_HOST_MODE()   (false)
#endif
/*******************************************************************************
*   Global Variables                                                           *
*******************************************************************************/
/* IO operation lookup arrays. */
static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
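/* A hedged sketch of how this table is meant to be used (the accessor macro
   is an assumption, not confirmed by the fragments below): during a
   VMX_EXIT_PORT_IO exit, the 2-bit operand-size field of the exit
   qualification (0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes; encoding 2 is unused,
   hence the 0 entry) indexes the table to mask the relevant bytes of eax:

       uint32_t uAndVal = g_aIOOpAnd[VMX_EXIT_QUALIFICATION_IO_SIZE(exitQualification)];
       uint32_t uValue  = pCtx->eax & uAndVal;   // e.g. the value an OUT would write
*/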
/** See HWACCMR0A.asm. */
extern "C" uint32_t g_fVMXIs64bitHost;
/*******************************************************************************
*   Local Functions                                                            *
*******************************************************************************/
static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx);
static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu);
static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu);
static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu);
static void vmxR0FlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys);
static void vmxR0FlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr);
static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
static void VMXR0CheckError(PVM pVM, PVMCPU pVCpu, int rc)
{
    RTCCUINTREG instrError;
    VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
    pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
}
/**
 * Sets up and activates VT-x on the current CPU.
 *
 * @returns VBox status code.
 * @param   pCpu            CPU info struct
 * @param   pVM             The VM to operate on. (can be NULL after a resume!!)
 * @param   pvPageCpu       Pointer to the global cpu page
 * @param   pPageCpuPhys    Physical address of the global cpu page
 */
VMMR0DECL(int) VMXR0EnableCpu(PHWACCM_CPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
    AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
    SUPR0Printf("VMXR0EnableCpu cpu %d page (%x) %x\n", pCpu->idCpu, pvPageCpu, (uint32_t)pPageCpuPhys);
    /* Set revision dword at the beginning of the VMXON structure. */
    *(uint32_t *)pvPageCpu = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
    /** @todo We should unmap the two pages from the virtual address space in order
     *        to prevent accidental corruption (which can have very bad consequences!!!). */
    /* Make sure the VMX instructions don't cause #UD faults. */
    /* Enter VMX Root Mode. */
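/* Minimal sketch of the sequence the two comments above describe; a hedged
   illustration only, with error handling elided (VMXDisable appears further
   down in this file; VMXEnable and the IPRT CR4 helpers are assumptions):

       ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);    // VMX instructions #UD unless CR4.VMXE is set
       int rc2 = VMXEnable(pPageCpuPhys);        // VMXON using the region initialized above
       if (RT_FAILURE(rc2))
           ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
*/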
/**
 * Deactivates VT-x on the current CPU.
 *
 * @returns VBox status code.
 * @param   pCpu            CPU info struct
 * @param   pvPageCpu       Pointer to the global cpu page
 * @param   pPageCpuPhys    Physical address of the global cpu page
 */
VMMR0DECL(int) VMXR0DisableCpu(PHWACCM_CPUINFO pCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
    AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
    /* Leave VMX Root Mode. */
    /* And clear the X86_CR4_VMXE bit. */
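/* Hedged sketch of what the two comments above amount to (VMXDisable is used
   further down in this file; the CR4 helpers are IPRT assumptions):

       VMXDisable();                              // VMXOFF, leaves VMX root mode
       ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);    // VMX instructions #UD again from here on
*/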
    SUPR0Printf("VMXR0DisableCpu cpu %d\n", pCpu->idCpu);
/**
 * Does Ring-0 per VM VT-x init.
 *
 * @returns VBox status code.
 * @param   pVM         The VM to operate on.
 */
    if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
    {
        /* Allocate one page for the virtual APIC mmio cache. */
        rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
        pVM->hwaccm.s.vmx.pAPIC     = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
        pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
    }

    /* Allocate the MSR bitmap if this feature is supported. */
    if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
    {
        rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjMSRBitmap, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
        pVM->hwaccm.s.vmx.pMSRBitmap     = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjMSRBitmap);
        pVM->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
        memset(pVM->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
    }
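/* The memset(..., 0xff, ...) above makes every guest rdmsr/wrmsr cause a
   VM-exit. A hedged sketch of granting direct access to one MSR instead
   (bitmap layout per the Intel SDM: bytes 0-1023 cover reads of MSRs
   0..0x1fff, bytes 2048-3071 cover writes of the same range; the MSR chosen
   is just an example):

       uint8_t *pbBitmap = pVM->hwaccm.s.vmx.pMSRBitmap;
       uint32_t uMsr     = MSR_IA32_SYSENTER_CS;
       pbBitmap[uMsr / 8]        &= ~(1 << (uMsr % 8));   // no exit on rdmsr
       pbBitmap[2048 + uMsr / 8] &= ~(1 << (uMsr % 8));   // no exit on wrmsr
*/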
    /* Allocate VMCSs for all guest CPUs. */
        /* Allocate one page for the VM control structure (VMCS). */
        rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjVMCS, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
        pVCpu->hwaccm.s.vmx.pVMCS     = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjVMCS);
        pVCpu->hwaccm.s.vmx.pVMCSPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjVMCS, 0);
        ASMMemZero32(pVCpu->hwaccm.s.vmx.pVMCS, PAGE_SIZE);

        /* Current guest paging mode. */
        pVCpu->hwaccm.s.vmx.enmCurrGuestMode = PGMMODE_REAL;

    SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.pVMCSPhys);
/**
 * Does Ring-0 per VM VT-x termination.
 *
 * @returns VBox status code.
 * @param   pVM         The VM to operate on.
 */
    if (pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS != NIL_RTR0MEMOBJ)
    {
        RTR0MemObjFree(pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS, false);
        pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
    }
    if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
    {
        RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
        pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
    }
    if (pVM->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
    {
        RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjMSRBitmap, false);
        pVM->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
    }
/**
 * Sets up VT-x for the specified VM.
 *
 * @returns VBox status code.
 * @param   pVM         The VM to operate on.
 */
    /* Set revision dword at the beginning of the VMCS structure. */
    *(uint32_t *)pVCpu->hwaccm.s.vmx.pVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
    /* Clear VM Control Structure. */
    Log(("pVMCSPhys = %RHp\n", pVCpu->hwaccm.s.vmx.pVMCSPhys));
    /* Activate the VM Control Structure. */
    rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
    /* VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
     * Set required bits to one and zero according to the MSR capabilities.
     */
    val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
    /* External and non-maskable interrupts cause VM-exits. */
    val = val | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT;
    val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
    rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
    /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
     * Set required bits to one and zero according to the MSR capabilities.
     */
    val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
    /* Program which events cause VM-exits and which features we want to use. */
    val = val | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
              | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside the guest (host thinks the cpu load is high) */
    /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
    val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
/* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch failure with an invalid control fields error. (combined with some other exit reasons) */
if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
/* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold */
val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
#ifdef VBOX_WITH_VTX_MSR_BITMAPS
if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
#ifdef HWACCM_VTX_WITH_EPT
#ifdef HWACCM_VTX_WITH_VPID
/* Save debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
if (VMX_IS_64BIT_HOST_MODE())
if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
/* Clear the VM control structure: mark it inactive, clear implementation-specific data and write the VMCS data back to memory. */
#ifdef HWACCM_VTX_WITH_VPID
return rc;
static int VMXR0InjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
int rc;
#ifdef VBOX_STRICT
LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%08x intInfo=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode));
#ifdef HWACCM_VMX_EMULATE_REALMODE
return VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0 /* no error code according to the Intel docs */);
return VINF_EM_RESET;
return VINF_SUCCESS;
rc = VMXWriteCachedVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
return rc;
int rc;
Log(("Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
return VINF_SUCCESS;
return VINF_SUCCESS;
Log(("Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", u8Interrupt, u8Interrupt, rc, pCtx->cs, (RTGCPTR)pCtx->rip));
#ifdef VBOX_STRICT
int rc;
switch (u8Vector) {
return VINF_SUCCESS;
if (VMX_IS_64BIT_HOST_MODE())
if (VMX_IS_64BIT_HOST_MODE())
/* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
if (!VMX_IS_64BIT_HOST_MODE())
if (VMX_IS_64BIT_HOST_MODE())
return VERR_VMX_INVALID_HOST_STATE;
if (VMX_IS_64BIT_HOST_MODE())
if (VMX_IS_64BIT_HOST_MODE())
if (VMX_IS_64BIT_HOST_MODE())
return rc;
#ifndef DEBUG
/* Also catch floating point exceptions as we need to report them to the guest in a different way. */
#ifdef DEBUG
#ifdef VBOX_STRICT
# ifdef HWACCM_VMX_EMULATE_REALMODE
/* Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise). */
#ifdef HWACCM_VMX_EMULATE_REALMODE
/* The base values in the hidden fs & gs registers are not in sync with the msrs; they are cut to 32 bits. */
#ifdef HWACCM_VMX_EMULATE_REALMODE
/* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
attr.u = 0;
val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
/* Note: protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
/* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
| X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
| X86_CR0_MP;
#ifdef VBOX_ENABLE_64_BITS_GUESTS
AssertFailed();
AssertFailed();
/* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
#ifdef HWACCM_VMX_EMULATE_REALMODE
/* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
val = 0
#ifdef HWACCM_VMX_EMULATE_REALMODE
| X86_CR4_VMXE;
AssertMsg(PGMGetEPTCR3(pVM) == PGMGetHyperCR3(pVM), ("%RHp vs %RHp\n", PGMGetEPTCR3(pVM), PGMGetHyperCR3(pVM)));
/* We use our identity mapping page table here as we need to map guest virtual to guest physical addresses; EPT will take care of the translation to host physical addresses. */
#ifdef HWACCM_VMX_EMULATE_REALMODE
/* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
/* Load guest debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
return rc;
int rc;
if (uInterruptState != 0)
/* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
#ifdef HWACCM_VMX_EMULATE_REALMODE
/* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
return VINF_SUCCESS;
/* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
/* If the TLB flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
#ifdef VBOX_WITH_STATISTICS
#ifdef HWACCM_VTX_WITH_VPID
/* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
/* If the TLB flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
/* Make sure we flush the TLB when required. Switch ASID to achieve the same thing, but without actually flushing the whole TLB (which is expensive). */
AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
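/* A hedged sketch of the ASID scheme the comments and assertions above imply
   (field names follow the surrounding fragments; the exact control flow is an
   assumption, not the file's confirmed logic):

       if (    pVCpu->hwaccm.s.idLastCpu   != pCpu->idCpu
           ||  pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
       {
           pCpu->uCurrentASID++;                      // fresh ASID => stale translations can't match
           if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
               pCpu->uCurrentASID = 1;                // wrap around; ASID 0 is reserved
           pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
           pVCpu->hwaccm.s.cTLBFlushes  = pCpu->cTLBFlushes;
           pVCpu->hwaccm.s.idLastCpu    = pCpu->idCpu;
       }
*/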
#ifdef VBOX_WITH_STATISTICS
bool fSyncTPR = false;
unsigned cResume = 0;
#ifdef VBOX_STRICT
#ifdef VBOX_WITH_STATISTICS
bool fStatEntryStarted = true;
bool fStatExit2Started = false;
#ifdef VBOX_STRICT
if ((val & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
if ((val & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
if ((val & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
if ((val & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
if (fStatExit2Started) { STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, y); fStatExit2Started = false; }
if (!fStatEntryStarted) { STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); fStatEntryStarted = true; }
goto end;
Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVM)));
 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
 * force a world switch again, possibly allowing a guest interrupt to be dispatched in the process. This could
 * break the guest. Sounds very unlikely, but such timing-sensitive problems are not as rare as you might think.
goto end;
/* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
goto end;
goto end;
/* Note the 32-bit exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L), but that appears to be missing in Intel CPUs */
bool fPending;
 * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
fSyncTPR = true;
# ifdef HWACCM_VTX_WITH_VPID
Log(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
Log(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
#ifdef VBOX_STRICT
#ifdef LOG_LOGGING
goto end;
goto end;
#ifdef VBOX_STRICT
rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu);
AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n", pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries));
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
* IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
goto end;
#ifdef LOG_LOGGING
&& VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
/* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
&& VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%08x pending error=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%08x\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
#ifdef VBOX_STRICT
/* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
&& VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%08x\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
if (fSyncTPR)
switch (exitReason)
switch (vector)
case X86_XCPT_NM:
/* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
goto ResumeExecution;
rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, 0);
goto ResumeExecution;
#ifdef DEBUG
Log(("Guest page fault at %RGv cr2=%RGv error code %x rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification, errCode, (RTGCPTR)pCtx->rsp));
rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
goto ResumeExecution;
Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification ,errCode));
goto ResumeExecution;
rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
goto ResumeExecution;
#ifdef VBOX_STRICT
rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
goto ResumeExecution;
Log(("Trap %x (debug) at %RGv exit qualification %RX64\n", vector, (RTGCPTR)pCtx->rip, exitQualification));
rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
goto ResumeExecution;
#ifdef VBOX_STRICT
rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
goto ResumeExecution;
/* lidt, lgdt can end up here. In the future crx changes as well. Just reload the whole context to be done with it. */
goto ResumeExecution;
AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", rc));
#ifdef VBOX_STRICT
switch (vector)
case X86_XCPT_DE:
case X86_XCPT_UD:
case X86_XCPT_SS:
case X86_XCPT_NP:
rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
goto ResumeExecution;
#ifdef HWACCM_VMX_EMULATE_REALMODE
rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
goto ResumeExecution;
case VMX_EXIT_EPT_VIOLATION: /* 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed by the configuration of the EPT paging structures. */
errCode = 0;
Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification , errCode));
goto ResumeExecution;
#ifdef VBOX_STRICT
case VMX_EXIT_EPT_MISCONFIG:
LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip, VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
goto ResumeExecution;
goto ResumeExecution;
goto ResumeExecution;
goto ResumeExecution;
AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, rc));
/* Note: the Intel manual claims there's a REX version of RDMSR that's slightly different, so we play safe by completely disassembling the instruction. */
goto ResumeExecution;
AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", rc));
Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
AssertFailed();
rc = PGMSyncCR3(pVM, CPUMGetGuestCR0(pVM), CPUMGetGuestCR3(pVM), CPUMGetGuestCR4(pVM), VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx) || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != USE_REG_CR3);
Assert(VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
rc = EMInterpretLMSW(pVM, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
goto ResumeExecution;
#ifdef VBOX_WITH_STATISTICS
if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
goto ResumeExecution;
/** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first time and restore drx registers afterwards */
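/* A possible shape for the @todo above; hedged, since the cached proc-ctls
   field name is an assumption (VMXWriteVMCS and the control constant are
   used elsewhere in this file):

       pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
       rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
*/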
if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
Log2(("VMX: mov drx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
goto ResumeExecution;
/* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
if (fIOWrite)
if (fIOWrite)
/* Bits 15:13 of the DR6 register are never cleared by the processor and must be cleared by software after each use. */
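/* Hedged one-liner illustrating that note (mask value per the x86 DR6 layout:
   bit 13 = BD, bit 14 = BS, bit 15 = BT; whether the file cleared them this
   way is an assumption):

       pCtx->dr6 &= ~UINT64_C(0xE000);
*/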
rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), 0, 0);
goto ResumeExecution;
goto ResumeExecution;
#ifdef VBOX_STRICT
AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", rc));
goto ResumeExecution;
switch (exitReason)
case VMX_EXIT_EPT_VIOLATION:
goto ResumeExecution;
/* Note: If we decide to emulate them here, then we must sync the MSRs that could have been changed (sysenter, fs/gs base)!!! */
#ifdef VBOX_STRICT
end:
CPUMSetChangedFlags(pVM, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS);
/* If we executed vmlaunch/vmresume and an external irq was pending, then we don't have to do a full sync the next time. */
return rc;
return VERR_VMX_X86_CR4_VMXE_CLEARED;
return rc;
return VINF_SUCCESS;
/* Clear the VM control structure: mark it inactive, clear implementation-specific data and write the VMCS data back to memory. */
return VINF_SUCCESS;
#ifdef HWACCM_VTX_WITH_VPID
/* If we get a flush in 64-bit guest mode, then force a full TLB flush. Invvpid probably takes only 32-bit addresses. (@todo) */
#ifdef HWACCM_VTX_WITH_VPID
if ( !fFlushPending
return VINF_SUCCESS;
if (!fFlushPending)
return VINF_SUCCESS;
switch (rc)
AssertFailed();
int rc;
Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason, (uint32_t)instrError));
#ifdef VBOX_STRICT
if (VMX_IS_64BIT_HOST_MODE())
DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.pVMCSPhys >> 32); /* Param 2: VMCS physical address - Hi. */
VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, uint32_t *paParam)
/* Clear the VM control structure: mark it inactive, clear implementation-specific data and write the VMCS data back to memory. */
VMXDisable();
if (pVM)
return VERR_VMX_VMXON_FAILED;
return rc;
#ifdef VMX_USE_CACHED_VMCS_ACCESSES