HMSVMR0.cpp revision 9f6988748efd4a82de06fee39ddf2e03a9ed66c2
/* $Id$ */
/** @file
* HM SVM (AMD-V) - Host Context Ring-0.
*/
/*
* Copyright (C) 2013 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#ifdef DEBUG_ramshankar
# define HMSVM_ALWAYS_TRAP_ALL_XCPTS
# define HMSVM_ALWAYS_TRAP_PF
#endif
/*******************************************************************************
* Defined Constants And Macros *
*******************************************************************************/
/**
 * MSR-bitmap read permissions.
 *
 * Passed to hmR0SvmSetMSRPermission() to select whether guest reads of an MSR
 * are intercepted or passed through.
 */
typedef enum SVMMSREXITREAD
{
    /** Reading this MSR causes a VM-exit. */
    SVMMSREXIT_INTERCEPT_READ = 0xb,
    /** Reading this MSR does not cause a VM-exit. */
    SVMMSREXIT_PASSTHRU_READ
} SVMMSREXITREAD;
/**
 * MSR-bitmap write permissions.
 *
 * Passed to hmR0SvmSetMSRPermission() to select whether guest writes to an MSR
 * are intercepted or passed through.
 */
typedef enum SVMMSREXITWRITE
{
    /** Writing to this MSR causes a VM-exit. */
    SVMMSREXIT_INTERCEPT_WRITE = 0xd,
    /** Writing to this MSR does not cause a VM-exit. */
    SVMMSREXIT_PASSTHRU_WRITE
} SVMMSREXITWRITE;
/*******************************************************************************
* Internal Functions *
*******************************************************************************/
static void hmR0SvmSetMSRPermission(PVMCPU pVCpu, unsigned uMsr, SVMMSREXITREAD enmRead, SVMMSREXITWRITE enmWrite);
/*******************************************************************************
* Global Variables *
*******************************************************************************/
/** Ring-0 memory object for the IO bitmap. Allocated by SVMR0GlobalInit() and
 *  reset to NIL_RTR0MEMOBJ by SVMR0GlobalTerm(); shared by all VMs. */
RTR0MEMOBJ g_hMemObjIOBitmap = NIL_RTR0MEMOBJ;
/** Physical address of the IO bitmap (page 0 of the memory object). This is the
 *  value SVMR0SetupVM() stores in each VMCB's u64IOPMPhysAddr field. */
RTHCPHYS g_HCPhysIOBitmap = 0;
/** Virtual address of the IO bitmap; NULL while no bitmap is allocated. */
R0PTRTYPE(void *) g_pvIOBitmap = NULL;
/**
 * Sets up and activates AMD-V on the current CPU.
 *
 * Sets EFER.SVME and programs the host-state save area MSR with the supplied
 * physical page address.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_SVM_IN_USE if EFER.SVME is already set and the in-use error is
 *          not being ignored.
 * @retval  VERR_INVALID_PARAMETER if any of the argument checks fail.
 * @param   pCpu            Pointer to the CPU info struct.
 * @param   pVM             Pointer to the VM (can be NULL after a resume!).
 * @param   pvCpuPage       Pointer to the global CPU page (must not be NULL).
 * @param   HCPhysCpuPage   Physical address of the global CPU page (must be
 *                          neither 0 nor NIL_RTHCPHYS).
 * @param   fEnabledByHost  Must be false; true is rejected with
 *                          VERR_INVALID_PARAMETER.
 */
VMMR0DECL(int) SVMR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost)
{
    AssertReturn(!fEnabledByHost, VERR_INVALID_PARAMETER);
    AssertReturn(   HCPhysCpuPage
                 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
    AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);

    /*
     * AMD-V has to be switched on, and the host-state save area configured, on
     * every CPU individually because the MSRs involved are per CPU.
     */
    uint64_t const fEfer       = ASMRdMsr(MSR_K6_EFER);
    bool const     fSvmAlready = (fEfer & MSR_K6_EFER_SVME) != 0;

    /* When VBOX_HWVIRTEX_IGNORE_SVM_IN_USE is in effect we use AMD-V regardless; remember that per CPU. */
    if (   fSvmAlready
        && pVM
        && pVM->hm.s.svm.fIgnoreInUseError)
        pCpu->fIgnoreAMDVInUseError = true;

    /* Somebody else already enabled SVM and we were not told to ignore that. */
    if (   fSvmAlready
        && !pCpu->fIgnoreAMDVInUseError)
        return VERR_SVM_IN_USE;

    /* Set the SVME bit in EFER. */
    ASMWrMsr(MSR_K6_EFER, fEfer | MSR_K6_EFER_SVME);

    /* Tell the CPU where to stash the host state while the guest is executing. */
    ASMWrMsr(MSR_K8_VM_HSAVE_PA, HCPhysCpuPage);

    /*
     * Another hypervisor may have left stale translations tagged with non-zero
     * ASIDs. AMD provides no instruction for flushing all ASIDs at once (the
     * flush happens as part of VMRUN), so we only raise fFlushAsidBeforeUse
     * here; it instructs hmR0SvmSetupTLB() to flush the TLB before a new ASID
     * is put to use.
     */
    pCpu->fFlushAsidBeforeUse = true;

    /*
     * Bump the flush count so that every VCPU scheduled on this CPU is given a
     * new ASID on resume. See @bugref{6255}.
     */
    pCpu->cTlbFlushes++;

    return VINF_SUCCESS;
}
/**
 * Deactivates AMD-V on the current CPU.
 *
 * Clears EFER.SVME and zeroes the host-state save area MSR, but only when
 * EFER.SVME is currently set.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS whether or not AMD-V was active.
 * @retval  VERR_INVALID_PARAMETER if the CPU page arguments are invalid.
 * @param   pCpu            Pointer to the CPU info struct (unused).
 * @param   pvCpuPage       Pointer to the global CPU page (must not be NULL).
 * @param   HCPhysCpuPage   Physical address of the global CPU page (must be
 *                          neither 0 nor NIL_RTHCPHYS).
 */
VMMR0DECL(int) SVMR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
{
    AssertReturn(   HCPhysCpuPage
                 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
    AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
    NOREF(pCpu);

    /* Nothing to undo when AMD-V is not active on this CPU. */
    uint64_t const fEfer = ASMRdMsr(MSR_K6_EFER);
    if (!(fEfer & MSR_K6_EFER_SVME))
        return VINF_SUCCESS;

    /* Clear the SVME bit in EFER ... */
    ASMWrMsr(MSR_K6_EFER, fEfer & ~MSR_K6_EFER_SVME);

    /* ... and invalidate the host state physical address. */
    ASMWrMsr(MSR_K8_VM_HSAVE_PA, 0);

    return VINF_SUCCESS;
}
/**
 * Does global AMD-V initialization (called during module initialization).
 *
 * Allocates the global IO permission bitmap and marks every port as
 * intercepted.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  The RTR0MemObjAllocCont() failure status if the bitmap could not be
 *          allocated.
 */
VMMR0DECL(int) SVMR0GlobalInit(void)
{
    /*
     * Allocate 12 KB for the IO bitmap. Since this is non-optional and we always intercept all IO accesses, it's done
     * once globally here instead of per-VM.
     */
    int rc = RTR0MemObjAllocCont(&g_hMemObjIOBitmap, 3 << PAGE_SHIFT, false /* fExecutable */);
    if (RT_FAILURE(rc))
        return rc;

    g_pvIOBitmap     = RTR0MemObjAddress(g_hMemObjIOBitmap);
    g_HCPhysIOBitmap = RTR0MemObjGetPagePhysAddr(g_hMemObjIOBitmap, 0 /* iPage */);

    /*
     * Set all bits to intercept all IO accesses. The bitmap is global, so fill
     * it through the global pointer; there is no VM in scope here.
     */
    ASMMemFill32(g_pvIOBitmap, 3 << PAGE_SHIFT, UINT32_C(0xffffffff));
    return VINF_SUCCESS;
}
/**
 * Does global AMD-V termination (called during module termination).
 *
 * Frees the global IO bitmap allocated by SVMR0GlobalInit(), if any, and resets
 * the associated globals. Calling it when nothing is allocated is a no-op.
 */
VMMR0DECL(void) SVMR0GlobalTerm(void)
{
    if (g_hMemObjIOBitmap != NIL_RTR0MEMOBJ)
    {
        /* The bitmap is owned by this module, not by any VM, so free the global handle. */
        RTR0MemObjFree(g_hMemObjIOBitmap, false /* fFreeMappings */);
        g_pvIOBitmap      = NULL;
        g_HCPhysIOBitmap  = 0;
        g_hMemObjIOBitmap = NIL_RTR0MEMOBJ;
    }
}
/**
 * Frees any allocated per-VCPU structures for a VM.
 *
 * For every VCPU this releases the host VMCB page, the guest VMCB page and the
 * MSR bitmap, skipping any whose handle is NIL_RTR0MEMOBJ. The matching
 * address fields are cleared and the handle is reset to NIL, so the function
 * may be called on a partially initialized VM and more than once.
 *
 * @param   pVM     Pointer to the VM.
 */
DECLINLINE(void) hmR0SvmFreeStructs(PVM pVM)
{
    uint32_t idCpu = 0;
    while (idCpu < pVM->cCpus)
    {
        PVMCPU pCurCpu = &pVM->aCpus[idCpu];

        /* Host context page. */
        if (pCurCpu->hm.s.svm.hMemObjVmcbHost != NIL_RTR0MEMOBJ)
        {
            RTR0MemObjFree(pCurCpu->hm.s.svm.hMemObjVmcbHost, false);
            pCurCpu->hm.s.svm.pvVmcbHost      = NULL;
            pCurCpu->hm.s.svm.HCPhysVmcbHost  = 0;
            pCurCpu->hm.s.svm.hMemObjVmcbHost = NIL_RTR0MEMOBJ;
        }

        /* VM control block (VMCB). */
        if (pCurCpu->hm.s.svm.hMemObjVmcb != NIL_RTR0MEMOBJ)
        {
            RTR0MemObjFree(pCurCpu->hm.s.svm.hMemObjVmcb, false);
            pCurCpu->hm.s.svm.pvVmcb      = NULL;
            pCurCpu->hm.s.svm.HCPhysVmcb  = 0;
            pCurCpu->hm.s.svm.hMemObjVmcb = NIL_RTR0MEMOBJ;
        }

        /* MSR permission bitmap. */
        if (pCurCpu->hm.s.svm.hMemObjMsrBitmap != NIL_RTR0MEMOBJ)
        {
            RTR0MemObjFree(pCurCpu->hm.s.svm.hMemObjMsrBitmap, false);
            pCurCpu->hm.s.svm.pvMsrBitmap      = NULL;
            pCurCpu->hm.s.svm.HCPhysMsrBitmap  = 0;
            pCurCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
        }

        idCpu++;
    }
}
/**
 * Does per-VM AMD-V initialization.
 *
 * Records whether the host CPU is subject to AMD erratum 170 and allocates, for
 * each VCPU, one page for the host context, one page for the VMCB and 8 KB for
 * the MSR permission bitmap. If any allocation fails, everything allocated so
 * far is released again before returning.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  The RTR0MemObjAllocCont() failure status if an allocation failed.
 * @param   pVM     Pointer to the VM.
 */
VMMR0DECL(int) SVMR0InitVM(PVM pVM)
{
    int rc = VERR_INTERNAL_ERROR_5;

    /* Check for an AMD CPU erratum which requires us to flush the TLB before every world-switch. */
    uint32_t u32Family;
    uint32_t u32Model;
    uint32_t u32Stepping;
    if (HMAmdIsSubjectToErratum170(&u32Family, &u32Model, &u32Stepping))
    {
        Log(("SVMR0InitVM: AMD cpu with erratum 170 family %#x model %#x stepping %#x\n", u32Family, u32Model, u32Stepping));
        pVM->hm.s.svm.fAlwaysFlushTLB = true;
    }

    /* Initialize the memory objects up-front so we can cleanup on allocation failures properly. */
    for (uint32_t i = 0; i < pVM->cCpus; i++)
    {
        PVMCPU pVCpu = &pVM->aCpus[i];
        pVCpu->hm.s.svm.hMemObjVmcbHost  = NIL_RTR0MEMOBJ;
        pVCpu->hm.s.svm.hMemObjVmcb      = NIL_RTR0MEMOBJ;
        pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
    }

    /* Allocate a VMCB for each VCPU. */
    for (uint32_t i = 0; i < pVM->cCpus; i++)
    {
        PVMCPU pVCpu = &pVM->aCpus[i];

        /* Allocate one page for the host context */
        rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVmcbHost, 1 << PAGE_SHIFT, false /* fExecutable */);
        if (RT_FAILURE(rc))
            goto failure_cleanup;

        pVCpu->hm.s.svm.pvVmcbHost     = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcbHost);
        pVCpu->hm.s.svm.HCPhysVmcbHost = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcbHost, 0 /* iPage */);
        Assert(pVCpu->hm.s.svm.HCPhysVmcbHost < _4G);
        ASMMemZeroPage(pVCpu->hm.s.svm.pvVmcbHost);

        /* Allocate one page for the VM control block (VMCB). */
        rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVmcb, 1 << PAGE_SHIFT, false /* fExecutable */);
        if (RT_FAILURE(rc))
            goto failure_cleanup;

        pVCpu->hm.s.svm.pvVmcb     = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcb);
        pVCpu->hm.s.svm.HCPhysVmcb = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcb, 0 /* iPage */);
        Assert(pVCpu->hm.s.svm.HCPhysVmcb < _4G);
        ASMMemZeroPage(pVCpu->hm.s.svm.pvVmcb);

        /* Allocate 8 KB for the MSR bitmap (doesn't seem to be a way to convince SVM not to use it) */
        rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjMsrBitmap, 2 << PAGE_SHIFT, false /* fExecutable */);
        if (RT_FAILURE(rc))
            goto failure_cleanup;

        pVCpu->hm.s.svm.pvMsrBitmap     = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjMsrBitmap);
        pVCpu->hm.s.svm.HCPhysMsrBitmap = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjMsrBitmap, 0 /* iPage */);
        /* Set all bits to intercept all MSR accesses. */
        ASMMemFill32(pVCpu->hm.s.svm.pvMsrBitmap, 2 << PAGE_SHIFT, 0xffffffff);
    }

    return VINF_SUCCESS;

failure_cleanup:
    /* Release whatever was allocated before the failure; untouched handles are still NIL and get skipped. */
    hmR0SvmFreeStructs(pVM);
    return rc;
}
/**
 * Does per-VM AMD-V termination.
 *
 * Releases the per-VCPU structures via hmR0SvmFreeStructs().
 *
 * @returns VBox status code (always VINF_SUCCESS).
 * @param   pVM     Pointer to the VM.
 */
VMMR0DECL(int) SVMR0TermVM(PVM pVM)
{
    hmR0SvmFreeStructs(pVM);
    return VINF_SUCCESS;
}
/**
 * Sets up AMD-V for the specified VM.
 * This function is only called once per-VM during initialization.
 *
 * For every VCPU it programs the VMCB control area (intercepts, interrupt
 * control, IO/MSR permission bitmap addresses, initial ASID), the guest PAT
 * value, and opens up the MSR permission bitmap for a fixed set of MSRs.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_INVALID_PARAMETER if pVM is NULL.
 * @retval  VERR_SVM_INVALID_PVMCB if a VCPU has no VMCB.
 * @param   pVM     Pointer to the VM.
 */
VMMR0DECL(int) SVMR0SetupVM(PVM pVM)
{
    /* MSRs that are saved/restored automatically during the world-switch; guest accesses need no interception. */
    static const uint32_t s_auPassthruMsrs[] =
    {
        MSR_K8_LSTAR,
        MSR_K8_CSTAR,
        MSR_K6_STAR,
        MSR_K8_SF_MASK,
        MSR_K8_FS_BASE,
        MSR_K8_GS_BASE,
        MSR_K8_KERNEL_GS_BASE,
        MSR_IA32_SYSENTER_CS,
        MSR_IA32_SYSENTER_ESP,
        MSR_IA32_SYSENTER_EIP
    };

    AssertReturn(pVM, VERR_INVALID_PARAMETER);
    Assert(pVM->hm.s.svm.fSupported);

    for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
    {
        PVMCPU   pVCpu = &pVM->aCpus[idCpu];
        PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;

        AssertMsgReturn(pVmcb, ("Invalid pVmcb\n"), VERR_SVM_INVALID_PVMCB);

        /* Debug-only: unconditionally trap exceptions. */
#ifdef HMSVM_ALWAYS_TRAP_PF
        pVmcb->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF);
#endif
#ifdef HMSVM_ALWAYS_TRAP_ALL_XCPTS
        pVmcb->ctrl.u32InterceptException |=   RT_BIT(X86_XCPT_BP)
                                             | RT_BIT(X86_XCPT_DB)
                                             | RT_BIT(X86_XCPT_DE)
                                             | RT_BIT(X86_XCPT_NM)
                                             | RT_BIT(X86_XCPT_UD)
                                             | RT_BIT(X86_XCPT_NP)
                                             | RT_BIT(X86_XCPT_SS)
                                             | RT_BIT(X86_XCPT_GP)
                                             | RT_BIT(X86_XCPT_PF)
                                             | RT_BIT(X86_XCPT_MF);
#endif

        /* Intercepts that are always active (first control word). */
        pVmcb->ctrl.u32InterceptCtrl1 =   SVM_CTRL1_INTERCEPT_INTR          /* External interrupts. */
                                        | SVM_CTRL1_INTERCEPT_VINTR         /* Virtual interrupt window (guest enabled interrupts). */
                                        | SVM_CTRL1_INTERCEPT_NMI           /* Non-maskable interrupts. */
                                        | SVM_CTRL1_INTERCEPT_SMI           /* System management interrupts. */
                                        | SVM_CTRL1_INTERCEPT_INIT          /* INIT signal. */
                                        | SVM_CTRL1_INTERCEPT_RDPMC         /* RDPMC instruction. */
                                        | SVM_CTRL1_INTERCEPT_CPUID         /* CPUID instruction. */
                                        | SVM_CTRL1_INTERCEPT_RSM           /* RSM instruction. */
                                        | SVM_CTRL1_INTERCEPT_HLT           /* HLT instruction. */
                                        | SVM_CTRL1_INTERCEPT_INOUT_BITMAP  /* IOIO exits as dictated by the IOPM. */
                                        | SVM_CTRL1_INTERCEPT_MSR_SHADOW    /* MSR accesses not covered by the MSRPM. */
                                        | SVM_CTRL1_INTERCEPT_INVLPGA       /* INVLPGA instruction. */
                                        | SVM_CTRL1_INTERCEPT_SHUTDOWN      /* Shutdown events. */
                                        | SVM_CTRL1_INTERCEPT_FERR_FREEZE;  /* "Freezing" during legacy FPU handling. */

        /* Intercepts that are always active (second control word). */
        pVmcb->ctrl.u32InterceptCtrl2 =   SVM_CTRL2_INTERCEPT_VMRUN         /* VMRUN instruction. */
                                        | SVM_CTRL2_INTERCEPT_VMMCALL       /* VMMCALL instruction. */
                                        | SVM_CTRL2_INTERCEPT_VMLOAD        /* VMLOAD instruction. */
                                        | SVM_CTRL2_INTERCEPT_VMSAVE        /* VMSAVE instruction. */
                                        | SVM_CTRL2_INTERCEPT_STGI          /* STGI instruction. */
                                        | SVM_CTRL2_INTERCEPT_CLGI          /* CLGI instruction. */
                                        | SVM_CTRL2_INTERCEPT_SKINIT        /* SKINIT instruction. */
                                        | SVM_CTRL2_INTERCEPT_WBINVD        /* WBINVD instruction. */
                                        | SVM_CTRL2_INTERCEPT_MONITOR       /* MONITOR instruction. */
                                        | SVM_CTRL2_INTERCEPT_MWAIT_UNCOND; /* MWAIT instruction. */

        /* Reads of CR0 and CR4 are intercepted since our shadow values need not match the guest's. */
        pVmcb->ctrl.u16InterceptRdCRx = RT_BIT(0) | RT_BIT(4);

        /* Writes to CR0 and CR4 are intercepted as well. */
        pVmcb->ctrl.u16InterceptWrCRx = RT_BIT(0) | RT_BIT(4);

        /* Start out intercepting every DRx access; this is changed later on. */
        pVmcb->ctrl.u16InterceptRdDRx = 0xffff;
        pVmcb->ctrl.u16InterceptWrDRx = 0xffff;

        /* Virtualize INTR masking (CR8 reads/writes go to the V_TPR register). */
        pVmcb->ctrl.IntCtrl.n.u1VIrqMasking = 1;

        /* Don't take the TPR priority into account; we deliver interrupts when we decide to. */
        pVmcb->ctrl.IntCtrl.n.u1IgnoreTPR = 1;

        /* Physical addresses of the IO (global) and MSR (per-VCPU) permission bitmaps. */
        pVmcb->ctrl.u64IOPMPhysAddr  = g_HCPhysIOBitmap;
        pVmcb->ctrl.u64MSRPMPhysAddr = pVCpu->hm.s.svm.HCPhysMsrBitmap;

        /* LBR virtualization is not used. */
        pVmcb->ctrl.u64LBRVirt = 0;

        /* ASID 0 belongs to the host, so guests start at 1. */
        pVmcb->ctrl.TLBCtrl.n.u32ASID = 1;

        /*
         * Guest PAT MSR (only relevant with Nested Paging). The default would be
         * 0x0007040600070406ULL, but all guest memory is to be treated as WB, hence
         * type 6 in the PAT slots.
         *
         * NOTE(review): the constant below leaves the most significant byte (slot 7)
         * at 0 rather than 6, which does not match "all slots" -- confirm this is
         * intended.
         */
        pVmcb->guest.u64GPAT = UINT64_C(0x0006060606060606);

        /* Additional intercepts are required when Nested Paging is not in use. */
        if (!pVM->hm.s.fNestedPaging)
        {
            /* CR3 accesses: the shadow value differs from what the guest believes it is. */
            pVmcb->ctrl.u16InterceptRdCRx |= RT_BIT(3);
            pVmcb->ctrl.u16InterceptWrCRx |= RT_BIT(3);

            /* INVLPG and task switches (the latter may change CR3, EFLAGS, LDT). */
            pVmcb->ctrl.u32InterceptCtrl1 |=   SVM_CTRL1_INTERCEPT_INVLPG
                                             | SVM_CTRL1_INTERCEPT_TASK_SWITCH;

            /* Shadow paging relies on seeing the guest's page faults. */
            pVmcb->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF);
        }

        /* Let the guest read and write the automatically switched MSRs directly. */
        for (unsigned iMsr = 0; iMsr < sizeof(s_auPassthruMsrs) / sizeof(s_auPassthruMsrs[0]); iMsr++)
            hmR0SvmSetMSRPermission(pVCpu, s_auPassthruMsrs[iMsr], SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
    }

    return VINF_SUCCESS;
}
/**
 * Sets the permission bits for the specified MSR.
 *
 * Every MSR owns two consecutive bits in the per-VCPU MSR permission bitmap:
 * the first controls read interception, the second write interception. A set
 * bit means the access is intercepted. MSRs outside the three ranges covered
 * by the bitmap trigger an assertion and are otherwise ignored.
 *
 * @param   pVCpu       Pointer to the VMCPU.
 * @param   uMsr        The MSR.
 * @param   enmRead     Whether reading the MSR is intercepted or passed through.
 * @param   enmWrite    Whether writing the MSR is intercepted or passed through.
 */
static void hmR0SvmSetMSRPermission(PVMCPU pVCpu, uint32_t uMsr, SVMMSREXITREAD enmRead, SVMMSREXITWRITE enmWrite)
{
    unsigned ulBit;
    uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;

    /*
     * Layout:
     * Byte offset       MSR range
     * 0x000  - 0x7ff    0x00000000 - 0x00001fff
     * 0x800  - 0xfff    0xc0000000 - 0xc0001fff
     * 0x1000 - 0x17ff   0xc0010000 - 0xc0011fff
     * 0x1800 - 0x1fff   Reserved
     */
    if (uMsr <= 0x00001FFF)
    {
        /* Pentium-compatible MSRs */
        ulBit = uMsr * 2;
    }
    else if (   uMsr >= 0xC0000000
             && uMsr <= 0xC0001FFF)
    {
        /* AMD Sixth Generation x86 Processor MSRs and SYSCALL */
        ulBit = (uMsr - 0xC0000000) * 2;
        pbMsrBitmap += 0x800;
    }
    else if (   uMsr >= 0xC0010000
             && uMsr <= 0xC0011FFF)
    {
        /*
         * AMD Seventh and Eighth Generation Processor MSRs.
         * The bit index is relative to the start of this range (0xC0010000);
         * using any other base would index past the end of the 8 KB bitmap.
         */
        ulBit = (uMsr - 0xC0010000) * 2;
        pbMsrBitmap += 0x1000;
    }
    else
    {
        AssertFailed();
        return;
    }

    /* Each range spans 0x2000 MSRs, i.e. 0x4000 bits; the read bit is at most 0x3ffe. */
    Assert(ulBit < 0x3fff /* 16 * 1024 - 1 */);

    /* Read permission bit. */
    if (enmRead == SVMMSREXIT_INTERCEPT_READ)
        ASMBitSet(pbMsrBitmap, ulBit);
    else
        ASMBitClear(pbMsrBitmap, ulBit);

    /* Write permission bit directly follows the read bit. */
    if (enmWrite == SVMMSREXIT_INTERCEPT_WRITE)
        ASMBitSet(pbMsrBitmap, ulBit + 1);
    else
        ASMBitClear(pbMsrBitmap, ulBit + 1);
}