HWSVMR0.cpp revision 5ab279c951084868bab25b6bdebf86ad9aafcab7
989d2489ff3cb4750510f8c9ea4ab95986d472a3vboxsync * HWACCM SVM - Host Context Ring 0.
989d2489ff3cb4750510f8c9ea4ab95986d472a3vboxsync * Copyright (C) 2006-2007 Sun Microsystems, Inc.
* available from http://www.virtualbox.org. This file is free software;
* Clara, CA 95054 USA or visit http://www.sun.com if you need
#include "HWACCMInternal.h"
#include "HWSVMR0.h"
VMMR0DECL(int) SVMR0EnableCpu(PHWACCM_CPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
/* We must turn on AMD-V and setup the host state physical address, as those MSRs are per-cpu/core. */
SUPR0Printf("SVMR0EnableCpu cpu %d page (%x) %x\n", pCpu->idCpu, pvPageCpu, (uint32_t)pPageCpuPhys);
/* Write the physical page address where the CPU will store the host state while executing the VM. */
return VINF_SUCCESS;
return VINF_SUCCESS;
int rc;
rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.svm.pMemObjVMCBHost, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
return rc;
rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.svm.pMemObjIOBitmap, 3 << PAGE_SHIFT, true /* executable R0 mapping */);
return rc;
rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.svm.pMemObjMSRBitmap, 2 << PAGE_SHIFT, true /* executable R0 mapping */);
return rc;
pVM->hwaccm.s.svm.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.svm.pMemObjMSRBitmap, 0);
Log(("SVMR0InitVM: AMD cpu with erratum 170 family %x model %x stepping %x\n", u32Family, u32Model, u32Stepping));
rc = RTR0MemObjAllocCont(&pVM->aCpus[i].hwaccm.s.svm.pMemObjVMCB, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
return rc;
pVM->aCpus[i].hwaccm.s.svm.pVMCBPhys = RTR0MemObjGetPagePhysAddr(pVM->aCpus[i].hwaccm.s.svm.pMemObjVMCB, 0);
return VINF_SUCCESS;
return VINF_SUCCESS;
/* CR0/3/4 reads must be intercepted, our shadow values are not necessarily the same as the guest's. */
#ifndef DEBUG
| SVM_CTRL2_INTERCEPT_MWAIT_UNCOND; /* don't execute mwait or else we'll idle inside the guest (host thinks the cpu load is high) */
return rc;
#ifdef VBOX_STRICT
Log(("SVM: Inject int %d at %RGv error code=%02x CR2=%RGv intInfo=%08x\n", pEvent->n.u8Vector, (RTGCPTR)pCtx->rip, pEvent->n.u32ErrorCode, (RTGCPTR)pCtx->cr2, pEvent->au64[0]));
Log(("SVM: Inject int %d at %RGv error code=%08x\n", pEvent->n.u8Vector, (RTGCPTR)pCtx->rip, pEvent->n.u32ErrorCode));
int rc;
Log(("Reinjecting event %08x %08x at %RGv\n", pVCpu->hwaccm.s.Event.intInfo, pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip));
return VINF_SUCCESS;
return VINF_SUCCESS;
Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS -> irq window exit\n", (RTGCPTR)pCtx->rip));
/** @todo use virtual interrupt method to inject a pending irq; dispatched as soon as guest.IF is set. */
#ifdef VBOX_STRICT
int rc;
switch (u8Vector) {
return VINF_SUCCESS;
return VINF_SUCCESS;
return VERR_INVALID_PARAMETER;
/* Also catch floating point exceptions as we need to report them to the guest in a different way. */
val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
/* Note: WP is not relevant in nested paging mode as we catch accesses on the (guest) physical level. */
/* Note: In nested paging mode the guest is allowed to run with paging disabled; the guest physical to host physical translation will remain active. */
val |= X86_CR0_PG; /* Paging is always enabled; even when the guest is running in real mode or PE without paging. */
val |= X86_CR0_WP; /* Must set this as we rely on protect various pages and supervisor writes must be caught. */
Assert(pVMCB->guest.u64CR3 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
case PGMMODE_REAL:
AssertFailed();
#ifdef VBOX_ENABLE_64_BITS_GUESTS
AssertFailed();
AssertFailed();
#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
/* Unconditionally update these as wrmsr might have changed them. (HWACCM_CHANGED_GUEST_SEGMENT_REGS will not be set) */
#ifdef DEBUG
return VINF_SUCCESS;
bool fSyncTPR = false;
unsigned cResume = 0;
#ifdef VBOX_STRICT
goto end;
Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
* Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
* force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could
* break the guest. Sounds very unlikely, but such timing sensitive problems are not as rare as you might think.
#ifdef DEBUG
rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
goto end;
/* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
goto end;
goto end;
goto end;
/* Note the 32 bits exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L), but that appears missing in Intel CPUs */
/* Note: we can't do this in LoadGuestState as PDMApicGetTPR can jump back to ring 3 (lock)!!!!!!!! */
bool fPending;
if (fPending)
#ifdef LOG_ENABLED
Log(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
Log(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
#ifdef VBOX_STRICT
#ifdef LOG_LOGGING
goto end;
/* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
/* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
/* Make sure we flush the TLB when required. Switch ASID to achieve the same thing, but without actually flushing the whole TLB (which is expensive). */
AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
#ifdef VBOX_WITH_STATISTICS
#ifdef VBOX_STRICT
pVCpu->hwaccm.s.svm.pfnVMRun(pVM->hwaccm.s.svm.pVMCBHostPhys, pVCpu->hwaccm.s.svm.pVMCBPhys, pCtx, pVM, pVCpu);
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
* IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#ifdef DEBUG
goto end;
/* Remaining guest CPU context: TR, IDTR, GDTR, LDTR; must sync everything otherwise we can get out of sync when jumping to ring 3. */
#ifdef LOG_LOGGING
&& pVMCB->ctrl.ExitIntInfo.n.u3Type != SVM_EVENT_SOFTWARE_INT /* we don't care about 'int xx' as the instruction will be restarted. */)
Log(("Pending inject %RX64 at %RGv exit=%08x\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitCode));
#ifdef LOG_ENABLED
#ifdef VBOX_WITH_STATISTICS
if (fSyncTPR)
switch (exitCode)
case SVM_EXIT_EXCEPTION_0: case SVM_EXIT_EXCEPTION_1: case SVM_EXIT_EXCEPTION_2: case SVM_EXIT_EXCEPTION_3:
case SVM_EXIT_EXCEPTION_4: case SVM_EXIT_EXCEPTION_5: case SVM_EXIT_EXCEPTION_6: case SVM_EXIT_EXCEPTION_7:
case SVM_EXIT_EXCEPTION_8: case SVM_EXIT_EXCEPTION_9: case SVM_EXIT_EXCEPTION_A: case SVM_EXIT_EXCEPTION_B:
case SVM_EXIT_EXCEPTION_C: case SVM_EXIT_EXCEPTION_D: case SVM_EXIT_EXCEPTION_E: case SVM_EXIT_EXCEPTION_F:
case SVM_EXIT_EXCEPTION_10: case SVM_EXIT_EXCEPTION_11: case SVM_EXIT_EXCEPTION_12: case SVM_EXIT_EXCEPTION_13:
case SVM_EXIT_EXCEPTION_14: case SVM_EXIT_EXCEPTION_15: case SVM_EXIT_EXCEPTION_16: case SVM_EXIT_EXCEPTION_17:
case SVM_EXIT_EXCEPTION_18: case SVM_EXIT_EXCEPTION_19: case SVM_EXIT_EXCEPTION_1A: case SVM_EXIT_EXCEPTION_1B:
case SVM_EXIT_EXCEPTION_1C: case SVM_EXIT_EXCEPTION_1D: case SVM_EXIT_EXCEPTION_1E: case SVM_EXIT_EXCEPTION_1F:
switch (vector)
case X86_XCPT_DB:
goto ResumeExecution;
case X86_XCPT_NM:
/* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
goto ResumeExecution;
goto ResumeExecution;
#ifdef DEBUG
Log(("Guest page fault at %RGv cr2=%RGv error code %x rsp=%RGv\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode, (RTGCPTR)pCtx->rsp));
goto ResumeExecution;
Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode));
goto ResumeExecution;
goto ResumeExecution;
#ifdef VBOX_STRICT
goto ResumeExecution;
#ifdef VBOX_STRICT
switch(vector)
case X86_XCPT_GP:
case X86_XCPT_DE:
case X86_XCPT_UD:
case X86_XCPT_SS:
case X86_XCPT_NP:
goto ResumeExecution;
case SVM_EXIT_NPF:
/* EXITINFO1 contains fault errorcode; EXITINFO2 contains the guest physical address causing the fault. */
Log(("Nested page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode));
rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, enmShwPagingMode, errCode, CPUMCTX2CORE(pCtx), uFaultAddress);
Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode));
goto ResumeExecution;
#ifdef VBOX_STRICT
case SVM_EXIT_VINTR:
goto ResumeExecution;
case SVM_EXIT_FERR_FREEZE:
case SVM_EXIT_INTR:
case SVM_EXIT_NMI:
case SVM_EXIT_SMI:
case SVM_EXIT_INIT:
case SVM_EXIT_WBINVD:
goto ResumeExecution;
goto ResumeExecution;
goto ResumeExecution;
goto ResumeExecution;
goto ResumeExecution;
case SVM_EXIT_WRITE_CR8: case SVM_EXIT_WRITE_CR9: case SVM_EXIT_WRITE_CR10: case SVM_EXIT_WRITE_CR11:
case SVM_EXIT_WRITE_CR12: case SVM_EXIT_WRITE_CR13: case SVM_EXIT_WRITE_CR14: case SVM_EXIT_WRITE_CR15:
AssertFailed();
rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
AssertMsg(rc == VINF_SUCCESS || PGMGetGuestMode(pVCpu) <= PGMMODE_PROTECTED || pVCpu->hwaccm.s.fForceTLBFlush,
("rc=%Rrc mode=%d fForceTLBFlush=%RTbool\n", rc, PGMGetGuestMode(pVCpu), pVCpu->hwaccm.s.fForceTLBFlush));
goto ResumeExecution;
goto ResumeExecution;
case SVM_EXIT_WRITE_DR8: case SVM_EXIT_WRITE_DR9: case SVM_EXIT_WRITE_DR10: case SVM_EXIT_WRITE_DR11:
case SVM_EXIT_WRITE_DR12: case SVM_EXIT_WRITE_DR13: case SVM_EXIT_WRITE_DR14: case SVM_EXIT_WRITE_DR15:
goto ResumeExecution;
goto ResumeExecution;
goto ResumeExecution;
goto ResumeExecution;
/* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
Log2(("IOMIOPortWrite %RGv %x %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, uIOSize));
Log2(("IOMIOPortRead %RGv %x %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, u32Val & uAndVal, uIOSize));
* Bits 15:13 of the DR6 register is never cleared by the processor and must be cleared by software after
goto ResumeExecution;
goto ResumeExecution;
Log2(("EM status from IO at %RGv %x size %d: %Rrc\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize, rc));
#ifdef VBOX_STRICT
AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", rc));
case SVM_EXIT_HLT:
goto ResumeExecution;
case SVM_EXIT_MWAIT_UNCOND:
goto ResumeExecution;
case SVM_EXIT_RSM:
case SVM_EXIT_INVLPGA:
case SVM_EXIT_VMRUN:
case SVM_EXIT_VMMCALL:
case SVM_EXIT_VMLOAD:
case SVM_EXIT_VMSAVE:
case SVM_EXIT_STGI:
case SVM_EXIT_CLGI:
case SVM_EXIT_SKINIT:
goto ResumeExecution;
case SVM_EXIT_MSR:
/* Note: the intel manual claims there's a REX version of RDMSR that's slightly different, so we play safe by completely disassembling the instruction. */
STAM_COUNTER_INC((pVMCB->ctrl.u64ExitInfo1 == 0) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr);
goto ResumeExecution;
AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (pVMCB->ctrl.u64ExitInfo1 == 0) ? "rdmsr" : "wrmsr", rc));
case SVM_EXIT_MONITOR:
case SVM_EXIT_PAUSE:
case SVM_EXIT_MWAIT_ARMED:
case SVM_EXIT_SHUTDOWN:
case SVM_EXIT_IDTR_READ:
case SVM_EXIT_GDTR_READ:
case SVM_EXIT_LDTR_READ:
case SVM_EXIT_TR_READ:
case SVM_EXIT_IDTR_WRITE:
case SVM_EXIT_GDTR_WRITE:
case SVM_EXIT_LDTR_WRITE:
case SVM_EXIT_TR_WRITE:
case SVM_EXIT_CR0_SEL_WRITE:
end:
CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS);
/* If we executed vmrun and an external irq was pending, then we don't have to do a full sync the next time. */
return rc;
LogFlow(("SVMR0Enter cpu%d last=%d asid=%d\n", pCpu->idCpu, pVCpu->hwaccm.s.idLastCpu, pVCpu->hwaccm.s.uCurrentASID));
return VINF_SUCCESS;
return VINF_SUCCESS;
static int svmR0InterpretInvlPg(PVMCPU pVCpu, PDISCPUSTATE pCpu, PCPUMCTXCORE pRegFrame, uint32_t uASID)
return VERR_EM_INTERPRETER;
case PARMTYPE_IMMEDIATE:
case PARMTYPE_ADDRESS:
return VERR_EM_INTERPRETER;
return VERR_EM_INTERPRETER;
return VINF_SUCCESS;
return rc;
DISCPUMODE enmMode = SELMGetCpuModeFromSelector(pVM, pRegFrame->eflags, pRegFrame->cs, &pRegFrame->csHid);
int rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->rip, &pbCode);
return rc;
return VERR_EM_INTERPRETER;
if (!fFlushPending)
/* If we get a flush in 64 bits guest mode, then force a full TLB flush. Invlpga takes only 32 bits addresses. */
return VINF_SUCCESS;
/* invlpga only invalidates TLB entries for guest virtual addresses; we have no choice but to force a TLB flush here. */
return VINF_SUCCESS;
#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
DECLASM(int) SVMR0VMSwitcherRun64(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu)
VMMR0DECL(int) SVMR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, uint32_t *paParam)
int rc;
return rc;