SUPDrvGip.cpp revision a3bfe717fb4cf4a2335a2673e38022f0b05187be
/* $Id$ */
/** @file
* VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
*/
/*
* Copyright (C) 2006-2015 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_SUP_DRV
#define SUPDRV_AGNOSTIC
#include "SUPDrvInternal.h"
#ifndef PAGE_SHIFT
#endif
#include <iprt/asm-amd64-x86.h>
#include <iprt/asm-math.h>
#include <iprt/handletable.h>
#include <iprt/semaphore.h>
#include <iprt/spinlock.h>
#endif
#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
#else
/* ... */
#endif
/*******************************************************************************
* Defined Constants And Macros *
*******************************************************************************/
/** The frequency by which we recalculate the u32UpdateHz and
* u32UpdateIntervalNS GIP members. The value must be a power of 2.
*
* Warning: Bumping this too high might overflow u32UpdateIntervalNS.
*/
#define GIP_UPDATEHZ_RECALC_FREQ 0x800
/** A reserved TSC value used for synchronization as well as measurement of
* TSC deltas. */
#define GIP_TSC_DELTA_RSVD UINT64_MAX
/** The number of TSC delta measurement loops in total (includes primer and
* read-time loops). */
#define GIP_TSC_DELTA_LOOPS 96
/** The number of cache primer loops. */
#define GIP_TSC_DELTA_PRIMER_LOOPS 4
/** The loop count up to which we keep computing the minimum read time. */
#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
/** Stop measurement of TSC delta. */
#define GIP_TSC_DELTA_SYNC_STOP 0
/** Start measurement of TSC delta. */
#define GIP_TSC_DELTA_SYNC_START 1
/** Worker thread is ready for reading the TSC. */
#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
/** Worker thread is done updating TSC delta info. */
#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
* with a timeout. */
#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
/** When IPRT isn't concurrent safe: Worker is ready after waiting for
* master with a timeout. */
#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
/** The TSC-refinement interval in seconds. */
#define GIP_TSC_REFINE_INTERVAL 5
/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
/** The TSC delta value for the initial GIP master - 0 in regular builds.
* To test the delta code this can be set to a non-zero value. */
#if 1
#else
# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
#endif
/** @def VBOX_SVN_REV
* The makefile should define this if it can. */
#ifndef VBOX_SVN_REV
# define VBOX_SVN_REV 0
#endif
#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
# define DO_NOT_START_GIP
#endif
/*******************************************************************************
* Internal Functions *
*******************************************************************************/
static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
#endif
/*******************************************************************************
* Global Variables *
*******************************************************************************/
/*
*
* Misc Common GIP Code
* Misc Common GIP Code
* Misc Common GIP Code
*
*
*/
/**
* Finds the GIP CPU index corresponding to @a idCpu.
*
* @returns GIP CPU array index, UINT32_MAX if not found.
* @param pGip The GIP.
* @param idCpu The CPU ID.
*/
{
uint32_t i;
return i;
return UINT32_MAX;
}
/**
* Applies the TSC delta to the supplied raw TSC value.
*
* @returns VBox status code. VINF_SUCCESS is returned right after the delta
* has been applied. Note that the fall-through path at the end of
* the function can apparently also return VINF_SUCCESS with the
* delta NOT applied, so consult @a pfDeltaApplied rather than the
* status code when that distinction matters. (Mostly informational,
* though at least one caller in this file checks it with
* AssertRCReturn.)
* @param pGip Pointer to the GIP.
* @param puTsc Pointer to a valid TSC value before the TSC delta has been applied.
* @param idApic The APIC ID of the CPU @c puTsc corresponds to.
* @param pfDeltaApplied Where to store whether the TSC delta was successfully
* applied or not (optional, can be NULL).
*
* @remarks Maybe called with interrupts disabled in ring-0!
*
* @note Don't you dare change the delta calculation. If you really do, make
* sure you update all places where it's used (IPRT, SUPLibAll.cpp,
* SUPDrv.c, supdrvGipMpEvent, and more).
*/
DECLINLINE(int) supdrvTscDeltaApply(PSUPGLOBALINFOPAGE pGip, uint64_t *puTsc, uint16_t idApic, bool *pfDeltaApplied)
{
int rc;
/*
* Validate input.
*/
/*
* Carefully convert the idApic into a GIPCPU entry.
*/
/* NOTE(review): the conditions guarding the nested blocks below are not
visible in this extract; only the block structure remains. */
{
{
/*
* Apply the delta if valid.
*/
{
/* Success path: tell the caller (if interested) that the delta was applied. */
if (pfDeltaApplied)
*pfDeltaApplied = true;
return VINF_SUCCESS;
}
/* Not applied, but not treated as an error either. */
rc = VINF_SUCCESS;
}
else
{
}
}
else
{
}
/* Common "delta not applied" exit: report that to the caller (if
interested) and return whatever status was set above. */
if (pfDeltaApplied)
*pfDeltaApplied = false;
return rc;
}
/*
*
* GIP Mapping and Unmapping Related Code.
* GIP Mapping and Unmapping Related Code.
* GIP Mapping and Unmapping Related Code.
*
*
*/
/**
* (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
* updating.
*
* @param pGip Pointer to the GIP.
* @param pGipCpu The per CPU structure for this CPU.
* @param u64NanoTS The current time.
*/
{
/*
* Here we don't really care about applying the TSC delta. The re-initialization of this
* value is not relevant especially while (re)starting the GIP as the first few ones will
* be ignored anyway, see supdrvGipDoUpdateCpu().
*/
}
/**
* Set the current TSC and NanoTS value for the CPU.
*
* @param idCpu The CPU ID. Unused - we have to use the APIC ID.
* @param pvUser1 Pointer to the ring-0 GIP mapping.
* @param pvUser2 Pointer to the variable holding the current time.
*/
{
}
/**
* State structure for supdrvGipDetectGetGipCpuCallback.
*/
typedef struct SUPDRVGIPDETECTGETCPU
{
/** Bitmap of APIC IDs that have been seen (initialized to zero).
* Used to detect duplicate APIC IDs (paranoia). */
/** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
* initially). The callback clears the methods not detected. */
uint32_t volatile fSupported;
/** The first callback detecting any kind of range issues (initialized to
* NIL_RTCPUID). */
RTCPUID volatile idCpuProblem;
/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
/**
* Checks for alternative ways of getting the CPU ID.
*
* This also checks the APIC ID, CPU ID and CPU set index values against the
* GIP tables. Problems (duplicate APIC ID, CPU set index out of range, CPU
* id/index roundtrip mismatch) are reported to the release log.
*
* @param idCpu The CPU ID. Asserted to equal RTMpCpuId() and included in
* the release log messages; the lookups use the APIC ID.
* @param pvUser1 Pointer to the state structure.
* @param pvUser2 Pointer to the GIP.
*/
static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
/* Methods detected on this CPU; starts out empty and is accumulated below. */
uint32_t fSupported = 0;
int iCpuSet;
AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
/*
* Check that the CPU ID and CPU set index are interchangeable.
*/
{
if ( iCpuSet >= 0
&& iCpuSet < RTCPUSET_MAX_CPUS
{
/*
* Check whether the IDTR.LIMIT contains a CPU number.
*/
#ifdef RT_ARCH_X86
#else
#endif
ASMGetIDTR(&Idtr);
{
{
/* Second IDTR read - presumably to confirm that the first value
is stable; the comparison is not visible in this extract. */
ASMGetIDTR(&Idtr2);
}
}
/*
* Check whether RDTSCP is an option.
*/
if (ASMHasCpuId())
{
{
{
ASMNopPause();
}
}
}
}
}
/*
* Check that the APIC ID is unique.
*/
idApic = ASMGetApicId();
else
{
LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
}
/*
* Check that the iCpuSet is within the expected range.
*/
if (RT_UNLIKELY( iCpuSet < 0
|| (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
{
LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
}
else
{
{
LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
}
}
/*
* Update the supported feature mask before we return.
*/
}
/**
* Increase the timer frequency on hosts where this is possible (NT).
*
* The idea is that more interrupts are better for us... Also, it's better that
* we increase the timer frequency, because we might end up getting inaccurate
* callbacks if someone else does it.
*
* @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
*/
{
if (pDevExt->u32SystemTimerGranularityGrant == 0)
{
)
{
}
}
}
/**
* Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
*
* @param pDevExt Clears u32SystemTimerGranularityGrant.
*/
{
{
}
}
/**
*
* @returns IPRT status code.
* @param pSession Session to which the GIP mapping should belong.
* @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
* @param pHCPhysGip Where to store the physical address. (optional)
*
* @remark There is no reference counting on the mapping, so one call to this function
* counts globally as one reference. One call to SUPR0GipUnmap() will unmap the GIP
* and remove the session as a GIP user.
*/
{
int rc;
/*
* Validate
*/
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
#else
#endif
{
/*
* Map it?
*/
rc = VINF_SUCCESS;
if (ppGipR3)
{
if (RT_SUCCESS(rc))
}
/*
* Get physical address.
*/
/*
* Reference globally.
*/
{
{
/*
* host timer frequency to make sure we don't get stuck in guest
* mode and to get better timer (and possibly clock) accuracy.
*/
LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
/*
* document me
*/
{
unsigned i;
}
/*
* document me
*/
|| RTMpGetOnlineCount() == 1)
else
/*
* Detect alternative ways to figure the CPU ID in ring-3 and
* raw-mode context. Check the sanity of the APIC IDs, CPU IDs,
* and CPU set indexes while we're at it.
*/
if (RT_SUCCESS(rc))
{
{
&& DetectState.fSupported != 0)
{
{
}
}
else
{
LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
}
}
else
{
LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
}
}
/*
* Start the GIP timer if all is well..
*/
if (RT_SUCCESS(rc))
{
#ifndef DO_NOT_START_GIP
#endif
rc = VINF_SUCCESS;
}
/*
* Bail out on error.
*/
if (RT_FAILURE(rc))
{
pSession->fGipReferenced = 0;
{
if (RT_SUCCESS(rc2))
}
}
}
}
}
else
{
Log(("SUPR0GipMap: GIP is not available!\n"));
}
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
#else
#endif
/*
* Write returns.
*/
if (pHCPhysGip)
*pHCPhysGip = HCPhys;
if (ppGipR3)
#ifdef DEBUG_DARWIN_GIP
OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
return rc;
}
/**
* Unmaps any user mapping of the GIP and terminates all GIP access
* from this session.
*
* @returns IPRT status code.
* @param pSession Session to which the GIP mapping should belong.
*/
{
int rc = VINF_SUCCESS;
#ifdef DEBUG_DARWIN_GIP
OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
pSession->GipMapObjR3));
#else
#endif
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
#else
#endif
/*
* Unmap anything?
*/
{
if (RT_SUCCESS(rc))
}
/*
* Dereference global GIP.
*/
{
pSession->fGipReferenced = 0;
{
LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
#endif
}
}
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
#else
#endif
return rc;
}
/**
* Gets the GIP pointer.
*
* @returns Pointer to the GIP or NULL.
*/
{
return g_pSUPGlobalInfoPage;
}
/*
*
*
* GIP Initialization, Termination and CPU Offline / Online Related Code.
* GIP Initialization, Termination and CPU Offline / Online Related Code.
* GIP Initialization, Termination and CPU Offline / Online Related Code.
*
*
*/
/**
* Timer callback function for TSC frequency refinement in invariant GIP mode.
*
* Takes a fresh (APIC ID, TSC) sample and uses it to calculate the TSC
* frequency and update the GIP. If the TSC delta still cannot be applied, it
* logs a failure and returns without refining.
*
* @param pTimer The timer.
* @param pvUser Opaque pointer to the device instance data.
* @param iTick The timer tick.
*/
static DECLCALLBACK(void) supdrvInitAsyncRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
/* Tested below to decide whether to give up; presumably filled in by the
TSC-delta application step, which is not visible in this extract. */
bool fDeltaApplied = false;
/* Paranoia. */
#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
/* Spin until the system nanosecond clock moves past u64NanoTS, so the
samples taken below line up with a fresh clock value. */
while (RTTimeSystemNanoTS() == u64NanoTS)
ASMNopPause();
#endif
/* Take the APIC ID and TSC samples back to back after ASMIntDisableFlags(),
whose return value is kept in uFlags. */
uFlags = ASMIntDisableFlags();
idApic = ASMGetApicId();
u64Tsc = ASMReadTSC();
&& !fDeltaApplied))
{
Log(("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
return;
}
/* Calculate the TSC frequency. */
&& u64DeltaNanoTS < UINT32_MAX)
else
{
}
/* Update rest of GIP. */
}
/**
* Starts the TSC-frequency refinement phase asynchronously.
*
* @param pDevExt Pointer to the device instance data.
*/
{
int rc;
/* Validate. */
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
/*
* If the TSC-delta thread is created, wait until it's done calculating
* the TSC-deltas on the relevant online CPUs before we start the TSC refinement.
*/
{
if (rc == VERR_TIMEOUT)
{
SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
return;
}
}
#endif
/*
* Record the TSC and NanoTS as the starting anchor point for refinement of the
* TSC. We deliberately avoid using SUPReadTSC() here as we want to keep the
* reading of the TSC and the NanoTS as close as possible.
*/
while (RTTimeSystemNanoTS() == u64NanoTS)
ASMNopPause();
uFlags = ASMIntDisableFlags();
idApic = ASMGetApicId();
if (RT_SUCCESS(rc))
{
/*
* Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
* interval as small as possible while gaining the most consistent and accurate frequency
* (compared to what the host OS might have measured).
*
* In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
* same TSC frequency whenever possible so we need to keep the interval short.
*/
}
else
}
/**
* Measures the TSC frequency of the system.
*
* Uses a busy-wait method for the async. case as it is intended to help push
* the CPU frequency up, while for the invariant cases using a sleeping method.
*
* The TSC frequency can vary on systems which are not reported as invariant.
* On such systems the object of this function is to find out the nominal,
* maximum TSC frequency under 'normal' CPU operation.
*
* @returns VBox status code.
* @param pDevExt Pointer to the device instance.
*
* @remarks Must be called only -after- measuring the TSC deltas.
*/
{
int cTriesLeft = 4;
/* Assert order. */
while (cTriesLeft-- > 0)
{
/*
* Synchronize with the host OS clock tick before reading the TSC.
* Especially important on older Windows version where the granularity is terrible.
*/
while (RTTimeSystemNanoTS() == u64NanoTsBefore)
ASMNopPause();
uFlags = ASMIntDisableFlags();
idApicBefore = ASMGetApicId();
u64TscBefore = ASMReadTSC();
{
/*
* Sleep-wait since the TSC frequency is constant, it eases host load.
* Shorter interval produces more variance in the frequency (esp. Windows).
*/
RTThreadSleep(200);
while (RTTimeSystemNanoTS() == u64NanoTsAfter)
ASMNopPause();
}
else
{
/* Busy-wait keeping the frequency up and measure. */
for (;;)
{
ASMNopPause();
else
break;
}
}
uFlags = ASMIntDisableFlags();
idApicAfter = ASMGetApicId();
u64TscAfter = ASMReadTSC();
{
int rc;
bool fAppliedBefore;
bool fAppliedAfter;
rc = supdrvTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
if ( !fAppliedBefore
|| !fAppliedAfter)
{
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
/*
* The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
* Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
* and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
* proceed. This should be triggered just once if we're rather unlucky.
*/
if (rc == VERR_TIMEOUT)
{
SUPR0Printf("vboxdrv: supdrvGipInitMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
}
#else
SUPR0Printf("vboxdrv: supdrvGipInitMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
#endif
continue;
}
}
/*
* Update GIP.
*/
pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
return VINF_SUCCESS;
}
}
/**
* Finds our (@a idCpu) entry, or allocates a new one if not found.
*
* @returns Index of the CPU in the cache set.
* @param pGip The GIP.
* @param idCpu The CPU ID.
*/
{
/*
* ASSUMES that CPU IDs are constant.
*/
return i;
cTries = 0;
do
{
{
bool fRc;
if (fRc)
return i;
}
} while (cTries++ < 32);
return i - 1;
}
/**
* The calling CPU should be accounted as online, update GIP accordingly.
*
* This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
*
* @param pDevExt The device extension.
* @param idCpu The CPU ID.
*/
{
int iCpuSet = 0;
uint32_t i = 0;
/*
* Do this behind a spinlock with interrupts disabled as this can fire
* on all CPUs simultaneously, see @bugref{6110}.
*/
/*
* Update the globals.
*/
if (iCpuSet >= 0)
{
}
/*
* Update the entry.
*/
idApic = ASMGetApicId();
/*
* Update the APIC ID and CPU set index mappings.
*/
/* Add this CPU to the set of CPUs for which we need to calculate their TSC-deltas. */
{
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
{
}
#endif
}
/* commit it */
}
/**
* The CPU should be accounted as offline, update the GIP accordingly.
*
* This is used by supdrvGipMpEvent.
*
* @param pDevExt The device extension.
* @param idCpu The CPU ID.
*/
{
int iCpuSet;
unsigned i;
AssertReturnVoid(iCpuSet >= 0);
/* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
{
}
{
/* Reset the TSC delta, we will recalculate it lazily. */
/* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
}
/* commit it */
}
/**
* Multiprocessor event notification callback.
*
* This is used to make sure that the GIP master gets passed on to
* another CPU. It also updates the associated CPU data.
*
* @param enmEvent The event.
* @param idCpu The cpu it applies to.
* @param pvUser Pointer to the device extension.
*
* @remarks This function -must- fire on the newly online'd CPU for the
* RTMPEVENT_ONLINE case and can fire on any CPU for the
* RTMPEVENT_OFFLINE case.
*/
{
/*
* Update the GIP CPU data.
*/
if (pGip)
{
switch (enmEvent)
{
case RTMPEVENT_ONLINE:
break;
case RTMPEVENT_OFFLINE:
break;
}
}
/*
* Make sure there is a master GIP.
*/
if (enmEvent == RTMPEVENT_OFFLINE)
{
if (idGipMaster == idCpu)
{
/*
* The GIP master is going offline, find a new one.
*/
bool fIgnored;
unsigned i;
for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
{
if (idCurCpu != idGipMaster)
{
break;
}
}
}
}
}
/**
* On CPU initialization callback for RTMpOnAll.
*
* @param idCpu The CPU ID.
* @param pvUser1 The device extension.
* @param pvUser2 The GIP.
*/
{
/* This is good enough, even though it will update some of the globals a
bit too much. */
}
/**
* Callback used by the async-TSC detection loop to read the TSC on a CPU.
*
* It is passed to RTMpOnSpecific() by the detection code that logs as
* supdrvGipInitDetermineAsyncTsc, with the address of the caller's current-TSC
* variable as @a pvUser1 and NULL as @a pvUser2.
*
* @param idCpu Ignored.
* @param pvUser1 Where to put the TSC.
* @param pvUser2 Ignored.
*/
static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
/* NOTE(review): the body is not visible in this extract. */
}
/**
* Determine if Async GIP mode is required because of TSC drift.
*
* For the non-async modes it is essential that the time stamp counter (TSC)
* never runs backwards, that is, a read operation to the counter should return
* a bigger value than any previous read operation. This is guaranteed by the latest
* AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
* case we have to choose the asynchronous timer mode.
*
* @param poffMin Pointer to the determined difference between different
* cores (optional, can be NULL).
* @return false if the time stamp counters appear to be synchronized, true otherwise.
*/
{
/*
* Just iterate all the cpus 8 times and make sure that the TSC is
* ever increasing. We don't bother taking TSC rollover into account.
*/
int iEndCpu = RTMpGetArraySize();
int iCpu;
int cLoops = 8;
bool fAsync = false;
int rc = VINF_SUCCESS;
while (cLoops-- > 0)
{
{
rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker, &CurTsc, NULL);
if (RT_SUCCESS(rc))
{
{
fAsync = true;
Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
break;
}
/* Gather statistics (except the first time). */
{
}
/* Next */
}
else if (rc == VERR_NOT_SUPPORTED)
break;
else
}
/* broke out of the loop. */
break;
}
if (poffMin)
Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
#endif
return fAsync;
}
/**
* supdrvGipInit() worker that determines the GIP TSC mode.
*
* @returns The most suitable TSC mode.
* @param pDevExt Pointer to the device instance data.
*/
{
/*
* Establish whether the CPU advertises TSC as invariant, we need that in
* a couple of places below.
*/
bool fInvariantTsc = false;
if (ASMHasCpuId())
{
{
fInvariantTsc = true;
}
}
/*
* On single CPU systems, we don't need to consider ASYNC mode.
*/
if (RTMpGetCount() <= 1)
/*
*/
return SUPGIPMODE_ASYNC_TSC;
/*
* Use invariant mode if the CPU says TSC is invariant.
*/
if (fInvariantTsc)
return SUPGIPMODE_INVARIANT_TSC;
/*
* TSC is not invariant and we're on SMP, this presents two problems:
*
* (1) There might be a skew between the CPU, so that cpu0
* returns a TSC that is slightly different from cpu1.
* This skew may be due to (2), bad TSC initialization
* or slightly different TSC rates.
*
* (2) Power management (and other things) may cause the TSC
* to run at a non-constant speed, and cause the speed
* to be different on the cpus. This will result in (1).
*
* If any of the above is detected, we will have to use ASYNC mode.
*/
/* (1). Try check for current differences between the cpus. */
return SUPGIPMODE_ASYNC_TSC;
/* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
if ( ASMIsValidStdRange(uEAX)
{
/* Check for APM support. */
{
return SUPGIPMODE_ASYNC_TSC;
}
}
return SUPGIPMODE_SYNC_TSC;
}
/**
* Initializes per-CPU GIP information.
*
* @param pDevExt Pointer to the device instance data. Its pGip member must
* not be dereferenced here (see the warning in the body).
* @param pGip Pointer to the GIP.
* @param pCpu Pointer to the GIP CPU entry to initialize.
* @param u64NanoTS The current nanosecond timestamp.
*/
static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
{
/* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
/*
* We don't know the following values until we've executed updates.
* So, we'll just pretend it's a 4 GHz CPU and adjust the history on
* the 2nd timer callout.
*/
/* NOTE(review): only the tail of the chained history assignment is visible
in this extract. */
= pCpu->au32TSCHistory[0]
}
/**
* Initializes the GIP data.
*
* @param pDevExt Pointer to the device instance data.
* @param pGip Pointer to the read-write kernel mapping of the GIP.
* @param HCPhys The physical address of the GIP.
* @param u64NanoTS The current nanosecond timestamp.
* @param uUpdateHz The update frequency.
* @param uUpdateIntervalNS The update interval in nanoseconds.
* @param cCpus The CPU count.
*/
{
unsigned i;
#ifdef DEBUG_DARWIN_GIP
OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#else
LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#endif
/*
* Initialize the structure.
*/
/*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
else
for (i = 0; i < cCpus; i++)
/*
* Link it to the device extension.
*/
}
/**
* Creates the GIP.
*
* @returns VBox status code.
* @param pDevExt Instance data. GIP stuff may be updated.
*/
{
unsigned cCpus;
int rc;
LogFlow(("supdrvGipCreate:\n"));
/* Assert order. */
/*
* Check the CPU count.
*/
cCpus = RTMpGetArraySize();
if ( cCpus > RTCPUSET_MAX_CPUS
{
SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
return VERR_TOO_MANY_CPUS;
}
/*
* Allocate a contiguous set of pages with a default kernel mapping.
*/
rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
if (RT_FAILURE(rc))
{
return rc;
}
/*
* Allocate the TSC-delta sync struct on a separate cache line.
*/
/*
* Find a reasonable update interval and initialize the structure.
*/
/** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
* See @bugref{6710}. */
if (uMod)
supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
{
/* Basically, invariant Windows boxes, should never be detected as async (i.e. TSC-deltas should be 0). */
OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
return VERR_INTERNAL_ERROR_2;
}
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
{
/* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
}
#endif
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
#ifndef SUPDRV_USE_TSC_DELTA_THREAD
{
/*
* Measure the TSC deltas now that we have MP notifications.
*/
int cTries = 5;
do
{
if ( rc != VERR_TRY_AGAIN
&& rc != VERR_CPU_OFFLINE)
break;
} while (--cTries > 0);
}
else
{
AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
}
#endif
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
/*
* Create the timer.
* If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
*/
{
if (rc == VERR_NOT_SUPPORTED)
{
OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
}
}
if (RT_SUCCESS(rc))
{
/*
* We're good.
*/
return VINF_SUCCESS;
}
OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
}
else
}
else
}
else
}
else
}
else
return rc;
}
/**
* Invalidates the GIP data upon termination.
*
* @param pGip Pointer to the read-write kernel mapping of the GIP.
*/
{
unsigned i;
{
}
}
/**
* Terminates the GIP.
*
* @param pDevExt Instance data. GIP stuff may be updated.
*/
{
int rc;
#ifdef DEBUG_DARWIN_GIP
#endif
/*
* Stop receiving MP notifications before tearing anything else down.
*/
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
/*
* Terminate the TSC-delta measurement thread and resources.
*/
#endif
/*
* Destroy the TSC-refinement one-shot timer.
*/
if (pDevExt->pTscRefineTimer)
{
}
if (pDevExt->pvTscDeltaSync)
{
}
/*
* Invalidate the GIP data.
*/
{
}
/*
* Destroy the timer and free the GIP memory object.
*/
{
}
{
}
/*
* Finally, make sure we've released the system timer resolution request
* if one actually succeeded and is still pending.
*/
}
/*
*
*
* GIP Update Timer Related Code
* GIP Update Timer Related Code
* GIP Update Timer Related Code
*
*
*/
/**
* Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
* updates all the per cpu data except the transaction id.
*
* @param pDevExt The device extension.
* @param pGipCpu Pointer to the per cpu data.
* @param u64NanoTS The current time stamp.
* @param u64TSC The current TSC.
* @param iTick The current timer tick. Ticks 2 and 3 take part in the
* decision to reset the TSC history.
*
* @remarks Can be called with interrupts disabled!
*/
static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
{
unsigned iTSCHistoryHead;
/* Delta between this and the previous update. */
/*
* Update the NanoTS.
*/
/*
* Calc TSC delta.
*/
/* We don't need to keep recalculating the frequency when it's invariant. */
return;
/* Non-zero when the TSC delta does not fit into 32 bits. */
if (u64TSCDelta >> 32)
{
}
/*
* On the 2nd and 3rd callout, reset the history with the current TSC
* interval since the values entered by supdrvGipInit are totally off.
* The interval on the 1st callout is completely unreliable, the 2nd is a bit
* better, while the 3rd should be most reliable.
*/
|| u32TransactionId == 7)
&& ( iTick == 2
|| iTick == 3) ))
{
unsigned i;
}
/*
* Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
* Wait until we have at least one full history since the above history reset. The
* assumption is that the majority of the previous history values will be tolerable.
* See @bugref{6710} comment #67.
*/
{
{
/* Scale both partial values down by 4, add them and halve the result.
Presumably each holds the sum of four history entries, making this
an average over eight - TODO confirm, the summing is not visible. */
u32 >>= 2;
u64TSCDelta >>= 2;
u64TSCDelta += u32;
u64TSCDelta >>= 1;
}
}
/*
* TSC History.
*/
/*
* UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
*
* On Windows, we have an occasional (but recurring) sour value that messed up
* the history but taking only 1 interval reduces the precision overall.
* However, this problem existed before the invariant mode was introduced.
*/
{
/* Same scale-by-4, then halve pattern as above (the 8 interval case,
going by the comment above). */
u32 >>= 2;
u32UpdateIntervalTSC >>= 2;
u32UpdateIntervalTSC >>= 1;
/* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
}
{
/* Halve only (the 2 interval case, going by the comment above). */
u32UpdateIntervalTSC >>= 1;
/* value chosen on a 2GHz thinkpad running windows */
}
else
{
/* This value hasn't been checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
}
/* Publish the new interval, including the slack value selected above. */
ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
/*
* CpuHz.
*/
}
/**
* Updates the GIP.
*
* @param pDevExt The device extension.
* @param u64NanoTS The current nanosecond timestamp.
* @param u64TSC The current TSC timestamp.
* @param idCpu The CPU ID.
* @param iTick The current timer tick.
*
* @remarks Can be called with interrupts disabled!
*/
static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
{
/*
* Determine the relevant CPU data.
*/
/* NOTE(review): the conditions guarding the two early returns below are
not visible in this extract. */
else
{
return;
return;
}
/*
* Start update transaction.
*/
{
/* this can happen on win32 if we're taking too long and there are more CPUs around. shouldn't happen though. */
return;
}
/*
* Recalc the update frequency every 0x800th time
* (0x800 is the value of GIP_UPDATEHZ_RECALC_FREQ).
*/
if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariants hosts. */
{
/* Skipped while u64NanoTSLastUpdateHz is still zero. */
if (pGip->u64NanoTSLastUpdateHz)
{
#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
{
/** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
* calculation on non-invariant hosts if it changes the history decision
* taken in supdrvGipDoUpdateCpu(). */
}
#endif
}
}
/*
* Update the data.
*/
/*
* Complete transaction.
*/
}
/**
* Updates the per cpu GIP data for the calling cpu.
*
* @param pDevExt The device extension.
* @param u64NanoTS The current nanosecond timestamp.
* @param u64TSC The current TSC timestamp.
* @param idCpu The CPU ID.
* @param idApic The APIC id for the CPU index.
* @param iTick The current timer tick.
*
* @remarks Can be called with interrupts disabled!
*/
{
/*
* Avoid a potential race when a CPU online notification doesn't fire on
* the onlined CPU but the tick creeps in before the event notification is
* run.
*/
{
}
{
{
/*
* Start update transaction.
*/
{
return;
}
/*
* Update the data.
*/
/*
* Complete transaction.
*/
}
}
}
/**
* Timer callback function for the sync and invariant GIP modes.
*
* @param pTimer The timer.
* @param pvUser Opaque pointer to the device extension.
* @param iTick The timer tick.
*/
static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
/* Sample the raw TSC first; the code that consumes it is not visible in
this extract. */
u64TSC = ASMReadTSC();
{
/*
* The calculations in supdrvGipUpdate() are very timing sensitive and don't handle
* missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
* affected a bit until we get proper TSC deltas than implementing options like
* rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
*
* The likelihood of this happening is really low. On Windows, Linux, and Solaris
* timers fire on the CPU they were registered/started on. Darwin timers don't
* necessarily (they are high priority threads waiting).
*/
/* Interrupts are expected to be disabled at this point. */
Assert(!ASMIntAreEnabled());
}
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
{
/** @todo Do the actual poking using -- RTThreadUserSignal() */
}
#endif
}
/**
* Timer callback function for async GIP mode.
* @param pTimer The timer.
* @param pvUser Opaque pointer to the device extension.
* @param iTick The timer tick.
*/
{
/** @todo reset the transaction number and whatnot when iTick == 1. */
else
}
/*
*
*
* TSC Delta Measurements And Related Code
* TSC Delta Measurements And Related Code
* TSC Delta Measurements And Related Code
*
*
*/
/*
* Select TSC delta measurement algorithm.
*/
#if 1
# define GIP_TSC_DELTA_METHOD_1
#else
# define GIP_TSC_DELTA_METHOD_2
#endif
#ifdef GIP_TSC_DELTA_METHOD_2
/**
* TSC delta measurement algorithm \#2 result entry.
*/
typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
{
/**
* TSC delta measurement algorithm \#2 Data.
*/
typedef struct SUPDRVTSCDELTAMETHOD2
{
/** Padding to make sure the iCurSeqNo is in its own cache line.
* ASSUMES cacheline sizes <= 128 bytes. */
/** The current sequence number of this worker. */
/** Padding to make sure the iCurSeqNo is in its own cache line.
* ASSUMES cacheline sizes <= 128 bytes. */
/** Result table. */
/** Pointer to the data for TSC delta measurement algorithm \#2. */
typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
#endif /* GIP_TSC_DELTA_METHOD_2 */
/**
* callback worker.
*/
typedef struct SUPDRVGIPTSCDELTARGS
{
#ifdef GIP_TSC_DELTA_METHOD_2
/*uint32_t cOffByOne;*/
bool fLagMaster;
bool fLagWorker;
#endif
typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
#ifdef GIP_TSC_DELTA_METHOD_2
/*
* TSC delta measurement algorithm \#2 configuration and code - Experimental!!
*/
# define GIP_TSC_DELTA_LOOPS 17
# define GIP_TSC_DELTA_PRIMER_LOOPS 1
# define GIP_TSC_DELTA_READ_TIME_LOOPS GIP_TSC_DELTA_PRIMER_LOOPS /* no read-time-loops necessary */
{
if (RT_SUCCESS(rc))
return rc;
}
{
/*SUPR0Printf("cHits=%d cOffByOne=%d m=%d w=%d\n", pArgs->cHits, pArgs->cOffByOne, pArgs->pMaster->idApic, pArgs->pWorker->idApic);*/
}
{
{
if (iLoop < GIP_TSC_DELTA_PRIMER_LOOPS)
{
if (iLoop == 0)
/* Lag during the priming to be nice to everyone.. */
pArgs->fLagMaster = true;
pArgs->fLagWorker = true;
}
{
/* 25 % of the body without lagging. */
pArgs->fLagMaster = false;
pArgs->fLagWorker = false;
}
{
/* 25 % of the body with both lagging. */
pArgs->fLagMaster = true;
pArgs->fLagWorker = true;
}
else
{
/* 50% of the body with alternating lag. */
}
}
}
/**
* The core function of the 2nd TSC delta measurement algorithm.
*
* The idea here is that we have the two CPUs execute the exact same code
* collecting a largish set of TSC samples. The code has one data dependency on
* the other CPU, whose purpose is to synchronize the execution as well as to
* help cross-reference the two sets of TSC samples (the sequence numbers).
*
* The @a fLag parameter is used to modify the execution a tiny bit on one or
* both of the CPUs. When @a fLag differs between the CPUs, it is thought that
* it will help with making the CPUs enter lock step execution occasionally.
*
* @param pMyData The data (result table) of the calling CPU.
* @param piOtherSeqNo Pointer to the other CPU's current sequence number.
* @param fLag Whether to insert an extra pause after each sample.
*/
static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
{
/* One sample per iteration until the counter runs out. */
while (cLeft-- > 0)
{
ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
uTsc = ASMReadTSC();
/* Advance to the next result entry. */
pEntry++;
/* Optionally perturb the timing on this CPU, see the function description. */
if (fLag)
ASMNopPause();
}
}
/**
* Processes a data set collected for TSC delta measurement algorithm \#2.
*
* From what is visible here: entries whose idxOther has bit 0 set are
* considered (the bit is shifted out before use), the smallest iDelta seen is
* kept as iBestDelta and every match is counted in cHits. A result is only
* acted upon when there was at least one hit.
*
* NOTE(review): presumably the best delta ends up in @a piWorkerTscDelta;
* the store itself is not visible in this body - confirm.
*
* @param pArgs The argument/state package of the measurement.
* @param pMyData The data set to process.
* @param piWorkerTscDelta Pointer to the worker's TSC delta.
*/
static void supdrvTscDeltaMethod2ProcessDataSet(PSUPDRVGIPTSCDELTARGS pArgs, PSUPDRVTSCDELTAMETHOD2 pMyData,
int64_t volatile *piWorkerTscDelta)
{
#if 0
#endif
{
/* Only entries with the low bit of idxOther set are used. */
if (idxOther & 1)
{
idxOther >>= 1;
{
{
if (fIsMaster)
else
? iDelta < iBestDelta
iBestDelta = iDelta;
cHits++;
}
}
}
#if 0 /* Can be used to detect battles between threads on the same core. Decided to change the master instead. */
else
{
idxOther >>= 1;
cOffByOne++;
}
#endif
}
/* Without any hits there is nothing to report. */
if (cHits > 0)
#if 0
#endif
}
{
true /*fIsMaster*/,
false /*fIsMaster*/,
}
#endif /* GIP_TSC_DELTA_METHOD_2 */
/**
* Callback used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
* and compute the delta between them.
*
* @param idCpu The CPU we are current scheduled on.
* @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
* @param pvUser2 Unused.
*
* @remarks Measuring TSC deltas between the CPUs is tricky because we need to
* read the TSC at exactly the same time on both the master and the
* worker CPUs. Due to DMA, bus arbitration, cache locality,
* contention, SMI, pipelining etc. there is no guaranteed way of
* doing this on x86 CPUs.
*
* GIP_TSC_DELTA_METHOD_1:
* We ignore the first few runs of the loop in order to prime the
* cache. Also, we need to be careful about using 'pause' instruction
* in critical busy-wait loops in this code - it can cause undesired
* behaviour with hyperthreading.
*
* We try to minimize the measurement error by computing the minimum
* read time of the compare statement in the worker by taking TSC
* measurements across it.
*
* It must be noted that the computed minimum read time is mostly to
* eliminate huge deltas when the worker is too early and doesn't by
* itself help produce more accurate deltas. We allow two times the
* computed minimum as an arbitrary acceptable threshold. Therefore,
* it is still possible to get negative deltas where there are none
* when the worker is earlier. As long as these occasional negative
* deltas are lower than the time it takes to exit guest-context and
* the OS to reschedule EMT on a different CPU we won't expose a TSC
* that jumped backwards. It is because of the existence of the
* negative deltas we don't recompute the delta with the master and
* worker interchanged to eliminate the remaining measurement error.
*
* For GIP_TSC_DELTA_METHOD_2, see supdrvTscDeltaMethod2CollectData.
*/
static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
int cTriesLeft;
/* A bit of paranoia first. */
if (!pGipCpuMaster || !pGipCpuWorker)
return;
/* If the CPU isn't part of the measurement, return immediately. */
return;
/* If the IPRT API isn't concurrent safe, the master and worker wait for each other
with a timeout to avoid deadlocking the entire system. */
if (!RTMpOnAllIsConcurrentSafe())
{
/** @todo This was introduced for Windows, but since Windows doesn't use this
* code path any longer (as DPC timeouts BSOD regardless of interrupts,
* see @bugref{6710} comment 81), eventually phase it out. */
uTscStart = ASMReadTSC();
{
{
uTscNow = ASMReadTSC();
{
/* Set the worker delta to indicate failure, not the master. */
return;
}
ASMNopPause();
}
}
else
{
{
uTscNow = ASMReadTSC();
{
return;
}
ASMNopPause();
}
}
}
/*
* Make up to 12 measurement attempts; the loop is left early on success
* (see the break at the bottom).
*/
cTriesLeft = 12;
while (cTriesLeft-- > 0)
{
unsigned i;
for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
{
#ifdef GIP_TSC_DELTA_METHOD_2
#endif
{
/*
* The master.
*/
("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
/* Disable interrupts only in the master for as short a period
as possible, thanks again to Windows. See @bugref{6710} comment #73. */
uFlags = ASMIntDisableFlags();
{ /* nothing */ }
#ifdef GIP_TSC_DELTA_METHOD_1
do
{
#elif defined(GIP_TSC_DELTA_METHOD_2)
supdrvTscDeltaMethod2CollectData(pArgs->pMasterData, &pArgs->pWorkerData->iCurSeqNo, pArgs->fLagMaster);
#else
# error "tsc delta method not selected"
#endif
/* Sync up with worker. */
{ /* nothing */ }
/* Process the data. */
{
#ifdef GIP_TSC_DELTA_METHOD_1
{
}
#elif defined(GIP_TSC_DELTA_METHOD_2)
if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
#else
# error "tsc delta method not selected"
#endif
}
/* Reset our TSC sample and tell the worker to move on. */
}
else
{
/*
* The worker.
*/
{ /* nothing */ }
#ifdef GIP_TSC_DELTA_METHOD_1
/*
* Keep reading the TSC until we notice that the master has read his. Reading
* the TSC -after- the master has updated the memory is way too late. We thus
* compensate by trying to measure how long it took for the worker to notice
* the memory flushed from the master.
*/
do
{
uTscWorker = ASMReadTSC();
{
/* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
{
if (uCmpReadTime < uMinCmpReadTime)
}
else
}
else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
{
if (uCmpReadTime < uMinCmpReadTime)
}
#elif defined(GIP_TSC_DELTA_METHOD_2)
supdrvTscDeltaMethod2CollectData(pArgs->pWorkerData, &pArgs->pMasterData->iCurSeqNo, pArgs->fLagWorker);
#else
# error "tsc delta method not selected"
#endif
/* Tell master we're done collecting our data. */
/* Wait for the master to process the data. */
ASMNopPause();
}
}
/*
* We must reset the worker TSC sample value in case it gets picked as a
* GIP master later on (it's trashed above, naturally).
*/
/*
* Success? If so, stop trying.
*/
{
{
}
else
{
}
break;
}
}
}
/**
* Clears TSC delta related variables.
*
* Clears all TSC samples as well as the delta synchronization variable on
* all the per-CPU structs. Optionally clears the per-CPU deltas too.
*
* @param pDevExt Pointer to the device instance data.
* @param fClearDeltas Whether the deltas are also to be cleared.
*/
{
unsigned iCpu;
{
if (fClearDeltas)
}
}
/**
* Measures the TSC delta between the master GIP CPU and one specified worker
* CPU.
*
* @returns VBox status code.
* @param pDevExt Pointer to the device instance data.
* @param idxWorker The index of the worker CPU from the GIP's array of
* CPUs.
*
* @remarks This must be called with preemption enabled!
*/
{
int rc;
/* Validate input a bit. */
/*
* Don't attempt measuring the delta for the GIP master.
*/
{
return VINF_SUCCESS;
}
/*
* If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
* try to pick a different master. (This fudge only works with multi core systems.)
* ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
*/
&& ASMHasCpuId()
&& ASMIsValidStdRange(ASMCpuId_EAX(0))
{
uint32_t i;
if ( i != iGipCpuMaster
&& i != idxWorker
{
iGipCpuMaster = i;
break;
}
}
/*
* Set the master TSC as the initiator. This serializes delta measurements.
*/
{
/*
* Sleep here rather than spin as there is a parallel measurement
* being executed and that can take a good while to be done.
*/
RTThreadSleep(1);
}
{
/*
* Initialize data package for the RTMpOnAll callback.
*/
#ifdef GIP_TSC_DELTA_METHOD_1
rc = VINF_SUCCESS;
#elif defined(GIP_TSC_DELTA_METHOD_2)
#else
# error "huh?"
#endif
if (RT_SUCCESS(rc))
{
/*
* Fire TSC-read workers on all CPUs but only synchronize between master
* and one worker to ease memory contention.
*/
if (RT_SUCCESS(rc))
{
{
/*
* Work the TSC delta applicability rating. It starts
* optimistic in supdrvGipInit, we downgrade it here.
*/
else
{
}
}
else
}
}
#ifdef GIP_TSC_DELTA_METHOD_2
#endif
}
else
return rc;
}
/**
* Performs the initial measurements of the TSC deltas between CPUs.
*
* This is called by supdrvGipCreate or triggered by it if threaded.
*
* @returns VBox status code.
* @param pDevExt Pointer to the device instance data.
*
* @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
* idCpu, GIP's online CPU set which are populated in
* supdrvGipInitOnCpu().
*/
{
unsigned iCpu;
unsigned iOddEven;
int rc = VINF_SUCCESS;
/*
* Pick the first CPU online as the master TSC and make it the new GIP master based
* on the APIC ID.
*
* Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
*/
{
if (idxCpu != UINT16_MAX)
{
{
break;
}
}
}
/*
* If there is only a single CPU online we have nothing to do.
*/
{
return VINF_SUCCESS;
}
/*
* Loop thru the GIP CPU array and get deltas for each CPU (except the
* master). We do the CPUs with the even numbered APIC IDs first so that
* we've got alternative master CPUs to pick from on hyper-threaded systems.
*/
{
{
{
if (RT_FAILURE(rc))
{
SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
break;
}
{
SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
rc = VERR_TRY_AGAIN;
break;
}
}
}
}
return rc;
}
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
/**
* Switches the TSC-delta measurement thread into the butchered state.
*
* NOTE(review): the state switch itself, any spinlock handling and the logging
* of @a pszFailed are not present in this body - confirm against the intended
* implementation. What this function guarantees is its return value.
*
* @returns @a rcFailed, always, so that callers can simply do
*          "return supdrvTscDeltaThreadButchered(...)".
* @param pDevExt Pointer to the device instance data.
* @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
* @param pszFailed An error message to log.
* @param rcFailed The error code to exit the thread with.
*/
static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
{
    /* Not consulted here; silence unused-parameter warnings. */
    (void)pDevExt;
    (void)fSpinlockHeld;
    (void)pszFailed;

    /*
     * Return the failure code on every path. Previously the return was
     * only reached when the spinlock was not held, so a caller passing
     * fSpinlockHeld=true and using the result got an indeterminate value.
     */
    return rcFailed;
}
/**
* The TSC-delta measurement thread.
*
* @returns VBox status code.
* @param hThread The thread handle.
* @param pvUser Opaque pointer to the device instance data.
*/
{
bool fInitialMeasurement = true;
int rc = VERR_INTERNAL_ERROR_2;
for (;;)
{
/*
* Switch on the current state.
*/
switch (enmState)
{
{
if (RT_FAILURE(rc))
/* fall thru */
}
{
/* Simple adaptive timeout. */
if (cConsecutiveTimeouts++ == 10)
{
cConsecutiveTimeouts = 0;
}
if ( RT_FAILURE(rc)
&& rc != VERR_TIMEOUT)
break;
}
{
rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
if (RT_FAILURE(rc))
RTThreadSleep(10);
/* fall thru */
}
{
cConsecutiveTimeouts = 0;
if (fInitialMeasurement)
{
int cTries = 8;
int cMsWaitPerTry = 10;
fInitialMeasurement = false;
do
{
if ( RT_SUCCESS(rc)
|| ( RT_FAILURE(rc)
&& rc != VERR_TRY_AGAIN
&& rc != VERR_CPU_OFFLINE))
{
break;
}
} while (cTries-- > 0);
}
else
{
unsigned iCpu;
/* Measure TSC-deltas only for the CPUs that are in the set. */
rc = VINF_SUCCESS;
{
{
}
}
}
break;
}
return VINF_SUCCESS;
default:
return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
}
}
return rc;
}
/**
* Waits for the TSC-delta measurement thread to respond to a state change.
*
* @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
* other error code on internal error.
*
* @param pDevExt Pointer to the device instance data.
* @param enmCurState The current state.
* @param enmNewState The new state we're waiting for it to enter.
*/
{
/*
* Wait a short while for the expected state transition.
*/
int rc;
{
rc = VINF_SUCCESS;
}
{
/*
* Wait longer if the state has not yet transitioned to the one we want.
*/
if ( RT_SUCCESS(rc)
|| rc == VERR_TIMEOUT)
{
/*
* Check the state whether we've succeeded.
*/
if (enmState == enmNewState)
rc = VINF_SUCCESS;
else if (enmState == enmCurState)
{
rc = VERR_TIMEOUT;
OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
enmNewState));
}
else
{
OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
enmState, enmNewState));
}
}
else
}
else
{
OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
}
return rc;
}
/**
* Waits for TSC-delta measurements to be completed for all online CPUs.
*
* @returns VBox status code.
* @param pDevExt Pointer to the device instance data.
*/
{
int cTriesLeft = 5;
int cMsTotalWait;
int cMsWaited = 0;
int cMsWaitGranularity = 1;
return VERR_THREAD_NOT_WAITABLE;
while (cTriesLeft-- > 0)
{
return VINF_SUCCESS;
if (cMsWaited >= cMsTotalWait)
break;
}
return VERR_TIMEOUT;
}
/**
* Terminates the actual thread running supdrvTscDeltaThread().
*
* This is an internal worker function for supdrvTscDeltaThreadInit() and
* supdrvTscDeltaTerm().
*
* @param pDevExt Pointer to the device instance data.
*/
{
int rc;
if (RT_FAILURE(rc))
{
/* Signal a few more times before giving up. */
int cTriesLeft = 5;
while (--cTriesLeft > 0)
{
if (rc != VERR_TIMEOUT)
break;
}
}
}
/**
* Initializes and spawns the TSC-delta measurement thread.
*
* A thread is required for servicing re-measurement requests triggered by
* events, as the measurement cannot be performed directly under all contexts
* on all OSs.
*
* @returns VBox status code.
* @param pDevExt Pointer to the device instance data.
*
* @remarks Must only be called -after- initializing GIP and setting up MP
* notifications!
*/
{
int rc;
rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
if (RT_SUCCESS(rc))
{
return rc;
}
}
else
}
else
}
else
return rc;
}
/**
* Terminates the TSC-delta measurement thread and cleanup.
*
* @param pDevExt Pointer to the device instance data.
*/
{
{
}
{
}
{
}
}
#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
/**
* Service a TSC-delta measurement request.
*
* @returns VBox status code.
* @retval VINF_SUCCESS on success, and also when the TSC delta isn't being
*         used (the request is a no-op then).
* @retval VERR_INVALID_CPU_ID if the worker CPU ID is NIL_RTCPUID.
* @retval VERR_WRONG_ORDER from the validation step. NOTE(review): presumably
*         when the session hasn't got the GIP mapped - confirm.
* @param pDevExt Pointer to the device instance data.
* @param pSession The support driver session.
* @param pReq Pointer to the TSC-delta measurement request.
*
* @note NOTE(review): the "@todo Async." comment in the
*       SUPDRV_USE_TSC_DELTA_THREAD section is not terminated where it
*       appears to end; it runs on until the end of the following comment,
*       so the lines in between are commented out - confirm this is intended.
*/
int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
{
int rc;
/*
* Validate.
*/
return VERR_WRONG_ORDER;
if (idCpuWorker == NIL_RTCPUID)
return VERR_INVALID_CPU_ID;
/*
* The request is a noop if the TSC delta isn't being used.
*/
return VINF_SUCCESS;
{
{
return VINF_SUCCESS;
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
{
/** @todo Async. doesn't implement options like retries, waiting. We'll need
* to pass those options to the thread somehow and implement it in the
{
}
return VINF_SUCCESS;
}
/*
* If a TSC-delta measurement request is already being serviced by the thread,
* wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
*/
while (cTries-- > 0)
{
{
if ( !cTries
|| !cMsWaitRetry)
if (cMsWaitRetry)
}
}
#endif
/* Retry loop: stop at the first successful attempt. */
while (cTries-- > 0)
{
if (RT_SUCCESS(rc))
{
break;
}
if (cMsWaitRetry)
}
break;
}
}
return rc;
}
/**
* Reads TSC with delta applied.
*
* Will try to resolve delta value INT64_MAX before applying it. This is the
* main purpose of this function, to handle the case where the delta needs to be
* determined.
*
* @returns VBox status code.
* @param pDevExt Pointer to the device instance data.
* @param pSession The support driver session.
* @param pReq Pointer to the TSC-read request.
*/
{
int rc;
/*
* Validate. We require the client to have mapped GIP (no asserting on
* ring-3 preconditions).
*/
return VERR_WRONG_ORDER;
/*
* We're usually here because we need to apply delta, but we shouldn't be
* upset if the GIP is in some different mode.
*/
{
for (;;)
{
/*
* Start by gathering the data, using CLI for disabling preemption
* while we do that.
*/
int iGipCpu;
{
/*
* If we're lucky we've got a delta, but no predictions here
* as this I/O control is normally only used when the TSC delta
* is set to INT64_MAX.
*/
{
rc = VINF_SUCCESS;
break;
}
/* Give up after a few times. */
if (cTries >= 4)
{
break;
}
/* Need to measure the delta and try again. */
}
else
{
/* This really shouldn't happen. */
break;
}
}
}
else
{
/*
* No delta to apply. Easy. Deal with preemption the lazy way.
*/
int iGipCpu;
else
rc = VINF_SUCCESS;
}
return rc;
}