tstRTInlineAsm.cpp revision cae5c7830ccf4437c1560f76493a54a317137270
/* $Id$ */
/** @file
* IPRT Testcase - inline assembly.
*/
/*
* Copyright (C) 2006-2013 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#include <iprt/asm-math.h>
/* See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44018. Only gcc version 4.4
* is affected. No harm for the VBox code: If the cpuid code compiles, it works
* fine. */
# define GCC44_32BIT_PIC
# endif
#endif
# include <iprt/asm-amd64-x86.h>
#else
#endif
/*******************************************************************************
* Defined Constants And Macros *
*******************************************************************************/
/* NOTE(review): continuation-line body of a check macro whose #define line and
 * condition are not visible in this view.  The visible part reports a failure
 * through RTTestFailed(), printing the stringified `val`, the expected value
 * and the actual value, both formatted with `fmt`. */
do \
{ \
{ \
RTTestFailed(g_hTest, "%s, %d: " #val ": expected " fmt " got " fmt "\n", __FUNCTION__, __LINE__, (expect), (val)); \
} \
} while (0)
/* NOTE(review): continuation-line body of a check macro whose #define line and
 * condition are not visible in this view; presumably CHECKOP, which is invoked
 * further down with (op, expect, fmt, type) style arguments - confirm.  The
 * visible part reports a failure through RTTestFailed(), printing the
 * stringified `op`, `expect` cast to `type`, and `val`. */
do \
{ \
{ \
RTTestFailed(g_hTest, "%s, %d: " #op ": expected " fmt " got " fmt "\n", __FUNCTION__, __LINE__, (type)(expect), val); \
} \
} while (0)
/**
* Calls a worker function with different worker variable storage types.
*
* The visible part starts a sub-test named after the stringified `name`
* argument via RTTestISub().
*
* NOTE(review): the #define line and the worker invocations are not visible
* in this view; presumably this is DO_SIMPLE_TEST, which is used further
* down - confirm.
*/
do \
{ \
RTTestISub(#name); \
\
\
} while (0)
/*******************************************************************************
* Global Variables *
*******************************************************************************/
/** The test instance. */
/**
 * Translates a cache/TLB associativity value into a printable label.
 *
 * @returns Read-only, NUL-terminated string; never NULL.
 *          - 0 gives "res0 ", 1 gives "direct", 256 and above give "???".
 *          - 2..255 give "<u> way" (e.g. "4 way").
 * @param   u   The raw associativity value.
 *
 * @note    Labels for 2..255 live in per-value static storage, so pointers
 *          from several calls stay valid at the same time (e.g. two calls in
 *          one printf-style argument list) and nothing needs freeing.  The
 *          storage is (re)written on each call without locking.
 */
const char *getCacheAss(unsigned u)
{
    /* One slot per value; "255 way" plus the terminator needs exactly 8 bytes. */
    static char s_aszWays[256][8];
    static const char s_szSuffix[] = " way";
    char szDigits[3];
    unsigned cDigits = 0;
    unsigned uLeft;
    unsigned i;
    char *pszRet;
    char *pszDst;

    if (u == 0)
        return "res0 ";
    if (u == 1)
        return "direct";
    if (u >= 256)
        return "???";

    /* Collect the decimal digits, least significant first.  Done by hand so
       the function does not depend on any formatting header. */
    uLeft = u;
    do
    {
        szDigits[cDigits++] = (char)('0' + uLeft % 10);
        uLeft /= 10;
    } while (uLeft != 0);

    /* Emit the digits most significant first, then the suffix including its
       terminator. */
    pszRet = s_aszWays[u];
    pszDst = pszRet;
    while (cDigits > 0)
        *pszDst++ = szDigits[--cDigits];
    for (i = 0; i < sizeof(s_szSuffix); i++)
        *pszDst++ = s_szSuffix[i];

    return pszRet;
}
/**
 * Maps an L2 associativity code to its display label.
 *
 * @returns Read-only label for codes 0 through 15; "????" for anything else.
 * @param   u   The associativity code.
 */
const char *getL2CacheAss(unsigned u)
{
    /* Labels indexed directly by the code value. */
    static const char * const s_apszLabels[] =
    {
        "off ",     /* 0 */
        "direct",   /* 1 */
        "2 way ",   /* 2 */
        "res3 ",    /* 3 */
        "4 way ",   /* 4 */
        "res5 ",    /* 5 */
        "8 way ",   /* 6 */
        "res7 ",    /* 7 */
        "16 way",   /* 8 */
        "res9 ",    /* 9 */
        "res10 ",   /* 10 */
        "res11 ",   /* 11 */
        "res12 ",   /* 12 */
        "res13 ",   /* 13 */
        "res14 ",   /* 14 */
        "fully "    /* 15 */
    };
    const unsigned cLabels = (unsigned)(sizeof(s_apszLabels) / sizeof(s_apszLabels[0]));

    return u < cLabels ? s_apszLabels[u] : "????";
}
/**
* Test and dump all possible info from the CPUID instruction.
*
* Registers the "ASMCpuId" sub-test and returns immediately when
* ASMHasCpuId() is false.
*
* @remark Bits shared with the libc cpuid.c program. This is all written by me, so no worries.
* @todo Transform the dumping into a generic runtime function. We'll need it for logging!
*
* NOTE(review): many declarations, loop headers and call openers of this
* function are not visible in this view, so several string literals and
* argument lines below appear without their enclosing statement.
*/
void tstASMCpuId(void)
{
RTTestISub("ASMCpuId");
unsigned iBit;
struct
{
} s;
/* Nothing to test or dump when ASMHasCpuId() reports false. */
if (!ASMHasCpuId())
{
return;
}
/*
* Try the 0 function and use that for checking the ASMCpuId_* variants.
*/
u32 = ASMCpuId_EAX(0);
u32 = ASMCpuId_EBX(0);
u32 = ASMCpuId_ECX(0);
u32 = ASMCpuId_EDX(0);
/*
* Done testing, dump the information.
*/
/* raw dump */
"\n"
" RAW Standard CPUIDs\n"
"Function eax ebx ecx edx\n");
{
/* Leaf 04 and leaf 0d output depend on the initial value of ECX.
* The same seems to apply to invalid standard functions. */
if (iStd > cFunctions)
continue;
{
if (iStd == 1)
}
/* Leaves 0x04, 0x0b and 0x0d each have a dump line that also prints uECX
* (the bracketed value) next to the four result registers. */
if (iStd == 0x04)
{
RTTestIPrintf(RTTESTLVL_ALWAYS, " [%02x] %08x %08x %08x %08x\n", uECX, s.uEAX, s.uEBX, s.uECX, s.uEDX);
}
else if (iStd == 0x0b)
{
RTTestIPrintf(RTTESTLVL_ALWAYS, " [%02x] %08x %08x %08x %08x\n", uECX, s.uEAX, s.uEBX, s.uECX, s.uEDX);
}
else if (iStd == 0x0d)
{
RTTestIPrintf(RTTESTLVL_ALWAYS, " [%02x] %08x %08x %08x %08x\n", uECX, s.uEAX, s.uEBX, s.uECX, s.uEDX);
}
}
/*
* Understandable output
*/
"Name: %.04s%.04s%.04s\n"
"Support: 0-%u\n",
/*
* Get Features.
*/
if (cFunctions >= 1)
{
"Family: %#x \tExtended: %#x \tEffective: %#x\n"
"Model: %#x \tExtended: %#x \tEffective: %#x\n"
"Stepping: %d\n"
"Type: %d (%s)\n"
"APIC ID: %#04x\n"
"Logical CPUs: %d\n"
"CLFLUSH Size: %d\n"
"Brand ID: %#04x\n",
(s.uEBX >> 0) & 0xff);
/** @todo check intel docs. */
}
/*
* Extended.
* Implemented after AMD specs.
*/
/** @todo check out the intel specs. */
/* NOTE(review): the condition guarding this early return is not visible here. */
{
RTTestIPrintf(RTTESTLVL_ALWAYS, "No extended CPUID info? Check the manual on how to detect this...\n");
return;
}
/* raw dump */
"\n"
" RAW Extended CPUIDs\n"
"Function eax ebx ecx edx\n");
{
if (iExt > cExtFunctions)
continue; /* Invalid extended functions seem to change the value if ECX changes */
}
/*
* Understandable output
*/
"Ext Name: %.4s%.4s%.4s\n"
"Ext Supports: 0x80000000-%#010x\n",
if (cExtFunctions >= 0x80000001)
{
"Family: %#x \tExtended: %#x \tEffective: %#x\n"
"Model: %#x \tExtended: %#x \tEffective: %#x\n"
"Stepping: %d\n"
"Brand ID: %#05x\n",
s.uEBX & 0xfff);
}
/* NOTE(review): the statements guarded by the next four checks are not
* visible in this view. */
if (cExtFunctions >= 0x80000002)
if (cExtFunctions >= 0x80000003)
if (cExtFunctions >= 0x80000004)
if (cExtFunctions >= 0x80000002)
if (cExtFunctions >= 0x80000005)
{
"TLB 2/4M Data: %s %3d entries\n",
"TLB 4K Data: %s %3d entries\n",
"L1 Instr Cache Line Size: %d bytes\n"
"L1 Instr Cache Lines Per Tag: %d\n"
"L1 Instr Cache Associativity: %s\n"
"L1 Instr Cache Size: %d KB\n",
(s.uEDX >> 0) & 0xff,
"L1 Data Cache Line Size: %d bytes\n"
"L1 Data Cache Lines Per Tag: %d\n"
"L1 Data Cache Associativity: %s\n"
"L1 Data Cache Size: %d KB\n",
(s.uECX >> 0) & 0xff,
}
if (cExtFunctions >= 0x80000006)
{
"L2 TLB 2/4M Data: %s %4d entries\n",
"L2 TLB 4K Data: %s %4d entries\n",
"L2 Cache Line Size: %d bytes\n"
"L2 Cache Lines Per Tag: %d\n"
"L2 Cache Associativity: %s\n"
"L2 Cache Size: %d KB\n",
(s.uEDX >> 0) & 0xff,
}
if (cExtFunctions >= 0x80000007)
{
}
if (cExtFunctions >= 0x80000008)
{
"Physical Address Width: %d bits\n"
"Virtual Address Width: %d bits\n"
"Guest Physical Address Width: %d bits\n",
(s.uEAX >> 0) & 0xff,
"Physical Core Count: %d\n",
}
if (cExtFunctions >= 0x8000000a)
{
"SVM Revision: %d (%#x)\n"
"Number of Address Space IDs: %d (%#x)\n",
}
}
#endif /* AMD64 || X86 */
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  The one visible statement stores 0 through pu8. */
{
*pu8 = 0;
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicXchgU8(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  The one visible statement stores 0 through pu16. */
{
*pu16 = 0;
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicXchgU16(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  The one visible statement stores 0 through pu32. */
{
*pu32 = 0;
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicXchgU32(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  The one visible statement stores 0 through pu64. */
{
*pu64 = 0;
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicXchgU64(void)
{
}
/* NOTE(review): body of a helper whose signature line and statements are not
 * visible in this view. */
{
}
/* Invokes DO_SIMPLE_TEST with ASMAtomicXchgPtr as the name argument and
 * void * as the type argument. */
static void tstASMAtomicXchgPtr(void)
{
DO_SIMPLE_TEST(ASMAtomicXchgPtr, void *);
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  The one visible statement stores 0xff through pu8. */
{
*pu8 = 0xff;
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicCmpXchgU8(void)
{
}
/* NOTE(review): body of a helper whose signature line and statements are not
 * visible in this view. */
{
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicCmpXchgU32(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  Both visible CHECKOP lines pass `false` as the expected result of
 * ASMAtomicCmpXchgU64 on pu64, once with old value 0xffffffff and once with
 * 0xffffffff00000000. */
{
CHECKOP(ASMAtomicCmpXchgU64(pu64, UINT64_C(0x80040008008efd), UINT64_C(0xffffffff)), false, "%d", bool);
CHECKOP(ASMAtomicCmpXchgU64(pu64, UINT64_C(0x80040008008efd), UINT64_C(0xffffffff00000000)), false, "%d", bool);
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicCmpXchgU64(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  The visible CHECKOP line passes `false` as the expected result of
 * ASMAtomicCmpXchgExU32 on pu32; u32Old receives the old value by address. */
{
CHECKOP(ASMAtomicCmpXchgExU32(pu32, UINT32_C(0x8008efd), UINT32_C(0xffffffff), &u32Old), false, "%d", bool);
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicCmpXchgExU32(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  Both visible CHECKOP lines pass `false` as the expected result of
 * ASMAtomicCmpXchgExU64 on pu64; u64Old is passed by address. */
{
CHECKOP(ASMAtomicCmpXchgExU64(pu64, UINT64_C(0x80040008008efd), 0xffffffff, &u64Old), false, "%d", bool);
CHECKOP(ASMAtomicCmpXchgExU64(pu64, UINT64_C(0x80040008008efd), UINT64_C(0xffffffff00000000), &u64Old), false, "%d", bool);
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicCmpXchgExU64(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  The one visible statement stores 0 through pu64. */
{
*pu64 = 0;
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicReadU64(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  The one visible statement stores 0 through pu64. */
{
*pu64 = 0;
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicUoReadU64(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  It starts by storing 10 through pi32. */
{
*pi32 = 10;
/* NOTE(review): the continuation lines below look like the body of a local
 * check macro whose #define line and conditions are not visible.  They report
 * a return value (i32Rc) differing from `rc`, and *pi32 differing from `val`. */
do { \
RTTestFailed(g_hTest, "%s, %d: FAILURE: %s -> %d expected %d\n", __FUNCTION__, __LINE__, #op, i32Rc, rc); \
RTTestFailed(g_hTest, "%s, %d: FAILURE: %s => *pi32=%d expected %d\n", __FUNCTION__, __LINE__, #op, *pi32, val); \
} while (0)
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicAddS32(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  It starts by storing 10 through pi64. */
{
*pi64 = 10;
/* NOTE(review): the continuation lines below look like the body of a local
 * check macro whose #define line and conditions are not visible.  They report
 * a return value (i64Rc) differing from `rc`, and *pi64 differing from `val`;
 * the expected values are cast to int64_t for the %llx format. */
do { \
RTTestFailed(g_hTest, "%s, %d: FAILURE: %s -> %llx expected %llx\n", __FUNCTION__, __LINE__, #op, i64Rc, (int64_t)rc); \
RTTestFailed(g_hTest, "%s, %d: FAILURE: %s => *pi64=%llx expected %llx\n", __FUNCTION__, __LINE__, #op, *pi64, (int64_t)(val)); \
} while (0)
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicAddS64(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  It starts by storing 10 through pi32. */
{
*pi32 = 10;
/* NOTE(review): the continuation lines below look like the body of a local
 * check macro whose #define line and conditions are not visible.  Both the
 * return value (i32Rc) and *pi32 are reported against the same expected
 * value `rc`. */
do { \
RTTestFailed(g_hTest, "%s, %d: FAILURE: %s -> %d expected %d\n", __FUNCTION__, __LINE__, #op, i32Rc, rc); \
RTTestFailed(g_hTest, "%s, %d: FAILURE: %s => *pi32=%d expected %d\n", __FUNCTION__, __LINE__, #op, *pi32, rc); \
} while (0)
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicDecIncS32(void)
{
}
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  It starts by storing 10 through pi64. */
{
*pi64 = 10;
/* NOTE(review): the continuation lines below look like the body of a local
 * check macro whose #define line and conditions are not visible.  Both the
 * return value (i64Rc) and *pi64 are reported against the same expected
 * value `rc`. */
do { \
RTTestFailed(g_hTest, "%s, %d: FAILURE: %s -> %lld expected %lld\n", __FUNCTION__, __LINE__, #op, i64Rc, rc); \
RTTestFailed(g_hTest, "%s, %d: FAILURE: %s => *pi64=%lld expected %lld\n", __FUNCTION__, __LINE__, #op, *pi64, rc); \
} while (0)
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicDecIncS64(void)
{
}
/* NOTE(review): body of a helper whose signature line and statements are not
 * visible in this view. */
{
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicAndOrU32(void)
{
}
/* NOTE(review): body of a helper whose signature line and statements are not
 * visible in this view; only the remark below is shown. */
{
// Same as above, but now 64-bit wide.
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMAtomicAndOrU64(void)
{
}
/* Structure type named TSTPAGE.
 * NOTE(review): its members are not visible in this view; presumably it is
 * the buffer used by the zero-page test that follows - confirm. */
typedef struct
{
} TSTPAGE;
/* NOTE(review): body of a helper whose signature line is not visible in this
 * view.  The visible part is a loop running j from 0 to 15 whose body is not
 * shown. */
{
for (unsigned j = 0; j < 16; j++)
{
}
}
/* Parameterless, file-local function; no statements are visible in its body
 * in this view. */
static void tstASMMemZeroPage(void)
{
}
/* NOTE(review): body of a helper whose signature line and statements are not
 * visible in this view. */
{
{
}
}
/* NOTE(review): only the opening of a local struct declaration and an empty
 * nested block are visible here; the braces do not balance in this view, so
 * lines are evidently missing. */
void tstASMMemZero32(void)
{
struct
{
{
}
}
/* Declares three local buffers of anonymous struct type: Buf1, Buf2 and Buf3.
 * NOTE(review): the struct members and every statement using the buffers are
 * not visible in this view. */
void tstASMMemFill32(void)
{
struct
{
} Buf1;
struct
{
} Buf2;
struct
{
} Buf3;
}
/* Calls ASMMultU64ByU32DivByU32 with a series of constant operand triples,
 * storing each result in u64.  The calls are compiled only when RT_ARCH_AMD64
 * or RT_ARCH_X86 is defined.
 * NOTE(review): the declaration of u64 and any checks on the results are not
 * visible in this view. */
void tstASMMath(void)
{
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
u64 = ASMMultU64ByU32DivByU32(UINT64_C(0x0000000000000001), UINT32_C(0x00000001), UINT32_C(0x00000001));
u64 = ASMMultU64ByU32DivByU32(UINT64_C(0x0000000100000000), UINT32_C(0x80000000), UINT32_C(0x00000002));
u64 = ASMMultU64ByU32DivByU32(UINT64_C(0xfedcba9876543210), UINT32_C(0xffffffff), UINT32_C(0xffffffff));
u64 = ASMMultU64ByU32DivByU32(UINT64_C(0xffffffffffffffff), UINT32_C(0xffffffff), UINT32_C(0xffffffff));
u64 = ASMMultU64ByU32DivByU32(UINT64_C(0xffffffffffffffff), UINT32_C(0xfffffff0), UINT32_C(0xffffffff));
u64 = ASMMultU64ByU32DivByU32(UINT64_C(0x3415934810359583), UINT32_C(0x58734981), UINT32_C(0xf8694045));
u64 = ASMMultU64ByU32DivByU32(UINT64_C(0x3415934810359583), UINT32_C(0xf8694045), UINT32_C(0x58734981));
/* The case below is disabled; the discussion explains that the quotient does
 * not fit into 32 bits for these operands. */
# if 0 /* bird: question is whether this should trap or not:
*
* frank: Of course it must trap:
*
* 0xfffffff8 * 0x77d7daf8 = 0x77d7daf441412840
*
* During the following division, the quotient must fit into a 32-bit register.
* Therefore the smallest valid divisor is
*
* (0x77d7daf441412840 >> 32) + 1 = 0x77d7daf5
*
* which is definitely greater than 0x3b9aca00.
*
* bird: No, the C version does *not* crash. So, the question is whether there's any
* code depending on it not crashing.
*
* Of course the assembly versions of the code crash right now for the reasons you've
* given, but the 32-bit MSC version does not crash.
*
* frank: The C version does not crash but delivers incorrect results for this case.
* The reason is
*
* u.s.Hi = (unsigned long)(u64Hi / u32C);
*
* Here the division is actually 64-bit by 64-bit but the 64-bit result is truncated
* to 32 bit. If using this (optimized and fast) function we should just be sure that
* the operands are in a valid range.
*/
u64 = ASMMultU64ByU32DivByU32(UINT64_C(0xfffffff8c65d6731), UINT32_C(0x77d7daf8), UINT32_C(0x3b9aca00));
# endif
#endif /* AMD64 || X86 */
}
/* Resets the three input variables u64In, u32In and u16In to 0.
 * NOTE(review): their declarations and every other statement of this
 * function are not visible in this view. */
void tstASMByteSwap(void)
{
u64In = 0;
u32In = 0;
u16In = 0;
}
/* Benchmark driver.
 * NOTE(review): the static variables, the #if / #define lines of the timing
 * macro and the benchmark invocations are not visible in this view.  What is
 * shown are two variants of a timing loop body. */
void tstASMBench(void)
{
/*
* Make this static. We don't want to have this located on the stack.
*/
register unsigned i;
register uint64_t u64Elapsed;
/* Variant 1: yield, take the start value from ASMReadTSC(), then execute
 * `op` cRounds times. */
do { \
RTThreadYield(); \
u64Elapsed = ASMReadTSC(); \
for (i = cRounds; i > 0; i--) \
op; \
} while (0)
#else
/* Variant 2: same loop, but the start value comes from RTTimeNanoTS(). */
do { \
RTThreadYield(); \
u64Elapsed = RTTimeNanoTS(); \
for (i = cRounds; i > 0; i--) \
op; \
} while (0)
#endif
/* The Darwin gcc does not like this ... */
#if !defined(RT_OS_DARWIN) && !defined(GCC44_32BIT_PIC) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
#endif
}
/* NOTE(review): body of the program entry point; its signature line, the
 * statement that sets rc and most test invocations are not visible in this
 * view. */
{
/* A non-zero rc is returned to the caller before any test runs. */
if (rc)
return rc;
/*
* Execute the tests.
*/
tstASMCpuId();
#endif
#if 1
tstASMMath();
tstASMBench();
#endif
/*
* Show the result.
*/
/* The return value of RTTestSummaryAndDestroy() becomes the function's result. */
return RTTestSummaryAndDestroy(g_hTest);
}