/* $Id$ */
/** @file
 * numa - NUMA / memory benchmark.
 */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
/*
 * Copyright (C) 2011-2014 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
 * VirtualBox OSE distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync/*******************************************************************************
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync* Header Files *
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync*******************************************************************************/
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#include <iprt/test.h>
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#include <iprt/asm.h>
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync//#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync//# include <iprt/asm-amd64-x86.h>
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync//#endif
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#include <iprt/mem.h>
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#include <iprt/mp.h>
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#include <iprt/string.h>
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#include <iprt/thread.h>
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#include <iprt/time.h>
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync/*******************************************************************************
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync* Global Variables *
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync*******************************************************************************/
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync/** The number of threads to skip when testing. */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsyncstatic uint32_t g_cThreadsToSkip = 1;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync/**
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync * Gets the next online CPU.
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync *
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync * @returns Next CPU index or RTCPUSET_MAX_CPUS.
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync * @param iCurCpu The current CPU (index).
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsyncstatic int getNextCpu(unsigned iCurCpu)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync{
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /* Skip to the next chip. */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync iCurCpu = (iCurCpu / g_cThreadsToSkip) * g_cThreadsToSkip;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync iCurCpu += g_cThreadsToSkip;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /* Skip offline cpus. */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync while ( iCurCpu < RTCPUSET_MAX_CPUS
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync && !RTMpIsCpuOnline(iCurCpu) )
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync iCurCpu++;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /* Make sure we're within bounds (in case of bad input). */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync if (iCurCpu > RTCPUSET_MAX_CPUS)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync iCurCpu = RTCPUSET_MAX_CPUS;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync return iCurCpu;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync}
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsyncstatic void doTest(RTTEST hTest)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync{
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync NOREF(hTest);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync uint32_t iAllocCpu = 0;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync while (iAllocCpu < RTCPUSET_MAX_CPUS)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync {
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync const uint32_t cbTestSet = _1M * 32;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync const uint32_t cIterations = 384;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /*
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync * Change CPU and allocate a chunk of memory.
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTESTI_CHECK_RC_OK_RETV(RTThreadSetAffinityToCpu(RTMpCpuIdFromSetIndex(iAllocCpu)));
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync void *pvTest = RTMemPageAlloc(cbTestSet); /* may be leaked, who cares */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTESTI_CHECK_RETV(pvTest != NULL);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync memset(pvTest, 0xef, cbTestSet);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /*
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync * Do the tests.
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync uint32_t iAccessCpu = 0;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync while (iAccessCpu < RTCPUSET_MAX_CPUS)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync {
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTESTI_CHECK_RC_OK_RETV(RTThreadSetAffinityToCpu(RTMpCpuIdFromSetIndex(iAccessCpu)));
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /*
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync * The write test.
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTimeNanoTS(); RTThreadYield();
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync uint64_t u64StartTS = RTTimeNanoTS();
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync for (uint32_t i = 0; i < cIterations; i++)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync {
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync ASMCompilerBarrier(); /* paranoia */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync memset(pvTest, i, cbTestSet);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync }
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync uint64_t const cNsElapsedWrite = RTTimeNanoTS() - u64StartTS;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync uint64_t cMBPerSec = (uint64_t)( ((uint64_t)cIterations * cbTestSet) /* bytes */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync / ((long double)cNsElapsedWrite / RT_NS_1SEC_64) /* seconds */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync / _1M /* MB */ );
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-write", iAllocCpu, iAccessCpu);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /*
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync * The read test.
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync memset(pvTest, 0, cbTestSet);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTimeNanoTS(); RTThreadYield();
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync u64StartTS = RTTimeNanoTS();
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync for (uint32_t i = 0; i < cIterations; i++)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync {
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#if 1
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync size_t register u = 0;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync size_t volatile *puCur = (size_t volatile *)pvTest;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync size_t volatile *puEnd = puCur + cbTestSet / sizeof(size_t);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync while (puCur != puEnd)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync u += *puCur++;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#else
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync ASMCompilerBarrier(); /* paranoia */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync void *pvFound = memchr(pvTest, (i & 127) + 1, cbTestSet);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTESTI_CHECK(pvFound == NULL);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#endif
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync }
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync uint64_t const cNsElapsedRead = RTTimeNanoTS() - u64StartTS;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync cMBPerSec = (uint64_t)( ((uint64_t)cIterations * cbTestSet) /* bytes */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync / ((long double)cNsElapsedRead / RT_NS_1SEC_64) /* seconds */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync / _1M /* MB */ );
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-read", iAllocCpu, iAccessCpu);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /*
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync * The read/write test.
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTimeNanoTS(); RTThreadYield();
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync u64StartTS = RTTimeNanoTS();
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync for (uint32_t i = 0; i < cIterations; i++)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync {
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync ASMCompilerBarrier(); /* paranoia */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync memcpy(pvTest, (uint8_t *)pvTest + cbTestSet / 2, cbTestSet / 2);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync }
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync uint64_t const cNsElapsedRW = RTTimeNanoTS() - u64StartTS;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync cMBPerSec = (uint64_t)( ((uint64_t)cIterations * cbTestSet) /* bytes */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync / ((long double)cNsElapsedRW / RT_NS_1SEC_64) /* seconds */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync / _1M /* MB */ );
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-read-write", iAllocCpu, iAccessCpu);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /*
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync * Total time.
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTestIValueF(cNsElapsedRead + cNsElapsedWrite + cNsElapsedRW, RTTESTUNIT_NS,
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync "cpu%02u-mem%02u-time", iAllocCpu, iAccessCpu);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /* advance */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync iAccessCpu = getNextCpu(iAccessCpu);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync }
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /*
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync * Clean up and advance to the next CPU.
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTMemPageFree(pvTest, cbTestSet);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync iAllocCpu = getNextCpu(iAllocCpu);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync }
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync}
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsyncint main(int argc, char **argv)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync{
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTEST hTest;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTEXITCODE rcExit = RTTestInitAndCreate("numa-1", &hTest);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync if (rcExit != RTEXITCODE_SUCCESS)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync return rcExit;
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync RTTestBanner(hTest);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync /** @todo figure basic topology. */
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync#endif
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync if (argc == 2)
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync g_cThreadsToSkip = RTStrToUInt8(argv[1]);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync doTest(hTest);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync return RTTestSummaryAndDestroy(hTest);
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync}
cf22150eaeeb72431bf1cf65c309a431454fb22bvboxsync