/** @file
* IPRT - Assembly Routines for Optimizing some Integer Math Operations.
*/
/*
* Copyright (C) 2006-2010 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
#ifndef ___iprt_asm_math_h
#define ___iprt_asm_math_h
#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
# include <intrin.h>
/* Emit the intrinsics at all optimization levels. */
# ifdef RT_ARCH_AMD64
# endif
#endif
/** @defgroup grp_rt_asm_math Integer Math Optimizations
* @ingroup grp_rt_asm
* @{ */
/**
* Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
*
* @returns u32F1 * u32F2.
*/
#else
{
# ifdef RT_ARCH_X86
: "=A" (u64)
# else
{
}
# endif
return u64;
# else /* generic: */
# endif
}
#endif
/**
* Multiplies two signed 32-bit values returning a signed 64-bit result.
*
* @returns The signed 64-bit product of the two factors.
*/
#else
{
# ifdef RT_ARCH_X86
: "=A" (i64)
# else
{
}
# endif
return i64;
# else /* generic: */
# endif
}
#endif
#if ARCH_BITS == 64
{
*pu64ProdHi = u64High;
return u64Low;
# else
# error "hmm"
# endif
# else /* generic: */
/*
* F1 * F2 = Prod
* -- --
* ab * cd = b*d + a*d*10 + b*c*10 + a*c*100
*
* Where a, b, c and d are 'digits', and 10 is max digit + 1.
*
* Our digits are 32-bit wide, so instead of 10 we multiply by 4G.
*/
# endif
}
#endif
/**
* Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
*
* @returns u64 / u32.
*/
#if RT_INLINE_ASM_EXTERNAL && defined(RT_ARCH_X86)
#else
{
# ifdef RT_ARCH_X86
# else
{
}
# endif
return u32;
# else /* generic: */
# endif
}
#endif
/**
* Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
*
* @returns The signed 32-bit quotient (dividend / divisor).
*/
#if RT_INLINE_ASM_EXTERNAL && defined(RT_ARCH_X86)
#else
{
# ifdef RT_ARCH_X86
# else
{
}
# endif
return i32;
# else /* generic: */
# endif
}
#endif
/**
* Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
* returning the remainder.
*
* @returns u64 % u32.
*
* @remarks It is important that the quotient is <= UINT32_MAX or we'll overflow and crash.
*/
#if RT_INLINE_ASM_EXTERNAL && defined(RT_ARCH_X86)
#else
{
# ifdef RT_ARCH_X86
# else
{
}
# endif
return u32;
# else /* generic: */
# endif
}
#endif
/**
* Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
* returning the remainder.
*
* @returns The signed 32-bit remainder (dividend % divisor).
*
* @remarks It is important that the quotient fits in a signed 32-bit integer or we'll overflow and crash.
*/
#if RT_INLINE_ASM_EXTERNAL && defined(RT_ARCH_X86)
#else
{
# ifdef RT_ARCH_X86
# else
{
}
# endif
return i32;
# else /* generic: */
# endif
}
#endif
/**
* Multiply a 32-bit by a 32-bit integer and divide the result by a 32-bit integer
* using a 64 bit intermediate result.
*
* @returns (u32A * u32B) / u32C.
* @param u32A The 32-bit value (A).
* @param u32B The 32-bit value to multiply A by.
* @param u32C The 32-bit value to divide A*B by.
*
* @remarks Architecture specific.
* @remarks Make sure the result won't ever exceed 32-bit, because a hardware
* exception may be raised if it does.
* @remarks On x86 this may be used to avoid dragging in 64-bit builtin
* arithmetic functions.
*/
#else
{
"divl %3\n\t"
: "=&a" (u32Result),
"=&d" (u32Spill)
: "r" (u32B),
"r" (u32C),
"0" (u32A));
return u32Result;
# else
# endif
}
#endif
/**
* Multiply a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
* using a 96 bit intermediate result.
*
* @returns (u64A * u32B) / u32C.
* @param u64A The 64-bit value.
* @param u32B The 32-bit value to multiply A by.
* @param u32C The 32-bit value to divide A*B by.
*
* @remarks Architecture specific.
* @remarks Make sure the result won't ever exceed 64-bit, because a hardware
* exception may be raised if it does.
* @remarks On x86 this may be used to avoid dragging in 64-bit builtin
* arithmetic functions.
*/
#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
#else
{
# ifdef RT_ARCH_AMD64
"divq %3\n\t"
: "=&a" (u64Result),
"=&d" (u64Spill)
"0" (u64A));
return u64Result;
# else
edx = u64Lo.hi = (u64A.lo * u32B).hi */
"xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
eax = u64A.hi */
"xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
edx = u32C */
"xchg %%edx,%%ecx \n\t" /* ecx = u32C
edx = u32B */
"mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
"addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
"adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
"divl %%ecx \n\t" /* eax = u64Hi / u32C
edx = u64Hi % u32C */
"movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
"movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
"divl %%ecx \n\t" /* u64Result.lo */
"movl %%edi,%%edx \n\t" /* u64Result.hi */
"c"(u32B),
"D"(u32C));
return u64Result;
# endif
# else
RTUINT64U u;
return u.u;
# endif
}
#endif
/** @} */
#endif