ASMMultU64ByU32DivByU32.asm revision 6b68b1cfa19c6e90d8ea0ef58e6abda445d518f4
; $Id$
;; @file
; innotek Portable Runtime - Assembly Functions, ASMMultU64ByU32DivByU32.
;

;
; Copyright (C) 2006-2007 InnoTek Systemberatung GmbH
;
; This file is part of VirtualBox Open Source Edition (OSE), as
; available from http://www.virtualbox.org. This file is free software;
; you can redistribute it and/or modify it under the terms of the GNU
; General Public License as published by the Free Software Foundation,
; in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
; distribution. VirtualBox OSE is distributed in the hope that it will
; be useful, but WITHOUT ANY WARRANTY of any kind.
;
;
6b68b1cfa19c6e90d8ea0ef58e6abda445d518f4vboxsync
6b68b1cfa19c6e90d8ea0ef58e6abda445d518f4vboxsync%include "iprt/asmdefs.mac"
6b68b1cfa19c6e90d8ea0ef58e6abda445d518f4vboxsync
6b68b1cfa19c6e90d8ea0ef58e6abda445d518f4vboxsync
;;
; Multiply a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
; using a 96 bit intermediate result.
;
; @returns (u64A * u32B) / u32C.
;          AMD64: in rax.  X86: in edx:eax.
; @param   u64A/rcx/rdi    The 64-bit value.
; @param   u32B/edx/esi    The 32-bit value to multiply A by.
; @param   u32C/r8d/edx    The 32-bit value to divide A*B by.
;
; @remarks No argument checking is done here.  The DIV instruction raises a
;          divide error (#DE) if u32C is zero or if the quotient does not fit
;          in the destination register, so callers must make sure the final
;          result fits in 64 bits.
;
; @cproto  DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
;
BEGINPROC_EXPORTED ASMMultU64ByU32DivByU32
%ifdef RT_ARCH_AMD64

        ;
        ; Get the arguments into rax (u64A), r9 (u32B) and r8 (u32C).  The
        ; 32-bit register writes zero-extend, so r9 and r8 hold clean 64-bit
        ; values no matter what the caller left in the upper halves.
        ;
 %ifdef ASM_CALL64_MSC
        mov     rax, rcx                ; rax = u64A
        mov     r9d, edx                ; r9  = u32B; should check the specs wrt the high bits one day...
        mov     r8d, r8d                ; r8  = u32C; be paranoid for the time being.
 %else
        mov     rax, rdi                ; rax = u64A
        mov     r9d, esi                ; r9  = u32B
        mov     r8d, edx                ; r8  = u32C; copied out of rdx before the mul overwrites it.
 %endif
        mul     r9                      ; rdx:rax = u64A * u32B
        div     r8                      ; rax = rdx:rax / u32C; rdx = remainder (unused)

%else ; X86
        ;
        ; This implementation is converted from the GCC inline
        ; version of the code. Nothing additional has been done
        ; performance wise.
        ;
        push    esi                     ; esi and edi are used as scratch below
        push    edi                     ; and restored in the epilogue.

        ; Stack arguments: 04h skips the return address, 08h the two pushes above.
%define u64A_Lo     [esp + 04h + 08h]
%define u64A_Hi     [esp + 08h + 08h]
%define u32B        [esp + 0ch + 08h]
%define u32C        [esp + 10h + 08h]

        ; Load parameters into registers.
        mov     eax, u64A_Lo
        mov     esi, u64A_Hi
        mov     ecx, u32B
        mov     edi, u32C

        ; The body, just like the GCC inline version:
        ;   1. u64Lo = u64A.lo * u32B  and  u64Hi = u64A.hi * u32B + u64Lo.hi,
        ;      which makes u64Hi:u64Lo.lo the 96-bit product.
        ;   2. Long division by u32C, upper 64 bits first, with the remainder
        ;      carried into the second division via edx.
        mul     ecx                     ; eax = u64Lo.lo = (u64A.lo * u32B).lo
                                        ; edx = u64Lo.hi = (u64A.lo * u32B).hi
        xchg    eax, esi                ; esi = u64Lo.lo
                                        ; eax = u64A.hi
        xchg    edx, edi                ; edi = u64Lo.hi
                                        ; edx = u32C
        xchg    edx, ecx                ; ecx = u32C
                                        ; edx = u32B
        mul     edx                     ; eax = u64Hi.lo = (u64A.hi * u32B).lo
                                        ; edx = u64Hi.hi = (u64A.hi * u32B).hi
        add     eax, edi                ; u64Hi.lo += u64Lo.hi
        adc     edx, 0                  ; u64Hi.hi += carry
        div     ecx                     ; eax = u64Hi / u32C
                                        ; edx = u64Hi % u32C
        mov     edi, eax                ; edi = u64Result.hi = u64Hi / u32C
        mov     eax, esi                ; eax = u64Lo.lo
        div     ecx                     ; eax = u64Result.lo = (remainder:u64Lo.lo) / u32C
        mov     edx, edi                ; edx = u64Result.hi

        ; epilogue
        pop     edi
        pop     esi

        ; Don't let the argument macros leak past this procedure.
%undef u64A_Lo
%undef u64A_Hi
%undef u32B
%undef u32C
%endif
        ret
ENDPROC ASMMultU64ByU32DivByU32
6b68b1cfa19c6e90d8ea0ef58e6abda445d518f4vboxsync
6b68b1cfa19c6e90d8ea0ef58e6abda445d518f4vboxsync