; $Id$
;; @file
; IPRT - Assembly Functions, ASMMultU64ByU32DivByU32.
;

;
; Copyright (C) 2006-2010 Oracle Corporation
;
; This file is part of VirtualBox Open Source Edition (OSE), as
; available from http://www.virtualbox.org. This file is free software;
; you can redistribute it and/or modify it under the terms of the GNU
; General Public License (GPL) as published by the Free Software
; Foundation, in version 2 as it comes in the "COPYING" file of the
; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
; VirtualBox OSE distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;

%include "iprt/asmdefs.mac"


;;
; Multiply a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
; using a 96 bit intermediate result.
;
; @returns  (u64A * u32B) / u32C.
; @param    u64A/rcx/rdi    The 64-bit value.
; @param    u32B/edx/esi    The 32-bit value to multiply A by.
; @param    u32C/r8d/edx    The 32-bit value to divide A*B by.
;
; @remarks  The registers listed above are the AMD64 argument registers, first
;           for the ASM_CALL64_MSC convention and then for the other 64-bit
;           convention.  On x86 all three arguments are read from the stack.
; @remarks  Nothing here checks u32C for zero or the quotient for overflow; the
;           DIV instruction raises a divide error (#DE) when u32C is zero or
;           when the result does not fit in 64 bits.
;
; @cproto   DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
;
BEGINPROC_EXPORTED ASMMultU64ByU32DivByU32
%ifdef RT_ARCH_AMD64

        ; The 32-bit register moves below zero-extend into the full 64-bit
        ; register, so we do not depend on the caller having cleared the high
        ; halves of the 32-bit arguments.
 %ifdef ASM_CALL64_MSC
        mov     rax, rcx                ; rax = u64A
        mov     r9d, edx                ; r9  = u32B (zero-extended; also frees rdx for the mul)
        mov     r8d, r8d                ; r8  = u32C (zero-extended; paranoia wrt the high bits)
 %else
        mov     rax, rdi                ; rax = u64A
        mov     r9d, esi                ; r9  = u32B (zero-extended)
        mov     r8d, edx                ; r8  = u32C (zero-extended; copied before mul clobbers rdx)
 %endif
        mul     r9                      ; rdx:rax = u64A * u32B
        div     r8                      ; rax     = rdx:rax / u32C (remainder in rdx is discarded)

%else ; X86
        ;
        ; This implementation is converted from the GCC inline
        ; version of the code.  Nothing additional has been done
        ; performance wise.
        ;
        push    esi
        push    edi

        ; Argument offsets: 04h skips the return address, 08h the two
        ; registers pushed above.
%define u64A_Lo     [esp + 04h + 08h]
%define u64A_Hi     [esp + 08h + 08h]
%define u32B        [esp + 0ch + 08h]
%define u32C        [esp + 10h + 08h]

        ; Load parameters into registers.
        mov     eax, u64A_Lo
        mov     esi, u64A_Hi
        mov     ecx, u32B
        mov     edi, u32C

        ; The body, just like the GCC inline version:  Two 32x32 multiplications
        ; build the 96-bit product, then a two step long division by u32C.
        mul     ecx                     ; eax = u64Lo.lo = (u64A.lo * u32B).lo
                                        ; edx = u64Lo.hi = (u64A.lo * u32B).hi
        xchg    eax, esi                ; esi = u64Lo.lo
                                        ; eax = u64A.hi
        xchg    edx, edi                ; edi = u64Lo.hi
                                        ; edx = u32C
        xchg    edx, ecx                ; ecx = u32C
                                        ; edx = u32B
        mul     edx                     ; eax = u64Hi.lo = (u64A.hi * u32B).lo
                                        ; edx = u64Hi.hi = (u64A.hi * u32B).hi
        add     eax, edi                ; u64Hi.lo += u64Lo.hi
        adc     edx, 0                  ; u64Hi.hi += carry
        div     ecx                     ; eax = u64Hi / u32C
                                        ; edx = u64Hi % u32C
        mov     edi, eax                ; edi = u64Result.hi = u64Hi / u32C
        mov     eax, esi                ; eax = u64Lo.lo; edx:eax = remainder:u64Lo.lo
        div     ecx                     ; eax = u64Result.lo
        mov     edx, edi                ; edx = u64Result.hi

        ; epilogue
        pop     edi
        pop     esi
%endif
        ret
ENDPROC ASMMultU64ByU32DivByU32
