; $Id$
;; @file
; IPRT - Big Integer Numbers, AMD64 and X86 Assembly Workers
;
;
; Copyright (C) 2006-2014 Oracle Corporation
;
; This file is part of VirtualBox Open Source Edition (OSE), as
; available from http://www.virtualbox.org. This file is free software;
; you can redistribute it and/or modify it under the terms of the GNU
; General Public License (GPL) as published by the Free Software
; Foundation, in version 2 as it comes in the "COPYING" file of the
; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
; VirtualBox OSE distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "internal/bignum.mac"
BEGINCODE
;;
; Subtracts a number (pauSubtrahend) from a larger number (pauMinuend) and
; stores the result in pauResult.
;
; All three numbers are zero padded such that a borrow can be carried one (or
; two for 64-bit) elements beyond the end of the largest number.
;
; @returns nothing.
; @param pauResult x86:[ebp + 8] gcc:rdi msc:rcx
; @param pauMinuend x86:[ebp + 12] gcc:rsi msc:rdx
; @param pauSubtrahend x86:[ebp + 16] gcc:rdx msc:r8
; @param cUsed x86:[ebp + 20] gcc:rcx msc:r9
;
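; A rough C equivalent of this worker (a sketch only, assuming 64-bit
; elements; the explicit borrow arithmetic stands in for the SBB
; instruction used below):
;
;       uint64_t fBorrow = 0;
;       for (uint32_t i = 0; i < cUsed; i++)
;       {
;           uint64_t uTmp = pauMinuend[i] - fBorrow;
;           fBorrow = (pauMinuend[i] < fBorrow) | (uTmp < pauSubtrahend[i]);
;           pauResult[i] = uTmp - pauSubtrahend[i];
;       }
;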
BEGINPROC rtBigNumMagnitudeSubAssemblyWorker
push xBP
SEH64_PUSH_xBP
mov xBP, xSP
SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE
%ifdef RT_ARCH_AMD64
%ifdef ASM_CALL64_GCC
%define pauResult rdi
%define pauMinuend rsi
%define pauSubtrahend rdx
%define cUsed ecx
%else
%define pauResult rcx
%define pauMinuend rdx
%define pauSubtrahend r8
%define cUsed r9d
%endif
xor r11d, r11d ; index register.
%if RTBIGNUM_ELEMENT_SIZE == 4
add cUsed, 1 ; cUsed = RT_ALIGN(cUsed, 2) / 2
shr cUsed, 1
%endif
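; With 32-bit elements the loops below work on two elements at a time
; using 64-bit operations; the zero padding noted above makes an odd
; final element safe to process this way.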
cmp cUsed, 8 ; Skip the big loop if small number.
jb .small_job
mov r10d, cUsed
shr r10d, 3
clc
.big_loop:
mov rax, [pauMinuend + r11]
sbb rax, [pauSubtrahend + r11]
mov [pauResult + r11], rax
mov rax, [pauMinuend + r11 + 8]
sbb rax, [pauSubtrahend + r11 + 8]
mov [pauResult + r11 + 8], rax
mov rax, [pauMinuend + r11 + 16]
sbb rax, [pauSubtrahend + r11 + 16]
mov [pauResult + r11 + 16], rax
mov rax, [pauMinuend + r11 + 24]
sbb rax, [pauSubtrahend + r11 + 24]
mov [pauResult + r11 + 24], rax
mov rax, [pauMinuend + r11 + 32]
sbb rax, [pauSubtrahend + r11 + 32]
mov [pauResult + r11 + 32], rax
mov rax, [pauMinuend + r11 + 40]
sbb rax, [pauSubtrahend + r11 + 40]
mov [pauResult + r11 + 40], rax
mov rax, [pauMinuend + r11 + 48]
sbb rax, [pauSubtrahend + r11 + 48]
mov [pauResult + r11 + 48], rax
mov rax, [pauMinuend + r11 + 56]
sbb rax, [pauSubtrahend + r11 + 56]
mov [pauResult + r11 + 56], rax
lea r11, [r11 + 64]
dec r10d ; Does not change CF.
jnz .big_loop
lahf ; Save CF
and cUsed, 7 ; Up to seven odd rounds.
jz .done
sahf ; Restore CF.
jmp .small_loop ; Jump past the clc; CF is already correct.
.small_job:
clc
.small_loop:
mov rax, [pauMinuend + r11]
sbb rax, [pauSubtrahend + r11]
mov [pauResult + r11], rax
lea r11, [r11 + 8]
dec cUsed ; does not change CF.
jnz .small_loop
%ifdef RT_STRICT
jnc .done
int3
%endif
.done:
%elifdef RT_ARCH_X86
push edi
push esi
push ebx
mov edi, [ebp + 08h] ; pauResult
%define pauResult edi
mov ecx, [ebp + 0ch] ; pauMinuend
%define pauMinuend ecx
mov edx, [ebp + 10h] ; pauSubtrahend
%define pauSubtrahend edx
mov esi, [ebp + 14h] ; cUsed
%define cUsed esi
xor ebx, ebx ; index register.
cmp cUsed, 8 ; Skip the big loop if small number.
jb .small_job
shr cUsed, 3
clc
.big_loop:
mov eax, [pauMinuend + ebx]
sbb eax, [pauSubtrahend + ebx]
mov [pauResult + ebx], eax
mov eax, [pauMinuend + ebx + 4]
sbb eax, [pauSubtrahend + ebx + 4]
mov [pauResult + ebx + 4], eax
mov eax, [pauMinuend + ebx + 8]
sbb eax, [pauSubtrahend + ebx + 8]
mov [pauResult + ebx + 8], eax
mov eax, [pauMinuend + ebx + 12]
sbb eax, [pauSubtrahend + ebx + 12]
mov [pauResult + ebx + 12], eax
mov eax, [pauMinuend + ebx + 16]
sbb eax, [pauSubtrahend + ebx + 16]
mov [pauResult + ebx + 16], eax
mov eax, [pauMinuend + ebx + 20]
sbb eax, [pauSubtrahend + ebx + 20]
mov [pauResult + ebx + 20], eax
mov eax, [pauMinuend + ebx + 24]
sbb eax, [pauSubtrahend + ebx + 24]
mov [pauResult + ebx + 24], eax
mov eax, [pauMinuend + ebx + 28]
sbb eax, [pauSubtrahend + ebx + 28]
mov [pauResult + ebx + 28], eax
lea ebx, [ebx + 32]
dec cUsed ; Does not change CF.
jnz .big_loop
lahf ; Save CF
mov cUsed, [ebp + 14h] ; Up to seven odd rounds.
and cUsed, 7
jz .done
sahf ; Restore CF.
jmp .small_loop ; Jump past the clc; CF is already correct.
.small_job:
clc
.small_loop:
mov eax, [pauMinuend + ebx]
sbb eax, [pauSubtrahend + ebx]
mov [pauResult + ebx], eax
lea ebx, [ebx + 4]
dec cUsed ; Does not change CF
jnz .small_loop
%ifdef RT_STRICT
jnc .done
int3
%endif
.done:
pop ebx
pop esi
pop edi
%else
%error "Unsupported arch"
%endif
leave
ret
%undef pauResult
%undef pauMinuend
%undef pauSubtrahend
%undef cUsed
ENDPROC rtBigNumMagnitudeSubAssemblyWorker
;;
; Subtracts a number (pauSubtrahend) from a larger number (pauResultMinuend)
; and stores the result in place of the minuend (pauResultMinuend).
;
; Both numbers are zero padded such that a borrow can be carried one (or
; two for 64-bit) elements beyond the end of the larger number.
;
; @returns nothing.
; @param pauResultMinuend x86:[ebp + 8] gcc:rdi msc:rcx
; @param pauSubtrahend x86:[ebp + 12] gcc:rsi msc:rdx
; @param cUsed x86:[ebp + 16] gcc:rdx msc:r8
;
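; A rough C equivalent (a sketch only, assuming 64-bit elements): the
; in-place variant of the subtraction worker above:
;
;       uint64_t fBorrow = 0;
;       for (uint32_t i = 0; i < cUsed; i++)
;       {
;           uint64_t uTmp = pauResultMinuend[i] - fBorrow;
;           fBorrow = (pauResultMinuend[i] < fBorrow) | (uTmp < pauSubtrahend[i]);
;           pauResultMinuend[i] = uTmp - pauSubtrahend[i];
;       }
;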
BEGINPROC rtBigNumMagnitudeSubThisAssemblyWorker
push xBP
SEH64_PUSH_xBP
mov xBP, xSP
SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE
%ifdef RT_ARCH_AMD64
%ifdef ASM_CALL64_GCC
%define pauResultMinuend rdi
%define pauSubtrahend rsi
%define cUsed edx
%else
%define pauResultMinuend rcx
%define pauSubtrahend rdx
%define cUsed r8d
%endif
xor r11d, r11d ; index register.
%if RTBIGNUM_ELEMENT_SIZE == 4
add cUsed, 1 ; cUsed = RT_ALIGN(cUsed, 2) / 2
shr cUsed, 1
%endif
cmp cUsed, 8 ; Skip the big loop if small number.
jb .small_job
mov r10d, cUsed
shr r10d, 3
clc
.big_loop:
mov rax, [pauSubtrahend + r11]
sbb [pauResultMinuend + r11], rax
mov rax, [pauSubtrahend + r11 + 8]
sbb [pauResultMinuend + r11 + 8], rax
mov rax, [pauSubtrahend + r11 + 16]
sbb [pauResultMinuend + r11 + 16], rax
mov rax, [pauSubtrahend + r11 + 24]
sbb [pauResultMinuend + r11 + 24], rax
mov rax, [pauSubtrahend + r11 + 32]
sbb [pauResultMinuend + r11 + 32], rax
mov rax, [pauSubtrahend + r11 + 40]
sbb [pauResultMinuend + r11 + 40], rax
mov rax, [pauSubtrahend + r11 + 48]
sbb [pauResultMinuend + r11 + 48], rax
mov rax, [pauSubtrahend + r11 + 56]
sbb [pauResultMinuend + r11 + 56], rax
lea r11, [r11 + 64]
dec r10d ; Does not change CF.
jnz .big_loop
lahf ; Save CF
and cUsed, 7 ; Up to seven odd rounds.
jz .done
sahf ; Restore CF.
jmp .small_loop ; Jump past the clc; CF is already correct.
.small_job:
clc
.small_loop:
mov rax, [pauSubtrahend + r11]
sbb [pauResultMinuend + r11], rax
lea r11, [r11 + 8]
dec cUsed ; does not change CF.
jnz .small_loop
%ifdef RT_STRICT
jnc .done
int3
%endif
.done:
%elifdef RT_ARCH_X86
push edi
push ebx
mov edi, [ebp + 08h] ; pauResultMinuend
%define pauResultMinuend edi
mov edx, [ebp + 0ch] ; pauSubtrahend
%define pauSubtrahend edx
mov ecx, [ebp + 10h] ; cUsed
%define cUsed ecx
xor ebx, ebx ; index register.
cmp cUsed, 8 ; Skip the big loop if small number.
jb .small_job
shr cUsed, 3
clc
.big_loop:
mov eax, [pauSubtrahend + ebx]
sbb [pauResultMinuend + ebx], eax
mov eax, [pauSubtrahend + ebx + 4]
sbb [pauResultMinuend + ebx + 4], eax
mov eax, [pauSubtrahend + ebx + 8]
sbb [pauResultMinuend + ebx + 8], eax
mov eax, [pauSubtrahend + ebx + 12]
sbb [pauResultMinuend + ebx + 12], eax
mov eax, [pauSubtrahend + ebx + 16]
sbb [pauResultMinuend + ebx + 16], eax
mov eax, [pauSubtrahend + ebx + 20]
sbb [pauResultMinuend + ebx + 20], eax
mov eax, [pauSubtrahend + ebx + 24]
sbb [pauResultMinuend + ebx + 24], eax
mov eax, [pauSubtrahend + ebx + 28]
sbb [pauResultMinuend + ebx + 28], eax
lea ebx, [ebx + 32]
dec cUsed ; Does not change CF.
jnz .big_loop
lahf ; Save CF
mov cUsed, [ebp + 10h] ; Up to seven odd rounds.
and cUsed, 7
jz .done
sahf ; Restore CF.
jmp .small_loop ; Jump past the clc; CF is already correct.
.small_job:
clc
.small_loop:
mov eax, [pauSubtrahend + ebx]
sbb [pauResultMinuend + ebx], eax
lea ebx, [ebx + 4]
dec cUsed ; Does not change CF
jnz .small_loop
%ifdef RT_STRICT
jnc .done
int3
%endif
.done:
pop ebx
pop edi
%else
%error "Unsupported arch"
%endif
leave
ret
ENDPROC rtBigNumMagnitudeSubThisAssemblyWorker
;;
; Shifts an element array one bit to the left, returning the final carry value.
;
; On 64-bit hosts the array is always zero padded to a multiple of 8 bytes, so
; we can use 64-bit operand sizes even if the element type is 32-bit.
;
; @returns The final carry value.
; @param pauElements x86:[ebp + 8] gcc:rdi msc:rcx
; @param cUsed x86:[ebp + 12] gcc:rsi msc:rdx
; @param uCarry x86:[ebp + 16] gcc:rdx msc:r8
;
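; A rough C equivalent (a sketch only, assuming 64-bit elements and a
; uCarry input of 0 or 1; the RCL instruction below performs the shift
; and the carry chaining in a single operation):
;
;       while (cUsed-- > 0)
;       {
;           uint64_t uNewCarry = *pauElements >> 63;
;           *pauElements = (*pauElements << 1) | uCarry;
;           uCarry = uNewCarry;
;           pauElements++;
;       }
;       return uCarry;
;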
BEGINPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker
push xBP
SEH64_PUSH_xBP
mov xBP, xSP
SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE
%ifdef RT_ARCH_AMD64
%ifdef ASM_CALL64_GCC
%define pauElements rdi
%define cUsed esi
%define uCarry edx
%else
%define pauElements rcx
%define cUsed edx
%define uCarry r8d
%endif
%elifdef RT_ARCH_X86
%define pauElements ecx
mov pauElements, [ebp + 08h]
%define cUsed edx
mov cUsed, [ebp + 0ch]
%define uCarry eax
mov uCarry, [ebp + 10h]
%else
%error "Unsupported arch."
%endif
; Lots to do?
cmp cUsed, 8
jae .big_loop_init
; Check for empty array.
test cUsed, cUsed
jz .no_elements
jmp .small_loop_init
; Big loop - 8 unrolled loop iterations.
.big_loop_init:
%ifdef RT_ARCH_AMD64
mov r11d, cUsed
%endif
shr cUsed, 3
test uCarry, uCarry ; clear the carry flag
jz .big_loop
stc
.big_loop:
%if RTBIGNUM_ELEMENT_SIZE == 8
rcl qword [pauElements], 1
rcl qword [pauElements + 8], 1
rcl qword [pauElements + 16], 1
rcl qword [pauElements + 24], 1
rcl qword [pauElements + 32], 1
rcl qword [pauElements + 40], 1
rcl qword [pauElements + 48], 1
rcl qword [pauElements + 56], 1
lea pauElements, [pauElements + 64]
%else
rcl dword [pauElements], 1
rcl dword [pauElements + 4], 1
rcl dword [pauElements + 8], 1
rcl dword [pauElements + 12], 1
rcl dword [pauElements + 16], 1
rcl dword [pauElements + 20], 1
rcl dword [pauElements + 24], 1
rcl dword [pauElements + 28], 1
lea pauElements, [pauElements + 32]
%endif
dec cUsed
jnz .big_loop
; More to do?
lahf ; save carry flag (uCarry no longer used on x86).
%ifdef RT_ARCH_AMD64
mov cUsed, r11d
%else
mov cUsed, [ebp + 0ch]
%endif
and cUsed, 7
jz .restore_cf_and_return ; Jump if we're good and done.
sahf ; Restore CF.
jmp .small_loop ; Deal with the odd rounds.
.restore_cf_and_return:
sahf
jmp .carry_to_eax
; Small loop - One round at a time.
.small_loop_init:
test uCarry, uCarry ; clear the carry flag
jz .small_loop
stc
.small_loop:
%if RTBIGNUM_ELEMENT_SIZE == 8
rcl qword [pauElements], 1
lea pauElements, [pauElements + 8]
%else
rcl dword [pauElements], 1
lea pauElements, [pauElements + 4]
%endif
dec cUsed
jnz .small_loop
; Calculate return value.
.carry_to_eax:
mov eax, 0
jnc .return
inc eax
.return:
leave
ret
.no_elements:
mov eax, uCarry
jmp .return
ENDPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker
;;
; Performs a 128-bit by 64-bit division on 64-bit hosts and
; a 64-bit by 32-bit division on 32-bit hosts.
;
; @returns nothing.
; @param puQuotient x86:[ebp + 8] gcc:rdi msc:rcx Double element.
; @param puRemainder x86:[ebp + 12] gcc:rsi msc:rdx Normal element.
; @param uDividendHi x86:[ebp + 16] gcc:rdx msc:r8
; @param uDividendLo x86:[ebp + 20] gcc:rcx msc:r9
; @param uDivisor x86:[ebp + 24] gcc:r8 msc:[rbp + 30h]
;
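; A rough C equivalent of the 64-bit case (a sketch only; __uint128_t is
; a GCC/Clang extension used here purely for illustration):
;
;       __uint128_t uDividend = ((__uint128_t)uDividendHi << 64) | uDividendLo;
;       puQuotient[0] = (uint64_t)(uDividend / uDivisor);
;       puQuotient[1] = (uint64_t)((uDividend / uDivisor) >> 64);
;       *puRemainder  = (uint64_t)(uDividend % uDivisor);
;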
BEGINPROC rtBigNumElement2xDiv2xBy1x
push xBP
SEH64_PUSH_xBP
mov xBP, xSP
SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE
%ifdef RT_ARCH_AMD64
%if RTBIGNUM_ELEMENT_SIZE == 4
%error "sorry not implemented yet."
sorry not implemented yet.
%endif
%define uDividendHi rdx
%define uDividendLo rax
%ifdef ASM_CALL64_GCC
%define uDivisor r8
%define puQuotient rdi
%define puRemainder rsi
mov rax, rcx
%else
%define puQuotient rcx
%define puRemainder r11
%define uDivisor r10
mov r11, rdx
mov r10, [rbp + 30h]
mov rdx, r8
mov rax, r9
%endif
%elifdef RT_ARCH_X86
push edi
push ebx
%define uDividendHi edx
mov uDividendHi, [ebp + 10h]
%define uDividendLo eax
mov uDividendLo, [ebp + 14h]
%define uDivisor ecx
mov uDivisor, [ebp + 18h]
%define puQuotient edi
mov puQuotient, [ebp + 08h]
%define puRemainder ebx
mov puRemainder, [ebp + 0ch]
%else
%error "Unsupported arch."
%endif
%ifdef RT_STRICT
;
; The divisor shall not be zero.
;
test uDivisor, uDivisor
jnz .divisor_not_zero
int3
.divisor_not_zero:
%endif
;
; Avoid division overflow. This will calculate the high part of the quotient.
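; DIV raises #DE when the quotient does not fit in a single register
; (e.g. dividing 2^64 by 1 on a 64-bit host), which can happen whenever
; uDividendHi >= uDivisor. Dividing the high element first replaces it
; with the remainder, which is below uDivisor, so the final DIV is safe.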
;
mov RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], 0
cmp uDividendHi, uDivisor
jb .do_divide
push xAX
mov xAX, xDX
xor edx, edx
div uDivisor
mov RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], xAX
pop xAX
;
; Perform the division and store the result.
;
.do_divide:
div uDivisor
mov RTBIGNUM_ELEMENT_PRE [puQuotient], xAX
mov RTBIGNUM_ELEMENT_PRE [puRemainder], xDX
%ifdef RT_ARCH_X86
pop ebx
pop edi
%endif
leave
ret
ENDPROC rtBigNumElement2xDiv2xBy1x
;;
; Performs the core of long multiplication.
;
; @returns nothing.
; @param pauResult x86:[ebp + 8] gcc:rdi msc:rcx Initialized to zero.
; @param pauMultiplier x86:[ebp + 12] gcc:rsi msc:rdx
; @param cMultiplier x86:[ebp + 16] gcc:rdx msc:r8
; @param pauMultiplicand x86:[ebp + 20] gcc:rcx msc:r9
; @param cMultiplicand x86:[ebp + 24] gcc:r8 msc:[rbp + 30h]
;
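; A rough C equivalent (a sketch only, assuming 64-bit elements and the
; GCC/Clang __uint128_t extension; pauResult is assumed to provide
; cMultiplier + cMultiplicand zeroed elements):
;
;       for (uint32_t i = 0; i < cMultiplier; i++)
;           for (uint32_t j = 0; j < cMultiplicand; j++)
;           {
;               __uint128_t uProd = (__uint128_t)pauMultiplicand[j] * pauMultiplier[i];
;               uint64_t uLo = (uint64_t)uProd;
;               uint64_t uHi = (uint64_t)(uProd >> 64);
;               uint32_t k = i + j;
;               pauResult[k] += uLo;
;               uHi += pauResult[k] < uLo;  /* no wrap: uHi <= 2^64 - 2 */
;               pauResult[k + 1] += uHi;
;               int fCarry = pauResult[k + 1] < uHi;
;               for (k += 2; fCarry; k++)   /* propagate the carry upwards */
;               {
;                   pauResult[k] += 1;
;                   fCarry = pauResult[k] == 0;
;               }
;           }
;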
BEGINPROC rtBigNumMagnitudeMultiplyAssemblyWorker
push xBP
SEH64_PUSH_xBP
mov xBP, xSP
SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE
%ifdef RT_ARCH_AMD64
%if RTBIGNUM_ELEMENT_SIZE == 4
%error "sorry not implemented yet."
sorry not implemented yet.
%endif
%ifdef ASM_CALL64_GCC
%define pauResult rdi
%define pauMultiplier rsi
%define cMultiplier r9
%define pauMultiplicand rcx
%define cMultiplicand r8
mov r9d, edx ; cMultiplier
mov r8d, r8d ; cMultiplicand - paranoia
%define uMultiplier r10
%define iMultiplicand r11
%else
%define pauResult rcx
%define pauMultiplier r11
%define cMultiplier r8
%define pauMultiplicand r9
%define cMultiplicand r10
mov pauMultiplier, rdx
mov r10d, dword [rbp + 30h] ; cMultiplicand
mov r8d, r8d ; cMultiplier - paranoia
%define uMultiplier r12
push r12
%define iMultiplicand r13
push r13
%endif
%elifdef RT_ARCH_X86
push edi
push esi
push ebx
sub esp, 10h
%define pauResult edi
mov pauResult, [ebp + 08h]
%define pauMultiplier dword [ebp + 0ch]
%define cMultiplier dword [ebp + 10h]
%define pauMultiplicand ecx
mov pauMultiplicand, [ebp + 14h]
%define cMultiplicand dword [ebp + 18h]
%define uMultiplier dword [ebp - 10h]
%define iMultiplicand ebx
%else
%error "Unsupported arch."
%endif
;
; Check that the multiplicand isn't empty (avoids an extra jump in the inner loop).
;
cmp cMultiplicand, 0
je .done
;
; Loop thru each element in the multiplier.
;
; while (cMultiplier-- > 0)
.multiplier_loop:
cmp cMultiplier, 0
jz .done
dec cMultiplier
; uMultiplier = *pauMultiplier
%ifdef RT_ARCH_X86
mov edx, pauMultiplier
mov eax, [edx]
mov uMultiplier, eax
%else
mov uMultiplier, [pauMultiplier]
%endif
; for (iMultiplicand = 0; iMultiplicand < cMultiplicand; iMultiplicand++)
xor iMultiplicand, iMultiplicand
.multiplicand_loop:
mov xAX, [pauMultiplicand + iMultiplicand * RTBIGNUM_ELEMENT_SIZE]
mul uMultiplier
add [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE], xAX
adc [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE + RTBIGNUM_ELEMENT_SIZE], xDX
jnc .next_multiplicand
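; The double-element add overflowed; propagate the carry into the
; higher result elements until it is absorbed.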
lea xDX, [iMultiplicand + 2]
.next_adc:
adc RTBIGNUM_ELEMENT_PRE [pauResult + xDX * RTBIGNUM_ELEMENT_SIZE], 0
inc xDX
jc .next_adc
.next_multiplicand:
inc iMultiplicand ; iMultiplicand++
cmp iMultiplicand, cMultiplicand ; iMultiplicand < cMultiplicand
jb .multiplicand_loop
; Advance and loop on multiplier.
add pauMultiplier, RTBIGNUM_ELEMENT_SIZE
add pauResult, RTBIGNUM_ELEMENT_SIZE
jmp .multiplier_loop
.done:
%ifdef RT_ARCH_AMD64
%ifdef ASM_CALL64_GCC
%else
pop r13
pop r12
%endif
%elifdef RT_ARCH_X86
add esp, 10h
pop ebx
pop esi
pop edi
%endif
leave
ret
ENDPROC rtBigNumMagnitudeMultiplyAssemblyWorker
;;
; Assembly implementation of the D4 step of Knuth's division algorithm.
;
; This subtracts Divisor * Qhat from the dividend at the current J index.
;
; @returns true if negative result (unlikely), false if positive.
; @param pauDividendJ x86:[ebp + 8] gcc:rdi msc:rcx
; @param pauDivisor x86:[ebp + 12] gcc:rsi msc:rdx
; @param cDivisor x86:[ebp + 16] gcc:edx msc:r8d
; @param uQhat x86:[ebp + 20] gcc:rcx msc:r9
;
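; A rough C equivalent (a sketch only, assuming 64-bit elements and the
; GCC/Clang __uint128_t extension):
;
;       uint64_t uMulCarry = 0;
;       for (uint32_t i = 0; i < cDivisor; i++)
;       {
;           __uint128_t uSub = (__uint128_t)uQhat * pauDivisor[i] + uMulCarry;
;           uint64_t uLo = (uint64_t)uSub;
;           uMulCarry = (uint64_t)(uSub >> 64);
;           uMulCarry += pauDividendJ[i] < uLo; /* borrow from the subtraction */
;           pauDividendJ[i] -= uLo;
;       }
;       /* Final dividend element; a borrow here means a negative result. */
;       int fNegative = pauDividendJ[cDivisor] < uMulCarry;
;       pauDividendJ[cDivisor] -= uMulCarry;
;       return fNegative;
;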
BEGINPROC rtBigNumKnuthD4_MulSub
push xBP
SEH64_PUSH_xBP
mov xBP, xSP
SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE
%ifdef RT_ARCH_AMD64
%if RTBIGNUM_ELEMENT_SIZE == 4
%error "sorry not implemented yet."
sorry not implemented yet.
%endif
%ifdef ASM_CALL64_GCC
%define pauDividendJ rdi
%define pauDivisor rsi
%define cDivisor r8
%define uQhat rcx
mov r8d, edx ; cDivisor
%define uMulCarry r11
%else
%define pauDividendJ rcx
%define pauDivisor r10
%define cDivisor r8
%define uQhat r9
mov r10, rdx ; pauDivisor
mov r8d, r8d ; cDivisor - paranoia
%define uMulCarry r11
%endif
%elifdef RT_ARCH_X86
push edi
push esi
push ebx
%define pauDividendJ edi
mov pauDividendJ, [ebp + 08h]
%define pauDivisor esi
mov pauDivisor, [ebp + 0ch]
%define cDivisor ecx
mov cDivisor, [ebp + 10h]
%define uQhat dword [ebp + 14h]
%define uMulCarry ebx
%else
%error "Unsupported arch."
%endif
%ifdef RT_STRICT
;
; Some sanity checks.
;
cmp cDivisor, 0
jne .cDivisor_not_zero
int3
.cDivisor_not_zero:
%endif
;
; Initialize the loop.
;
xor uMulCarry, uMulCarry
;
; do ... while (cDivisor-- > 0);
;
.the_loop:
; RTUInt128MulU64ByU64(&uSub, uQhat, pauDivisor[i]);
mov xAX, uQhat
mul RTBIGNUM_ELEMENT_PRE [pauDivisor]
; RTUInt128AssignAddU64(&uSub, uMulCarry);
add xAX, uMulCarry
adc xDX, 0
mov uMulCarry, xDX
; Subtract uSub.s.Lo+fCarry from pauDividendJ[i]
sub [pauDividendJ], xAX
adc uMulCarry, 0
%ifdef RT_STRICT
jnc .uMulCarry_did_not_overflow
int3
.uMulCarry_did_not_overflow:
%endif
; Advance.
add pauDividendJ, RTBIGNUM_ELEMENT_SIZE
add pauDivisor, RTBIGNUM_ELEMENT_SIZE
dec cDivisor
jnz .the_loop
;
; Final dividend element (no corresponding divisor element).
;
sub [pauDividendJ], uMulCarry
sbb eax, eax
and eax, 1
.done:
%ifdef RT_ARCH_AMD64
%elifdef RT_ARCH_X86
pop ebx
pop esi
pop edi
%endif
leave
ret
ENDPROC rtBigNumKnuthD4_MulSub