0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; $Id$
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;; @file
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; IPRT - Big Integer Numbers, AMD64 and X86 Assembly Workers
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; Copyright (C) 2006-2014 Oracle Corporation
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; This file is part of VirtualBox Open Source Edition (OSE), as
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; available from http://www.virtualbox.org. This file is free software;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; you can redistribute it and/or modify it under the terms of the GNU
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; General Public License (GPL) as published by the Free Software
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; Foundation, in version 2 as it comes in the "COPYING" file of the
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; The contents of this file may alternatively be used under the terms
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; of the Common Development and Distribution License Version 1.0
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; VirtualBox OSE distribution, in which case the provisions of the
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; CDDL are applicable instead of those of the GPL.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; You may elect to license modified versions of this file under the
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; terms and conditions of either the GPL or the CDDL or both.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%define RT_ASM_WITH_SEH64
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%include "iprt/asmdefs.mac"
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%include "internal/bignum.mac"
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsyncBEGINCODE
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
;;
; Subtracts a number (pauSubtrahend) from a larger number (pauMinuend) and
; stores the result in pauResult.
;
; All three numbers are zero padded such that a borrow can be carried one (or
; two for 64-bit) elements beyond the end of the largest number.
;
; The work is split into a main loop that is unrolled eight times and a tail
; loop handling the remaining (cUsed & 7) rounds.  The borrow (CF) is carried
; from one SBB to the next, so nothing between two SBBs may modify CF; that
; is why LEA and DEC are used for the index/counter updates.
;
; A cUsed of zero is treated as "nothing to do": the routine returns without
; touching any of the arrays.
;
; Register usage:
;   AMD64:  r11  = byte offset into the three arrays.
;           r10d = remaining unrolled rounds.
;           rax  = scratch; AH briefly holds the saved flags.
;   x86:    ebx  = byte offset into the three arrays.
;           eax  = scratch; AH briefly holds the saved flags.
;           edi, esi and ebx are saved on entry and restored on exit.
;
; @returns  nothing.
; @param    pauResult       x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    pauMinuend      x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    pauSubtrahend   x86:[ebp + 16]  gcc:rdx  msc:r8
; @param    cUsed           x86:[ebp + 20]  gcc:rcx  msc:r9
;
BEGINPROC rtBigNumMagnitudeSubAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
  %define pauResult         rdi
  %define pauMinuend        rsi
  %define pauSubtrahend     rdx
  %define cUsed             ecx
 %else
  %define pauResult         rcx
  %define pauMinuend        rdx
  %define pauSubtrahend     r8
  %define cUsed             r9d
 %endif
        xor     r11d, r11d              ; index register.

 %if RTBIGNUM_ELEMENT_SIZE == 4
        ; The loops below work on 64-bit words, so convert the 32-bit element
        ; count into a word count (relies on the zero padding described above).
        add     cUsed, 1                ; cUsed = RT_ALIGN(cUsed, 2) / 2
        shr     cUsed, 1
 %endif
        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        mov     r10d, cUsed
        shr     r10d, 3                 ; r10d = number of unrolled rounds (>= 1 here).
        clc                             ; Start without a borrow (shr modified CF).
.big_loop:
        mov     rax, [pauMinuend + r11]
        sbb     rax, [pauSubtrahend + r11]
        mov     [pauResult + r11], rax
        mov     rax, [pauMinuend + r11 + 8]
        sbb     rax, [pauSubtrahend + r11 + 8]
        mov     [pauResult + r11 + 8], rax
        mov     rax, [pauMinuend + r11 + 16]
        sbb     rax, [pauSubtrahend + r11 + 16]
        mov     [pauResult + r11 + 16], rax
        mov     rax, [pauMinuend + r11 + 24]
        sbb     rax, [pauSubtrahend + r11 + 24]
        mov     [pauResult + r11 + 24], rax
        mov     rax, [pauMinuend + r11 + 32]
        sbb     rax, [pauSubtrahend + r11 + 32]
        mov     [pauResult + r11 + 32], rax
        mov     rax, [pauMinuend + r11 + 40]
        sbb     rax, [pauSubtrahend + r11 + 40]
        mov     [pauResult + r11 + 40], rax
        mov     rax, [pauMinuend + r11 + 48]
        sbb     rax, [pauSubtrahend + r11 + 48]
        mov     [pauResult + r11 + 48], rax
        mov     rax, [pauMinuend + r11 + 56]
        sbb     rax, [pauSubtrahend + r11 + 56]
        mov     [pauResult + r11 + 56], rax
        lea     r11, [r11 + 64]         ; LEA leaves the flags alone.
        dec     r10d                    ; Does not change CF.
        jnz     .big_loop

        ; NOTE(review): LAHF/SAHF in 64-bit mode are only available when
        ; CPUID.80000001h:ECX bit 0 is set - confirm the minimum CPU baseline.
        lahf                            ; Save CF (the AND below clobbers it).
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Enter past the clc so the borrow is kept.

.small_job:
        test    cUsed, cUsed            ; Empty input: nothing to subtract.  Without
        jz      .done                   ; this the dec below would wrap around.
        clc                             ; Start without a borrow.
.small_loop:
        mov     rax, [pauMinuend + r11]
        sbb     rax, [pauSubtrahend + r11]
        mov     [pauResult + r11], rax
        lea     r11, [r11 + 8]          ; LEA leaves the flags alone.
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        jnc     .done                   ; A final borrow means the minuend was not
        int3                            ; the larger number; trap in strict builds.
 %endif
.done:

%elifdef RT_ARCH_X86
        push    edi
        push    esi
        push    ebx

        mov     edi, [ebp + 08h]        ; pauResult
 %define pauResult          edi
        mov     ecx, [ebp + 0ch]        ; pauMinuend
 %define pauMinuend         ecx
        mov     edx, [ebp + 10h]        ; pauSubtrahend
 %define pauSubtrahend      edx
        mov     esi, [ebp + 14h]        ; cUsed
 %define cUsed              esi

        xor     ebx, ebx                ; index register.

        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        shr     cUsed, 3                ; cUsed = number of unrolled rounds (>= 1 here).
        clc                             ; Start without a borrow (shr modified CF).
.big_loop:
        mov     eax, [pauMinuend + ebx]
        sbb     eax, [pauSubtrahend + ebx]
        mov     [pauResult + ebx], eax
        mov     eax, [pauMinuend + ebx + 4]
        sbb     eax, [pauSubtrahend + ebx + 4]
        mov     [pauResult + ebx + 4], eax
        mov     eax, [pauMinuend + ebx + 8]
        sbb     eax, [pauSubtrahend + ebx + 8]
        mov     [pauResult + ebx + 8], eax
        mov     eax, [pauMinuend + ebx + 12]
        sbb     eax, [pauSubtrahend + ebx + 12]
        mov     [pauResult + ebx + 12], eax
        mov     eax, [pauMinuend + ebx + 16]
        sbb     eax, [pauSubtrahend + ebx + 16]
        mov     [pauResult + ebx + 16], eax
        mov     eax, [pauMinuend + ebx + 20]
        sbb     eax, [pauSubtrahend + ebx + 20]
        mov     [pauResult + ebx + 20], eax
        mov     eax, [pauMinuend + ebx + 24]
        sbb     eax, [pauSubtrahend + ebx + 24]
        mov     [pauResult + ebx + 24], eax
        mov     eax, [pauMinuend + ebx + 28]
        sbb     eax, [pauSubtrahend + ebx + 28]
        mov     [pauResult + ebx + 28], eax
        lea     ebx, [ebx + 32]         ; LEA leaves the flags alone.
        dec     cUsed                   ; Does not change CF.
        jnz     .big_loop

        lahf                            ; Save CF (the AND below clobbers it).
        mov     cUsed, [ebp + 14h]      ; Reload the count; up to seven odd rounds.
        and     cUsed, 7
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Enter past the clc so the borrow is kept.

.small_job:
        test    cUsed, cUsed            ; Empty input: nothing to subtract.  Without
        jz      .done                   ; this the dec below would wrap around.
        clc                             ; Start without a borrow.
.small_loop:
        mov     eax, [pauMinuend + ebx]
        sbb     eax, [pauSubtrahend + ebx]
        mov     [pauResult + ebx], eax
        lea     ebx, [ebx + 4]          ; LEA leaves the flags alone.
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        jnc     .done                   ; A final borrow means the minuend was not
        int3                            ; the larger number; trap in strict builds.
 %endif
.done:

        pop     ebx
        pop     esi
        pop     edi
%else
 %error "Unsupported arch"
%endif

        leave
        ret
%undef pauResult
%undef pauMinuend
%undef pauSubtrahend
%undef cUsed
ENDPROC rtBigNumMagnitudeSubAssemblyWorker
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
;;
; Subtracts a number (pauSubtrahend) from a larger number (pauResultMinuend),
; storing the result in place in pauResultMinuend.
;
; Both numbers are zero padded such that a borrow can be carried one (or
; two for 64-bit) elements beyond the end of the largest number.
;
; The work is split into a main loop that is unrolled eight times and a tail
; loop handling the remaining (cUsed & 7) rounds.  The borrow (CF) is carried
; from one SBB to the next, so nothing between two SBBs may modify CF; that
; is why LEA and DEC are used for the index/counter updates.
;
; A cUsed of zero is treated as "nothing to do": the routine returns without
; touching either array.
;
; Register usage:
;   AMD64:  r11  = byte offset into both arrays.
;           r10d = remaining unrolled rounds.
;           rax  = scratch; AH briefly holds the saved flags.
;   x86:    ebx  = byte offset into both arrays.
;           eax  = scratch; AH briefly holds the saved flags.
;           edi and ebx are saved on entry and restored on exit.
;
; @returns  nothing.
; @param    pauResultMinuend    x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    pauSubtrahend       x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    cUsed               x86:[ebp + 16]  gcc:rdx  msc:r8
;
BEGINPROC rtBigNumMagnitudeSubThisAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
  %define pauResultMinuend  rdi
  %define pauSubtrahend     rsi
  %define cUsed             edx
 %else
  %define pauResultMinuend  rcx
  %define pauSubtrahend     rdx
  %define cUsed             r8d
 %endif
        xor     r11d, r11d              ; index register.

 %if RTBIGNUM_ELEMENT_SIZE == 4
        ; The loops below work on 64-bit words, so convert the 32-bit element
        ; count into a word count (relies on the zero padding described above).
        add     cUsed, 1                ; cUsed = RT_ALIGN(cUsed, 2) / 2
        shr     cUsed, 1
 %endif
        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        mov     r10d, cUsed
        shr     r10d, 3                 ; r10d = number of unrolled rounds (>= 1 here).
        clc                             ; Start without a borrow (shr modified CF).
.big_loop:
        mov     rax, [pauSubtrahend + r11]
        sbb     [pauResultMinuend + r11], rax
        mov     rax, [pauSubtrahend + r11 + 8]
        sbb     [pauResultMinuend + r11 + 8], rax
        mov     rax, [pauSubtrahend + r11 + 16]
        sbb     [pauResultMinuend + r11 + 16], rax
        mov     rax, [pauSubtrahend + r11 + 24]
        sbb     [pauResultMinuend + r11 + 24], rax
        mov     rax, [pauSubtrahend + r11 + 32]
        sbb     [pauResultMinuend + r11 + 32], rax
        mov     rax, [pauSubtrahend + r11 + 40]
        sbb     [pauResultMinuend + r11 + 40], rax
        mov     rax, [pauSubtrahend + r11 + 48]
        sbb     [pauResultMinuend + r11 + 48], rax
        mov     rax, [pauSubtrahend + r11 + 56]
        sbb     [pauResultMinuend + r11 + 56], rax
        lea     r11, [r11 + 64]         ; LEA leaves the flags alone.
        dec     r10d                    ; Does not change CF.
        jnz     .big_loop

        ; NOTE(review): LAHF/SAHF in 64-bit mode are only available when
        ; CPUID.80000001h:ECX bit 0 is set - confirm the minimum CPU baseline.
        lahf                            ; Save CF (the AND below clobbers it).
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Enter past the clc so the borrow is kept.

.small_job:
        test    cUsed, cUsed            ; Empty input: nothing to subtract.  Without
        jz      .done                   ; this the dec below would wrap around.
        clc                             ; Start without a borrow.
.small_loop:
        mov     rax, [pauSubtrahend + r11]
        sbb     [pauResultMinuend + r11], rax
        lea     r11, [r11 + 8]          ; LEA leaves the flags alone.
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        jnc     .done                   ; A final borrow means the minuend was not
        int3                            ; the larger number; trap in strict builds.
 %endif
.done:

%elifdef RT_ARCH_X86
        push    edi
        push    ebx

        mov     edi, [ebp + 08h]        ; pauResultMinuend
 %define pauResultMinuend   edi
        mov     edx, [ebp + 0ch]        ; pauSubtrahend
 %define pauSubtrahend      edx
        mov     ecx, [ebp + 10h]        ; cUsed
 %define cUsed              ecx

        xor     ebx, ebx                ; index register.

        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        shr     cUsed, 3                ; cUsed = number of unrolled rounds (>= 1 here).
        clc                             ; Start without a borrow (shr modified CF).
.big_loop:
        mov     eax, [pauSubtrahend + ebx]
        sbb     [pauResultMinuend + ebx], eax
        mov     eax, [pauSubtrahend + ebx + 4]
        sbb     [pauResultMinuend + ebx + 4], eax
        mov     eax, [pauSubtrahend + ebx + 8]
        sbb     [pauResultMinuend + ebx + 8], eax
        mov     eax, [pauSubtrahend + ebx + 12]
        sbb     [pauResultMinuend + ebx + 12], eax
        mov     eax, [pauSubtrahend + ebx + 16]
        sbb     [pauResultMinuend + ebx + 16], eax
        mov     eax, [pauSubtrahend + ebx + 20]
        sbb     [pauResultMinuend + ebx + 20], eax
        mov     eax, [pauSubtrahend + ebx + 24]
        sbb     [pauResultMinuend + ebx + 24], eax
        mov     eax, [pauSubtrahend + ebx + 28]
        sbb     [pauResultMinuend + ebx + 28], eax
        lea     ebx, [ebx + 32]         ; LEA leaves the flags alone.
        dec     cUsed                   ; Does not change CF.
        jnz     .big_loop

        lahf                            ; Save CF (the AND below clobbers it).
        mov     cUsed, [ebp + 10h]      ; Reload the count; up to seven odd rounds.
        and     cUsed, 7
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Enter past the clc so the borrow is kept.

.small_job:
        test    cUsed, cUsed            ; Empty input: nothing to subtract.  Without
        jz      .done                   ; this the dec below would wrap around.
        clc                             ; Start without a borrow.
.small_loop:
        mov     eax, [pauSubtrahend + ebx]
        sbb     [pauResultMinuend + ebx], eax
        lea     ebx, [ebx + 4]          ; LEA leaves the flags alone.
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        jnc     .done                   ; A final borrow means the minuend was not
        int3                            ; the larger number; trap in strict builds.
 %endif
.done:

        pop     ebx
        pop     edi
%else
 %error "Unsupported arch"
%endif

        leave
        ret
%undef pauResultMinuend
%undef pauSubtrahend
%undef cUsed
ENDPROC rtBigNumMagnitudeSubThisAssemblyWorker
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; Shifts an element array one bit to the left, returning the final carry value.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; On 64-bit hosts the array is always zero padded to a multiple of 8 bytes, so
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; we can use 64-bit operand sizes even if the element type is 32-bit.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; @returns The final carry value.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; @param pauElements x86:[ebp + 8] gcc:rdi msc:rcx
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; @param cUsed x86:[ebp + 12] gcc:rsi msc:rdx
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; @param uCarry x86:[ebp + 16] gcc:rdx msc:r8
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsyncBEGINPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync push xBP
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync SEH64_PUSH_xBP
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync mov xBP, xSP
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync SEH64_SET_FRAME_xBP 0
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsyncSEH64_END_PROLOGUE
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%ifdef RT_ARCH_AMD64
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %ifdef ASM_CALL64_GCC
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %define pauElements rdi
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %define cUsed esi
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %define uCarry edx
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %else
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %define pauElements rcx
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %define cUsed edx
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %define uCarry r8d
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %endif
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%elifdef RT_ARCH_X86
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %define pauElements ecx
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync mov pauElements, [ebp + 08h]
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %define cUsed edx
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync mov cUsed, [ebp + 0ch]
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %define uCarry eax
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync mov uCarry, [ebp + 10h]
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%else
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync %error "Unsupported arch."
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%endif
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync ; Lots to do?
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync cmp cUsed, 8
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jae .big_loop_init
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync ; Check for empty array.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync test cUsed, cUsed
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jz .no_elements
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jmp .small_loop_init
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync ; Big loop - 8 unrolled loop iterations.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync.big_loop_init:
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%ifdef RT_ARCH_AMD64
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync mov r11d, cUsed
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%endif
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync shr cUsed, 3
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync test uCarry, uCarry ; clear the carry flag
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jz .big_loop
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync stc
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync.big_loop:
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%if RTBIGNUM_ELEMENT_SIZE == 8
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl qword [pauElements], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl qword [pauElements + 8], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl qword [pauElements + 16], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl qword [pauElements + 24], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl qword [pauElements + 32], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl qword [pauElements + 40], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl qword [pauElements + 48], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl qword [pauElements + 56], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync lea pauElements, [pauElements + 64]
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%else
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl dword [pauElements], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl dword [pauElements + 4], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl dword [pauElements + 8], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl dword [pauElements + 12], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl dword [pauElements + 16], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl dword [pauElements + 20], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl dword [pauElements + 24], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl dword [pauElements + 28], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync lea pauElements, [pauElements + 32]
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%endif
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync dec cUsed
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jnz .big_loop
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync ; More to do?
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync lahf ; save carry flag (uCarry no longer used on x86).
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%ifdef RT_ARCH_AMD64
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync mov cUsed, r11d
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%else
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync mov cUsed, [ebp + 0ch]
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%endif
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync and cUsed, 7
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jz .restore_cf_and_return ; Jump if we're good and done.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync sahf ; Restore CF.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jmp .small_loop ; Deal with the odd rounds.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync.restore_cf_and_return:
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync sahf
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jmp .carry_to_eax
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync ; Small loop - One round at the time.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync.small_loop_init:
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync test uCarry, uCarry ; clear the carry flag
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jz .small_loop
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync stc
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync.small_loop:
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%if RTBIGNUM_ELEMENT_SIZE == 8
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl qword [pauElements], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync lea pauElements, [pauElements + 8]
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%else
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync rcl dword [pauElements], 1
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync lea pauElements, [pauElements + 4]
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%endif
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync dec cUsed
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jnz .small_loop
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync ; Calculate return value.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync.carry_to_eax:
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync mov eax, 0
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jnc .return
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync inc eax
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync.return:
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync leave
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync ret
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync.no_elements:
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync mov eax, uCarry
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync jmp .return
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsyncENDPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
972c3ecf2c929440ce70e51af38ba021101c8f7bvboxsync
;;
; Divides a double-element dividend by a single-element divisor, that is a
; 128-bit by 64-bit division on AMD64 and a 64-bit by 32-bit division on x86.
;
; The quotient is stored as two elements, low element first, so a dividend
; whose high element is not below the divisor cannot cause a divide overflow.
;
; @returns  nothing.
; @param    puQuotient      x86:[ebp +  8]  gcc:rdi  msc:rcx            Double element.
; @param    puRemainder     x86:[ebp + 12]  gcc:rsi  msc:rdx            Normal element.
; @param    uDividendHi     x86:[ebp + 16]  gcc:rdx  msc:r8
; @param    uDividendLo     x86:[ebp + 20]  gcc:rcx  msc:r9
; @param    uDivisor        x86:[ebp + 24]  gcc:r8   msc:[rbp + 30h]
;
BEGINPROC rtBigNumElement2xDiv2xBy1x
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

        ;
        ; Operand setup.  DIV takes the dividend in xDX:xAX, so the dividend
        ; halves go there and the remaining arguments are kept elsewhere.
        ;
%ifdef RT_ARCH_AMD64
 %if RTBIGNUM_ELEMENT_SIZE == 4
  %error "sorry not implemented yet."
  sorry not implemented yet.
 %endif

 %define uDividendHi    rdx
 %define uDividendLo    rax
 %ifdef ASM_CALL64_GCC
  %define puQuotient    rdi
  %define puRemainder   rsi
  %define uDivisor      r8
        mov     uDividendLo, rcx        ; uDividendHi is passed in rdx already.
 %else
  %define puQuotient    rcx
  %define puRemainder   r11
  %define uDivisor      r10
        mov     uDividendLo, r9
        mov     puRemainder, rdx        ; Save the pointer before rdx is reloaded.
        mov     uDividendHi, r8
        mov     uDivisor, [rbp + 30h]   ; Fifth argument is passed on the stack.
 %endif

%elifdef RT_ARCH_X86
        push    edi
        push    ebx

 %define puQuotient     edi
 %define puRemainder    ebx
 %define uDividendHi    edx
 %define uDividendLo    eax
 %define uDivisor       ecx
        mov     puQuotient,  [ebp + 08h]
        mov     puRemainder, [ebp + 0ch]
        mov     uDividendHi, [ebp + 10h]
        mov     uDividendLo, [ebp + 14h]
        mov     uDivisor,    [ebp + 18h]
%else
 %error "Unsupported arch."
%endif

%ifdef RT_STRICT
        ;
        ; The divisor shall not be zero.
        ;
        test    uDivisor, uDivisor
        jnz     .divisor_ok
        int3
.divisor_ok:
%endif

        ;
        ; Avoid division overflow.  When the high dividend element is not below
        ; the divisor it is divided on its own first: that gives the high
        ; quotient element and leaves the remainder in xDX, which is exactly the
        ; high dividend element needed for the second division.
        ;
        mov     RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], 0
        cmp     uDividendHi, uDivisor
        jb      .divide_low
        push    xAX                     ; Park the low dividend element.
        mov     xAX, xDX
        xor     edx, edx
        div     uDivisor
        mov     RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], xAX
        pop     xAX

        ;
        ; Low quotient element and the remainder.
        ;
.divide_low:
        div     uDivisor
        mov     RTBIGNUM_ELEMENT_PRE [puQuotient], xAX
        mov     RTBIGNUM_ELEMENT_PRE [puRemainder], xDX

%ifdef RT_ARCH_X86
        pop     ebx
        pop     edi
%endif
        leave
        ret
ENDPROC rtBigNumElement2xDiv2xBy1x
972c3ecf2c929440ce70e51af38ba021101c8f7bvboxsync
972c3ecf2c929440ce70e51af38ba021101c8f7bvboxsync
;;
; The core of schoolbook (long) multiplication.
;
; Every multiplier element is multiplied with every multiplicand element and
; the double-element product is added to the result at the matching position,
; with any carry rippled into the higher result elements.
;
; @returns  nothing.
; @param    pauResult       x86:[ebp +  8]  gcc:rdi  msc:rcx            Initialized to zero.
; @param    pauMultiplier   x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    cMultiplier     x86:[ebp + 16]  gcc:rdx  msc:r8
; @param    pauMultiplicand x86:[ebp + 20]  gcc:rcx  msc:r9
; @param    cMultiplicand   x86:[ebp + 24]  gcc:r8   msc:[rbp + 30h]
;
BEGINPROC rtBigNumMagnitudeMultiplyAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

        ;
        ; Operand setup.  MUL writes xDX:xAX, so nothing that must survive the
        ; loops may stay in those two registers.
        ;
%ifdef RT_ARCH_AMD64
 %if RTBIGNUM_ELEMENT_SIZE == 4
  %error "sorry not implemented yet."
  sorry not implemented yet.
 %endif

 %ifdef ASM_CALL64_GCC
  %define pauResult         rdi
  %define pauMultiplier     rsi
  %define cMultiplier       r9
  %define pauMultiplicand   rcx
  %define cMultiplicand     r8
  %define uMultiplier       r10
  %define iMultiplicand     r11
        mov     r8d, r8d                ; cMultiplicand - paranoia (clears bits 63:32)
        mov     r9d, edx                ; cMultiplier
 %else
  %define pauResult         rcx
  %define pauMultiplier     r11
  %define cMultiplier       r8
  %define pauMultiplicand   r9
  %define cMultiplicand     r10
  %define uMultiplier       r12
  %define iMultiplicand     r13
        push    r12                     ; Restored at .done.
        push    r13                     ; Restored at .done.
        mov     pauMultiplier, rdx
        mov     r8d, r8d                ; cMultiplier - paranoia (clears bits 63:32)
        mov     r10d, dword [rbp + 30h] ; cMultiplicand
 %endif

%elifdef RT_ARCH_X86
        push    edi
        push    esi
        push    ebx
        sub     esp, 10h                ; Room for the uMultiplier local.
 %define pauResult          edi
 %define pauMultiplier      dword [ebp + 0ch]
 %define cMultiplier        dword [ebp + 10h]
 %define pauMultiplicand    ecx
 %define cMultiplicand      dword [ebp + 18h]
 %define uMultiplier        dword [ebp - 10h]
 %define iMultiplicand      ebx
        mov     pauResult, [ebp + 08h]
        mov     pauMultiplicand, [ebp + 14h]

%else
 %error "Unsupported arch."
%endif

        ;
        ; Nothing to do for an empty multiplicand.  Checking it up front keeps
        ; the inner loop free of an entry test.
        ;
        cmp     cMultiplicand, 0
        je      .done

        ;
        ; Outer loop, one round per multiplier element:
        ;       while (cMultiplier-- > 0)
        ;
.next_multiplier:
        cmp     cMultiplier, 0
        jz      .done
        dec     cMultiplier

        ; uMultiplier = *pauMultiplier
%ifdef RT_ARCH_X86
        mov     eax, pauMultiplier
        mov     eax, [eax]
        mov     uMultiplier, eax
%else
        mov     uMultiplier, [pauMultiplier]
%endif

        ;
        ; Inner loop, one round per multiplicand element:
        ;       for (iMultiplicand = 0; iMultiplicand < cMultiplicand; iMultiplicand++)
        ;
        xor     iMultiplicand, iMultiplicand
.next_element:
        ; xDX:xAX = uMultiplier * pauMultiplicand[iMultiplicand]
        mov     xAX, uMultiplier
        mul     RTBIGNUM_ELEMENT_PRE [pauMultiplicand + iMultiplicand * RTBIGNUM_ELEMENT_SIZE]

        ; Add the product to the two result elements at this position.
        add     [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE], xAX
        adc     [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE + RTBIGNUM_ELEMENT_SIZE], xDX
        jnc     .element_done

        ; Ripple the carry upwards.  LEA and INC leave CF alone, so the carry
        ; from the previous ADC is still intact when the next ADC consumes it.
        lea     xDX, [iMultiplicand + 2]
.ripple_carry:
        adc     RTBIGNUM_ELEMENT_PRE [pauResult + xDX * RTBIGNUM_ELEMENT_SIZE], 0
        inc     xDX
        jc      .ripple_carry

.element_done:
        inc     iMultiplicand                   ; iMultiplicand++
        cmp     iMultiplicand, cMultiplicand    ; iMultiplicand < cMultiplicand
        jb      .next_element

        ; Move on to the next multiplier element and result position.
        add     pauResult, RTBIGNUM_ELEMENT_SIZE
        add     pauMultiplier, RTBIGNUM_ELEMENT_SIZE
        jmp     .next_multiplier

        ;
        ; Undo the register saves / stack allocation and return.
        ;
.done:

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
 %else
        pop     r13
        pop     r12
 %endif
%elifdef RT_ARCH_X86
        add     esp, 10h
        pop     ebx
        pop     esi
        pop     edi
%endif
        leave
        ret
ENDPROC rtBigNumMagnitudeMultiplyAssemblyWorker
972c3ecf2c929440ce70e51af38ba021101c8f7bvboxsync
;;
; Assembly implementation of the D4 step of Knuth's division algorithm.
;
; This subtracts Divisor * Qhat from the dividend at the current J index.
; cDivisor + 1 dividend elements are updated: one per divisor element plus a
; final one that only receives the outstanding carry/borrow.
;
; @returns  true if negative result (unlikely), false if positive.
; @param    pauDividendJ    x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    pauDivisor      x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    cDivisor        x86:[ebp + 16]  gcc:edx  msc:r8d    Must not be zero.
; @param    uQhat           x86:[ebp + 20]  gcc:rcx  msc:r9
;
BEGINPROC rtBigNumKnuthD4_MulSub
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

        ;
        ; Operand setup.  MUL writes xDX:xAX, so arguments arriving in rdx are
        ; moved to another register first.
        ;
%ifdef RT_ARCH_AMD64
 %if RTBIGNUM_ELEMENT_SIZE == 4
  %error "sorry not implemented yet."
  sorry not implemented yet.
 %endif

 %ifdef ASM_CALL64_GCC
  %define pauDividendJ      rdi
  %define pauDivisor        rsi
  %define cDivisor          r8
  %define uQhat             rcx
        mov     r8d, edx                ; cDivisor
  %define uMulCarry         r11
 %else
  %define pauDividendJ      rcx
  %define pauDivisor        r10
  %define cDivisor          r8
  %define uQhat             r9
        mov     r10, rdx                ; pauDivisor
        mov     r8d, r8d                ; cDivisor - paranoia
  %define uMulCarry         r11
 %endif

%elifdef RT_ARCH_X86
        push    edi
        push    esi
        push    ebx
 %define pauDividendJ       edi
        mov     pauDividendJ, [ebp + 08h]
 %define pauDivisor         esi
        mov     pauDivisor, [ebp + 0ch]
 %define cDivisor           ecx
        mov     cDivisor, [ebp + 10h]
 %define uQhat              dword [ebp + 14h]
 %define uMulCarry          ebx
%else
 %error "Unsupported arch."
%endif

%ifdef RT_STRICT
        ;
        ; Some sanity checks.
        ;
        test    cDivisor, cDivisor
        jnz     .cDivisor_not_zero
        int3
.cDivisor_not_zero:
%endif

        ;
        ; Initialize the loop.
        ;
        xor     uMulCarry, uMulCarry

        ;
        ; do ... while (--cDivisor > 0);
        ;
.the_loop:
        ; RTUInt128MulU64ByU64(&uSub, uQhat, pauDivisor[i]);
        mov     xAX, uQhat
        mul     RTBIGNUM_ELEMENT_PRE [pauDivisor]
        ; RTUInt128AssignAddU64(&uSub, uMulCarry);
        add     xAX, uMulCarry
        adc     xDX, 0
        mov     uMulCarry, xDX
        ; Subtract uSub.s.Lo from pauDividendJ[i] and fold the resulting borrow
        ; into the carry that is taken along to the next element.
        sub     [pauDividendJ], xAX
        adc     uMulCarry, 0
%ifdef RT_STRICT
        jnc     .uMulCarry_did_not_overflow
        int3
.uMulCarry_did_not_overflow:
%endif

        ; Advance.
        add     pauDividendJ, RTBIGNUM_ELEMENT_SIZE
        add     pauDivisor, RTBIGNUM_ELEMENT_SIZE
        dec     cDivisor
        jnz     .the_loop

        ;
        ; Final dividend element (no corresponding divisor element).
        ; The borrow out of this subtraction is the return value: eax = 1 if
        ; the result went negative, 0 otherwise.
        ;
        sub     [pauDividendJ], uMulCarry
        sbb     eax, eax
        and     eax, 1

%ifdef RT_ARCH_X86
        pop     ebx
        pop     esi
        pop     edi
%endif
        leave
        ret
ENDPROC rtBigNumKnuthD4_MulSub
972c3ecf2c929440ce70e51af38ba021101c8f7bvboxsync