bignum-amd64-x86.asm revision 0aa6cc1aa05aa4f72b9403f59641a83111617872
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; $Id$
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;; @file
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; IPRT - Big Integer Numbers, AMD64 and X86 Assembly Workers
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; Copyright (C) 2006-2014 Oracle Corporation
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; This file is part of VirtualBox Open Source Edition (OSE), as
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; available from http://www.virtualbox.org. This file is free software;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; you can redistribute it and/or modify it under the terms of the GNU
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; General Public License (GPL) as published by the Free Software
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; Foundation, in version 2 as it comes in the "COPYING" file of the
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; The contents of this file may alternatively be used under the terms
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; of the Common Development and Distribution License Version 1.0
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; VirtualBox OSE distribution, in which case the provisions of the
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; CDDL are applicable instead of those of the GPL.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; You may elect to license modified versions of this file under the
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync; terms and conditions of either the GPL or the CDDL or both.
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync;
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%define RT_ASM_WITH_SEH64
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%include "iprt/asmdefs.mac"
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync%include "internal/bignum.mac"
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsyncBEGINCODE
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
;;
; Computes pauResult = pauMinuend - pauSubtrahend, element by element with
; borrow propagation (SBB), where pauMinuend is the larger magnitude.
;
; The three arrays are expected to be zero padded so that the borrow can be
; carried one (or two for 64-bit) elements beyond the end of the largest
; number.  On AMD64 with 4 byte elements the count is rounded up to whole
; qwords and the arrays are walked 8 bytes at a time.
;
; The tail loop is a do-while (decrement, then test), so a zero cUsed is not
; handled by this worker.
;
; @returns  nothing.
; @param    pauResult       x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    pauMinuend      x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    pauSubtrahend   x86:[ebp + 16]  gcc:rdx  msc:r8
; @param    cUsed           x86:[ebp + 20]  gcc:rcx  msc:r9
;
BEGINPROC rtBigNumMagnitudeSubAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
        ;
        ; AMD64: arguments stay in their incoming registers.
        ;   rax = scratch element, r10d = group counter, r11 = byte offset.
        ;
 %ifdef ASM_CALL64_GCC
  %define pauResult         rdi
  %define pauMinuend        rsi
  %define pauSubtrahend     rdx
  %define cUsed             ecx
 %else
  %define pauResult         rcx
  %define pauMinuend        rdx
  %define pauSubtrahend     r8
  %define cUsed             r9d
 %endif

 %if RTBIGNUM_ELEMENT_SIZE == 4
        add     cUsed, 1                ; Convert the dword count to a qword
        shr     cUsed, 1                ; count, rounding up.
 %endif
        xor     r11d, r11d              ; Byte offset into all three arrays.

        cmp     cUsed, 8                ; Fewer than one full group of eight?
        jb      .tail_only

        mov     r10d, cUsed
        shr     r10d, 3                 ; r10d = number of 8-element groups (>= 1).
        clc                             ; No borrow going in.
.group_loop:
 %assign offGroup 0
 %rep 8
        mov     rax, [pauMinuend + r11 + offGroup]
        sbb     rax, [pauSubtrahend + r11 + offGroup]
        mov     [pauResult + r11 + offGroup], rax
  %assign offGroup offGroup + 8
 %endrep
 %undef offGroup
        lea     r11, [r11 + 64]         ; LEA so the borrow in CF survives.
        dec     r10d                    ; DEC leaves CF untouched as well.
        jnz     .group_loop

        lahf                            ; Park CF in AH; the AND below clobbers it.
        and     cUsed, 7                ; Zero to seven elements remain.
        jz      .finished
        sahf                            ; Bring the borrow back.
        jmp     .tail_loop              ; Enter past the CLC.

.tail_only:
        clc                             ; No borrow going in.
.tail_loop:
        mov     rax, [pauMinuend + r11]
        sbb     rax, [pauSubtrahend + r11]
        mov     [pauResult + r11], rax
        lea     r11, [r11 + 8]          ; Flags preserved.
        dec     cUsed                   ; CF preserved.
        jnz     .tail_loop
 %ifdef RT_STRICT
        jnc     .finished               ; A borrow out of the last element
        int3                            ; means the minuend was not the larger.
 %endif
.finished:

%elifdef RT_ARCH_X86
        ;
        ; x86: arguments are fetched from the stack frame.
        ;   eax = scratch element, ebx = byte offset; edi, esi and ebx are
        ;   callee saved and therefore pushed here.
        ;
        push    edi
        push    esi
        push    ebx

  %define pauResult         edi
  %define pauMinuend        ecx
  %define pauSubtrahend     edx
  %define cUsed             esi
        mov     pauResult, [ebp + 08h]
        mov     pauMinuend, [ebp + 0ch]
        mov     pauSubtrahend, [ebp + 10h]
        mov     cUsed, [ebp + 14h]

        xor     ebx, ebx                ; Byte offset into all three arrays.

        cmp     cUsed, 8                ; Fewer than one full group of eight?
        jb      .tail_only

        shr     cUsed, 3                ; cUsed = number of 8-element groups (>= 1).
        clc                             ; No borrow going in.
.group_loop:
 %assign offGroup 0
 %rep 8
        mov     eax, [pauMinuend + ebx + offGroup]
        sbb     eax, [pauSubtrahend + ebx + offGroup]
        mov     [pauResult + ebx + offGroup], eax
  %assign offGroup offGroup + 4
 %endrep
 %undef offGroup
        lea     ebx, [ebx + 32]         ; LEA so the borrow in CF survives.
        dec     cUsed                   ; DEC leaves CF untouched as well.
        jnz     .group_loop

        lahf                            ; Park CF in AH; the AND below clobbers it.
        mov     cUsed, [ebp + 14h]      ; Reload the original element count.
        and     cUsed, 7                ; Zero to seven elements remain.
        jz      .finished
        sahf                            ; Bring the borrow back.
        jmp     .tail_loop              ; Enter past the CLC.

.tail_only:
        clc                             ; No borrow going in.
.tail_loop:
        mov     eax, [pauMinuend + ebx]
        sbb     eax, [pauSubtrahend + ebx]
        mov     [pauResult + ebx], eax
        lea     ebx, [ebx + 4]          ; Flags preserved.
        dec     cUsed                   ; CF preserved.
        jnz     .tail_loop
 %ifdef RT_STRICT
        jnc     .finished               ; A borrow out of the last element
        int3                            ; means the minuend was not the larger.
 %endif
.finished:

        pop     ebx
        pop     esi
        pop     edi
%else
 %error "Unsupported arch"
%endif

        leave
        ret
%undef pauResult
%undef pauMinuend
%undef pauSubtrahend
%undef cUsed
ENDPROC rtBigNumMagnitudeSubAssemblyWorker
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
;;
; Subtracts a number (pauSubtrahend) from a larger number (pauResultMinuend),
; storing the difference in place, i.e. pauResultMinuend -= pauSubtrahend.
;
; Both numbers are expected to be zero padded such that a borrow can be
; carried one (or two for 64-bit) elements beyond the end of the largest
; number.  On AMD64 with 4 byte elements the count is rounded up to whole
; qwords and the arrays are processed 8 bytes at a time.
;
; The loops are do-while style (decrement, then test), so cUsed must not be
; zero.
;
; @returns  nothing.
; @param    pauResultMinuend    x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    pauSubtrahend       x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    cUsed               x86:[ebp + 16]  gcc:rdx  msc:r8
;
BEGINPROC rtBigNumMagnitudeSubThisAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
  %define pauResultMinuend  rdi
  %define pauSubtrahend     rsi
  %define cUsed             edx
 %else
  %define pauResultMinuend  rcx
  %define pauSubtrahend     rdx
  %define cUsed             r8d
 %endif
        xor     r11d, r11d              ; index register.

 %if RTBIGNUM_ELEMENT_SIZE == 4
        add     cUsed, 1                ; cUsed = RT_ALIGN(cUsed, 2) / 2
        shr     cUsed, 1
 %endif
        ; The threshold must equal the unroll factor of the big loop: with
        ; fewer than 8 elements r10d (cUsed / 8) would be zero, and since the
        ; big loop decrements before testing it would run past the arrays and
        ; wrap the counter.
        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        mov     r10d, cUsed
        shr     r10d, 3                 ; Number of 8-element rounds (>= 1).
        clc
.big_loop:
        mov     rax, [pauSubtrahend + r11]
        sbb     [pauResultMinuend + r11], rax
        mov     rax, [pauSubtrahend + r11 + 8]
        sbb     [pauResultMinuend + r11 + 8], rax
        mov     rax, [pauSubtrahend + r11 + 16]
        sbb     [pauResultMinuend + r11 + 16], rax
        mov     rax, [pauSubtrahend + r11 + 24]
        sbb     [pauResultMinuend + r11 + 24], rax
        mov     rax, [pauSubtrahend + r11 + 32]
        sbb     [pauResultMinuend + r11 + 32], rax
        mov     rax, [pauSubtrahend + r11 + 40]
        sbb     [pauResultMinuend + r11 + 40], rax
        mov     rax, [pauSubtrahend + r11 + 48]
        sbb     [pauResultMinuend + r11 + 48], rax
        mov     rax, [pauSubtrahend + r11 + 56]
        sbb     [pauResultMinuend + r11 + 56], rax
        lea     r11, [r11 + 64]         ; LEA does not change CF.
        dec     r10d                    ; Does not change CF.
        jnz     .big_loop

        lahf                            ; Save CF (the AND below clobbers it).
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Skip the clc, CF holds the borrow.

.small_job:
        clc
.small_loop:
        mov     rax, [pauSubtrahend + r11]
        sbb     [pauResultMinuend + r11], rax
        lea     r11, [r11 + 8]
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        jnc     .done                   ; A final borrow means the subtrahend
        int3                            ; was larger than the minuend.
 %endif
.done:

%elifdef RT_ARCH_X86
        push    edi
        push    ebx

        mov     edi, [ebp + 08h]        ; pauResultMinuend
  %define pauResultMinuend  edi
        mov     edx, [ebp + 0ch]        ; pauSubtrahend
  %define pauSubtrahend     edx
        mov     ecx, [ebp + 10h]        ; cUsed
  %define cUsed             ecx

        xor     ebx, ebx                ; index register.

        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        shr     cUsed, 3                ; Number of 8-element rounds (>= 1).
        clc
.big_loop:
        mov     eax, [pauSubtrahend + ebx]
        sbb     [pauResultMinuend + ebx], eax
        mov     eax, [pauSubtrahend + ebx + 4]
        sbb     [pauResultMinuend + ebx + 4], eax
        mov     eax, [pauSubtrahend + ebx + 8]
        sbb     [pauResultMinuend + ebx + 8], eax
        mov     eax, [pauSubtrahend + ebx + 12]
        sbb     [pauResultMinuend + ebx + 12], eax
        mov     eax, [pauSubtrahend + ebx + 16]
        sbb     [pauResultMinuend + ebx + 16], eax
        mov     eax, [pauSubtrahend + ebx + 20]
        sbb     [pauResultMinuend + ebx + 20], eax
        mov     eax, [pauSubtrahend + ebx + 24]
        sbb     [pauResultMinuend + ebx + 24], eax
        mov     eax, [pauSubtrahend + ebx + 28]
        sbb     [pauResultMinuend + ebx + 28], eax
        lea     ebx, [ebx + 32]         ; LEA does not change CF.
        dec     cUsed                   ; Does not change CF.
        jnz     .big_loop

        lahf                            ; Save CF (the AND below clobbers it).
        mov     cUsed, [ebp + 10h]      ; Up to seven odd rounds.
        and     cUsed, 7
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Skip the clc, CF holds the borrow.

.small_job:
        clc
.small_loop:
        mov     eax, [pauSubtrahend + ebx]
        sbb     [pauResultMinuend + ebx], eax
        lea     ebx, [ebx + 4]
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        jnc     .done                   ; A final borrow means the subtrahend
        int3                            ; was larger than the minuend.
 %endif
.done:

        pop     ebx
        pop     edi
%else
 %error "Unsupported arch"
%endif

        leave
        ret
%undef pauResultMinuend
%undef pauSubtrahend
%undef cUsed
ENDPROC rtBigNumMagnitudeSubThisAssemblyWorker
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync
;;
; Shifts an element array one bit to the left by rotating each element through
; the carry flag (RCL), lowest element first, and returns the bit shifted out
; of the last element.
;
; Any non-zero uCarry enters as a 1 bit at the bottom of the first element.
; The memory operand size follows RTBIGNUM_ELEMENT_SIZE (qword when it is 8,
; dword otherwise) on all hosts.  The array is said to be zero padded to a
; multiple of 8 bytes on 64-bit hosts, but this code does not rely on that.
;
; @returns  The final carry value (0 or 1); uCarry as passed in if cUsed is 0.
; @param    pauElements     x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    cUsed           x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    uCarry          x86:[ebp + 16]  gcc:rdx  msc:r8
;
BEGINPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
  %define pauElements       rdi
  %define cUsed             esi
  %define uCarry            edx
 %else
  %define pauElements       rcx
  %define cUsed             edx
  %define uCarry            r8d
 %endif
%elifdef RT_ARCH_X86
  %define pauElements       ecx
  %define cUsed             edx
  %define uCarry            eax
        mov     pauElements, [ebp + 08h]
        mov     cUsed, [ebp + 0ch]
        mov     uCarry, [ebp + 10h]
%else
 %error "Unsupported arch."
%endif

        ; Operand size and byte stride of one element.
%if RTBIGNUM_ELEMENT_SIZE == 8
 %define RTBN_SHL1_OPSIZE   qword
 %assign RTBN_SHL1_STRIDE   8
%else
 %define RTBN_SHL1_OPSIZE   dword
 %assign RTBN_SHL1_STRIDE   4
%endif

        ; Pick a strategy: unrolled groups of eight, single steps, or nothing.
        cmp     cUsed, 8
        jae     .unrolled_init
        test    cUsed, cUsed
        jnz     .single_init

        ; Empty array: the incoming carry is also the outgoing one.
        mov     eax, uCarry
        jmp     .epilogue

        ;
        ; Eight elements per iteration.
        ;
.unrolled_init:
%ifdef RT_ARCH_AMD64
        mov     r11d, cUsed             ; Keep the full count for the remainder.
%endif
        shr     cUsed, 3                ; Number of groups of eight (>= 1).
        test    uCarry, uCarry          ; TEST clears CF ...
        jz      .unrolled_loop
        stc                             ; ... set it when a carry comes in.
.unrolled_loop:
%assign offElem 0
%rep 8
        rcl     RTBN_SHL1_OPSIZE [pauElements + offElem], 1
 %assign offElem offElem + RTBN_SHL1_STRIDE
%endrep
%undef offElem
        lea     pauElements, [pauElements + 8 * RTBN_SHL1_STRIDE] ; Flags preserved.
        dec     cUsed                   ; CF preserved.
        jnz     .unrolled_loop

        ; Work out the remainder without losing CF.
        lahf                            ; CF -> AH (uCarry is dead by now, also on x86).
%ifdef RT_ARCH_AMD64
        mov     cUsed, r11d
%else
        mov     cUsed, [ebp + 0ch]
%endif
        and     cUsed, 7
        jnz     .single_resume
        sahf                            ; Nothing left, CF is the result.
        jmp     .carry_to_eax
.single_resume:
        sahf                            ; Carry on with the odd elements.
        jmp     .single_loop

        ;
        ; One element per iteration.
        ;
.single_init:
        test    uCarry, uCarry          ; TEST clears CF ...
        jz      .single_loop
        stc                             ; ... set it when a carry comes in.
.single_loop:
        rcl     RTBN_SHL1_OPSIZE [pauElements], 1
        lea     pauElements, [pauElements + RTBN_SHL1_STRIDE] ; Flags preserved.
        dec     cUsed                   ; CF preserved.
        jnz     .single_loop

        ; Turn CF into the return value.
.carry_to_eax:
        setc    al
        movzx   eax, al
.epilogue:
        leave
        ret
%undef RTBN_SHL1_OPSIZE
%undef RTBN_SHL1_STRIDE
ENDPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker
0aa6cc1aa05aa4f72b9403f59641a83111617872vboxsync