i386/gen/_div64.s

2N/A/*
2N/A * CDDL HEADER START
2N/A *
2N/A * The contents of this file are subject to the terms of the
2N/A * Common Development and Distribution License (the "License").
2N/A * You may not use this file except in compliance with the License.
2N/A *
2N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2N/A * or http://www.opensolaris.org/os/licensing.
2N/A * See the License for the specific language governing permissions
2N/A * and limitations under the License.
2N/A *
2N/A * When distributing Covered Code, include this CDDL HEADER in each
2N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
2N/A * If applicable, add the following below this CDDL HEADER, with the
2N/A * fields enclosed by brackets "[]" replaced with your own identifying
2N/A * information: Portions Copyright [yyyy] [name of copyright owner]
2N/A *
2N/A * CDDL HEADER END
2N/A */
2N/A/*
2N/A * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
2N/A * Use is subject to license terms.
2N/A */
2N/A
2N/A    .file   "_div64.s"
2N/A
2N/A#include "SYS.h"
2N/A
2N/A/*
2N/A * C support for 64-bit modulo and division.
2N/A * Hand-customized compiler output - see comments for details.
2N/A */
2N/A
2N/A/*
2N/A * int32_t/int64_t division/manipulation
2N/A *
2N/A * Hand-customized compiler output: the non-GCC entry points depart from
2N/A * the SYS V ABI by popping their own arguments on return ("ret $16"),
2N/A * and in the [u]divrem64 cases by returning the remainder in %ecx:%esi.
2N/A * Note the compiler-generated use of %edx:%eax for the first argument of
2N/A * internal entry points.
2N/A *
2N/A * Inlines for speed:
2N/A * - counting the number of leading zeros in a word
2N/A * - multiplying two 32-bit numbers giving a 64-bit result
2N/A * - dividing a 64-bit number by a 32-bit number, giving both quotient
2N/A *  and remainder
2N/A * - subtracting two 64-bit results
2N/A */
2N/A/ #define   LO(X)       ((uint32_t)(X) & 0xffffffff)
2N/A/ #define   HI(X)       ((uint32_t)((X) >> 32) & 0xffffffff)
2N/A/ #define   HILO(H, L)  (((uint64_t)(H) << 32) + (L))
2N/A/
2N/A/ /* give index of highest bit */
2N/A/ #define   HIBIT(a, r) \
2N/A/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
2N/A/
2N/A/ /* multiply two uint32_ts resulting in a uint64_t */
2N/A/ #define   A_MUL32(a, b, lo, hi) \
2N/A/     asm("mull %2" \
2N/A/   : "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
2N/A/
2N/A/ /* divide a uint64_t by a uint32_t */
2N/A/ #define   A_DIV32(lo, hi, b, q, r) \
2N/A/     asm("divl %2" \
2N/A/   : "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
2N/A/   : "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
2N/A/
2N/A/ /* subtract two uint64_ts (with borrow) */
2N/A/ #define   A_SUB2(bl, bh, al, ah) \
2N/A/     asm("subl %4,%0\n\tsbbl %5,%1" \
2N/A/   : "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
2N/A/   : "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
2N/A/   "g"((uint32_t)(bh)))
2N/A/
2N/A/ /*
2N/A/  * Unsigned division with remainder.
2N/A/  * Divide two uint64_ts, and calculate remainder.
2N/A/  */
2N/A/ uint64_t
2N/A/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
2N/A/ {
2N/A/   /* simple cases: y is a single uint32_t */
2N/A/   if (HI(y) == 0) {
2N/A/       uint32_t    div_hi, div_rem;
2N/A/       uint32_t    q0, q1;
2N/A/
2N/A/       /* calculate q1 */
2N/A/       if (HI(x) < LO(y)) {
2N/A/           /* result is a single uint32_t, use one division */
2N/A/           q1 = 0;
2N/A/           div_hi = HI(x);
2N/A/       } else {
2N/A/           /* result is a double uint32_t, use two divisions */
2N/A/           A_DIV32(HI(x), 0, LO(y), q1, div_hi);
2N/A/       }
2N/A/
2N/A/       /* calculate q0 and remainder */
2N/A/       A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
2N/A/
2N/A/       /* return remainder */
2N/A/       *pmod = div_rem;
2N/A/
2N/A/       /* return result */
2N/A/       return (HILO(q1, q0));
2N/A/
2N/A/   } else if (HI(x) < HI(y)) {
2N/A/       /* HI(x) < HI(y) => x < y => result is 0 */
2N/A/
2N/A/       /* return remainder */
2N/A/       *pmod = x;
2N/A/
2N/A/       /* return result */
2N/A/       return (0);
2N/A/
2N/A/   } else {
2N/A/       /*
2N/A/        * uint64_t by uint64_t division, resulting in a one-uint32_t
2N/A/        * result
2N/A/        */
2N/A/       uint32_t        y0, y1;
2N/A/       uint32_t        x1, x0;
2N/A/       uint32_t        q0;
2N/A/       uint32_t        normshift;
2N/A/
2N/A/       /* normalize by shifting x and y so MSB(y) == 1 */
2N/A/       HIBIT(HI(y), normshift);    /* index of highest 1 bit */
2N/A/       normshift = 31 - normshift;
2N/A/
2N/A/       if (normshift == 0) {
2N/A/           /* no shifting needed, and x < 2*y so q <= 1 */
2N/A/           y1 = HI(y);
2N/A/           y0 = LO(y);
2N/A/           x1 = HI(x);
2N/A/           x0 = LO(x);
2N/A/
2N/A/           /* if x >= y then q = 1 (note x1 >= y1) */
2N/A/           if (x1 > y1 || x0 >= y0) {
2N/A/               q0 = 1;
2N/A/               /* subtract y from x to get remainder */
2N/A/               A_SUB2(y0, y1, x0, x1);
2N/A/           } else {
2N/A/               q0 = 0;
2N/A/           }
2N/A/
2N/A/           /* return remainder */
2N/A/           *pmod = HILO(x1, x0);
2N/A/
2N/A/           /* return result */
2N/A/           return (q0);
2N/A/
2N/A/       } else {
2N/A/           /*
2N/A/            * the last case: result is one uint32_t, but we need to
2N/A/            * normalize
2N/A/            */
2N/A/           uint64_t    dt;
2N/A/           uint32_t        t0, t1, x2;
2N/A/
2N/A/           /* normalize y */
2N/A/           dt = (y << normshift);
2N/A/           y1 = HI(dt);
2N/A/           y0 = LO(dt);
2N/A/
2N/A/           /* normalize x (we need 3 uint32_ts!!!) */
2N/A/           x2 = (HI(x) >> (32 - normshift));
2N/A/           dt = (x << normshift);
2N/A/           x1 = HI(dt);
2N/A/           x0 = LO(dt);
2N/A/
2N/A/           /* estimate q0, and reduce x to a two uint32_t value */
2N/A/           A_DIV32(x1, x2, y1, q0, x1);
2N/A/
2N/A/           /* adjust q0 down if too high */
2N/A/           /*
2N/A/            * because of the limited range of x2 we can only be
2N/A/            * one off
2N/A/            */
2N/A/           A_MUL32(y0, q0, t0, t1);
2N/A/           if (t1 > x1 || (t1 == x1 && t0 > x0)) {
2N/A/               q0--;
2N/A/               A_SUB2(y0, y1, t0, t1);
2N/A/           }
2N/A/           /* return remainder */
2N/A/           /* subtract product from x to get remainder */
2N/A/           A_SUB2(t0, t1, x0, x1);
2N/A/           *pmod = (HILO(x1, x0) >> normshift);
2N/A/
2N/A/           /* return result */
2N/A/           return (q0);
2N/A/       }
2N/A/   }
2N/A/ }
2N/A    ENTRY(UDivRem)  /*
2N/A     * Unsigned 64-bit division with remainder (internal entry point).
2N/A     * In:  x in %edx:%eax; on entry y is at 4(%esp)/8(%esp) and pmod at
2N/A     *      12(%esp), i.e. 64/68/72(%esp) once the frame below is set up.
2N/A     * Out: quotient in %edx:%eax, remainder stored through pmod.
2N/A     * Ends with a plain "ret", so the stack arguments are left for the
2N/A     * caller to remove.  %ebp, %edi and %esi are saved and restored.
2N/A     */
2N/A    pushl   %ebp
2N/A    pushl   %edi
2N/A    pushl   %esi
2N/A    subl    $48, %esp   /* 48 bytes of locals; y is now at 64/68(%esp) */
2N/A    movl    68(%esp), %edi  / y,
2N/A    testl   %edi, %edi  / tmp63
2N/A    movl    %eax, 40(%esp)  / x, x
2N/A    movl    %edx, 44(%esp)  / x, x
2N/A    movl    %edi, %esi  /, tmp62
2N/A    movl    %edi, %ecx  / tmp62, tmp63
2N/A    jne .LL2    /* HI(y) != 0: divisor occupies both words */
2N/A    movl    %edx, %eax  /, tmp68
2N/A    cmpl    64(%esp), %eax  / y, tmp68
2N/A    jae .LL21   /* HI(x) >= LO(y): two divisions are needed */
2N/A.LL4:   /* HI(y) == 0: divide div_hi(%edx):LO(x) by LO(y); %ecx holds q1 */
2N/A    movl    72(%esp), %ebp  / pmod,
2N/A    xorl    %esi, %esi  / <result>
2N/A    movl    40(%esp), %eax  / x, q0
2N/A    movl    %ecx, %edi  / <result>, <result>
2N/A    divl    64(%esp)    / y
2N/A    movl    %edx, (%ebp)    / div_rem,
2N/A    xorl    %edx, %edx  / q0
2N/A    addl    %eax, %esi  / q0, <result>
2N/A    movl    $0, 4(%ebp) /* high word of *pmod is zero */
2N/A    adcl    %edx, %edi  / q0, <result>
2N/A    addl    $48, %esp
2N/A    movl    %esi, %eax  / <result>, <result>
2N/A    popl    %esi
2N/A    movl    %edi, %edx  / <result>, <result>
2N/A    popl    %edi
2N/A    popl    %ebp
2N/A    ret
2N/A    .align  16
2N/A.LL2:   /* HI(y) != 0 */
2N/A    movl    44(%esp), %eax  / x,
2N/A    xorl    %edx, %edx
2N/A    cmpl    %esi, %eax  / tmp62, tmp5
2N/A    movl    %eax, 32(%esp)  / tmp5,
2N/A    movl    %edx, 36(%esp)
2N/A    jae .LL6    /* HI(x) >= HI(y): quotient may be nonzero */
2N/A    movl    72(%esp), %esi  / pmod,
2N/A    movl    40(%esp), %ebp  / x,
2N/A    movl    44(%esp), %ecx  / x,
2N/A    movl    %ebp, (%esi)    /* HI(x) < HI(y): remainder is x itself */
2N/A    movl    %ecx, 4(%esi)
2N/A    xorl    %edi, %edi  / <result>
2N/A    xorl    %esi, %esi  / <result>
2N/A.LL22:  /* shared exit: return %edi:%esi in %edx:%eax */
2N/A    addl    $48, %esp
2N/A    movl    %esi, %eax  / <result>, <result>
2N/A    popl    %esi
2N/A    movl    %edi, %edx  / <result>, <result>
2N/A    popl    %edi
2N/A    popl    %ebp
2N/A    ret
2N/A    .align  16
2N/A.LL21:  /* first of two divisions: 0:HI(x) / LO(y) gives q1 and div_hi */
2N/A    movl    %edi, %edx  / tmp63, div_hi
2N/A    divl    64(%esp)    / y
2N/A    movl    %eax, %ecx  /, q1
2N/A    jmp .LL4
2N/A    .align  16
2N/A.LL6:   /* normshift = 31 - (index of highest set bit of HI(y)) */
2N/A    movl    $31, %edi   /, tmp87
2N/A    bsrl    %esi,%edx   / tmp62, normshift
2N/A    subl    %edx, %edi  / normshift, tmp87
2N/A    movl    %edi, 28(%esp)  / tmp87,
2N/A    jne .LL8    /* flags still from the subl: normshift != 0 */
2N/A    movl    32(%esp), %edx  /, x1
2N/A    cmpl    %ecx, %edx  / y1, x1
2N/A    movl    64(%esp), %edi  / y, y0
2N/A    movl    40(%esp), %esi  / x, x0
2N/A    ja  .LL10   /* x1 > y1: q0 = 1 */
2N/A    xorl    %ebp, %ebp  / q0
2N/A    cmpl    %edi, %esi  / y0, x0
2N/A    jb  .LL11   /* x0 < y0: q0 stays 0, x is the remainder */
2N/A.LL10:  /* q0 = 1, remainder = x - y */
2N/A    movl    $1, %ebp    /, q0
2N/A    subl    %edi,%esi   / y0, x0
2N/A    sbbl    %ecx,%edx   / tmp63, x1
2N/A.LL11:  /* store remainder x1:x0 through pmod, return q0 */
2N/A    movl    %edx, %ecx  / x1, x1
2N/A    xorl    %edx, %edx  / x1
2N/A    xorl    %edi, %edi  / x0
2N/A    addl    %esi, %edx  / x0, x1
2N/A    adcl    %edi, %ecx  / x0, x1
2N/A    movl    72(%esp), %esi  / pmod,
2N/A    movl    %edx, (%esi)    / x1,
2N/A    movl    %ecx, 4(%esi)   / x1,
2N/A    xorl    %edi, %edi  / <result>
2N/A    movl    %ebp, %esi  / q0, <result>
2N/A    jmp .LL22
2N/A    .align  16
2N/A.LL8:   /* normshift != 0: compute y << normshift in %edi:%esi */
2N/A    movb    28(%esp), %cl   /* %cl = normshift */
2N/A    movl    64(%esp), %esi  / y, dt
2N/A    movl    68(%esp), %edi  / y, dt
2N/A    shldl   %esi, %edi  /, dt, dt
2N/A    sall    %cl, %esi   /, dt
2N/A    andl    $32, %ecx   /* test bit 5 of the shift count */
2N/A    jne .LL23
2N/A.LL17:  /* %edi:%esi = y1:y0 (normalized y) */
2N/A    movl    $32, %ecx   /, tmp102
2N/A    subl    28(%esp), %ecx  /, tmp102
2N/A    movl    %esi, %ebp  / dt, y0
2N/A    movl    32(%esp), %esi  /* HI(x) */
2N/A    shrl    %cl, %esi   / tmp102,
2N/A    movl    %edi, 24(%esp)  / tmp99,
2N/A    movb    28(%esp), %cl   /* %cl = normshift */
2N/A    movl    %esi, 12(%esp)  /, x2
2N/A    movl    44(%esp), %edi  / x, dt
2N/A    movl    40(%esp), %esi  / x, dt
2N/A    shldl   %esi, %edi  /, dt, dt
2N/A    sall    %cl, %esi   /, dt
2N/A    andl    $32, %ecx   /* test bit 5 of the shift count */
2N/A    je  .LL18
2N/A    movl    %esi, %edi  / dt, dt
2N/A    xorl    %esi, %esi  / dt
2N/A.LL18:  /* %edi:%esi = x1:x0 (normalized x), x2 at 12(%esp), y1 at 24(%esp) */
2N/A    movl    %edi, %ecx  / dt,
2N/A    movl    %edi, %eax  / tmp2,
2N/A    movl    %ecx, (%esp)
2N/A    movl    12(%esp), %edx  / x2,
2N/A    divl    24(%esp)    /* x2:x1 / y1: q0 estimate in %eax, new x1 in %edx */
2N/A    movl    %edx, %ecx  /, x1
2N/A    xorl    %edi, %edi
2N/A    movl    %eax, 20(%esp)  /* q0 is kept at 20(%esp) */
2N/A    movl    %ebp, %eax  / y0, t0
2N/A    mull    20(%esp)    /* %edx:%eax = t1:t0 = y0 * q0 */
2N/A    cmpl    %ecx, %edx  / x1, t1
2N/A    movl    %edi, 4(%esp)
2N/A    ja  .LL14   /* t1 > x1: q0 is one too high */
2N/A    je  .LL24   /* t1 == x1: decide on the low words */
2N/A.LL15:  /* remainder = (x1:x0 - t1:t0) >> normshift */
2N/A    movl    %ecx, %edi  / x1,
2N/A    subl    %eax,%esi   / t0, x0
2N/A    sbbl    %edx,%edi   / t1,
2N/A    movl    %edi, %eax  /, x1
2N/A    movl    %eax, %edx  / x1, x1
2N/A    xorl    %eax, %eax  / x1
2N/A    xorl    %ebp, %ebp  / x0
2N/A    addl    %esi, %eax  / x0, x1
2N/A    adcl    %ebp, %edx  / x0, x1
2N/A    movb    28(%esp), %cl   /* %cl = normshift */
2N/A    shrdl   %edx, %eax  /, x1, x1
2N/A    shrl    %cl, %edx   /, x1
2N/A    andl    $32, %ecx   /* test bit 5 of the shift count */
2N/A    je  .LL16
2N/A    movl    %edx, %eax  / x1, x1
2N/A    xorl    %edx, %edx  / x1
2N/A.LL16:  /* store remainder through pmod; result is q0 with a zero high word */
2N/A    movl    72(%esp), %ecx  / pmod,
2N/A    movl    20(%esp), %esi  /, <result>
2N/A    xorl    %edi, %edi  / <result>
2N/A    movl    %eax, (%ecx)    / x1,
2N/A    movl    %edx, 4(%ecx)   / x1,
2N/A    jmp .LL22
2N/A    .align  16
2N/A.LL24:  /* t1 == x1: q0 is too high only if t0 > x0 */
2N/A    cmpl    %esi, %eax  / x0, t0
2N/A    jbe .LL15
2N/A.LL14:  /* q0--, and take one normalized y back out of the product t */
2N/A    decl    20(%esp)
2N/A    subl    %ebp,%eax   / y0, t0
2N/A    sbbl    24(%esp),%edx   /, t1
2N/A    jmp .LL15
2N/A.LL23:  /* bit 5 of the count was set: low word moves up, low word cleared */
2N/A    movl    %esi, %edi  / dt, dt
2N/A    xorl    %esi, %esi  / dt
2N/A    jmp .LL17
2N/A    SET_SIZE(UDivRem)
2N/A
2N/A/*
2N/A * Unsigned division without remainder.
2N/A */
2N/A/ uint64_t
2N/A/ UDiv(uint64_t x, uint64_t y)
2N/A/ {
2N/A/   if (HI(y) == 0) {
2N/A/       /* simple cases: y is a single uint32_t */
2N/A/       uint32_t    div_hi, div_rem;
2N/A/       uint32_t    q0, q1;
2N/A/
2N/A/       /* calculate q1 */
2N/A/       if (HI(x) < LO(y)) {
2N/A/           /* result is a single uint32_t, use one division */
2N/A/           q1 = 0;
2N/A/           div_hi = HI(x);
2N/A/       } else {
2N/A/           /* result is a double uint32_t, use two divisions */
2N/A/           A_DIV32(HI(x), 0, LO(y), q1, div_hi);
2N/A/       }
2N/A/
2N/A/       /* calculate q0 and remainder */
2N/A/       A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
2N/A/
2N/A/       /* return result */
2N/A/       return (HILO(q1, q0));
2N/A/
2N/A/   } else if (HI(x) < HI(y)) {
2N/A/       /* HI(x) < HI(y) => x < y => result is 0 */
2N/A/
2N/A/       /* return result */
2N/A/       return (0);
2N/A/
2N/A/   } else {
2N/A/       /*
2N/A/        * uint64_t by uint64_t division, resulting in a one-uint32_t
2N/A/        * result
2N/A/        */
2N/A/       uint32_t        y0, y1;
2N/A/       uint32_t        x1, x0;
2N/A/       uint32_t        q0;
2N/A/       unsigned        normshift;
2N/A/
2N/A/       /* normalize by shifting x and y so MSB(y) == 1 */
2N/A/       HIBIT(HI(y), normshift);    /* index of highest 1 bit */
2N/A/       normshift = 31 - normshift;
2N/A/
2N/A/       if (normshift == 0) {
2N/A/           /* no shifting needed, and x < 2*y so q <= 1 */
2N/A/           y1 = HI(y);
2N/A/           y0 = LO(y);
2N/A/           x1 = HI(x);
2N/A/           x0 = LO(x);
2N/A/
2N/A/           /* if x >= y then q = 1 (note x1 >= y1) */
2N/A/           if (x1 > y1 || x0 >= y0) {
2N/A/               q0 = 1;
2N/A/               /* subtract y from x to get remainder */
2N/A/               /* A_SUB2(y0, y1, x0, x1); */
2N/A/           } else {
2N/A/               q0 = 0;
2N/A/           }
2N/A/
2N/A/           /* return result */
2N/A/           return (q0);
2N/A/
2N/A/       } else {
2N/A/           /*
2N/A/            * the last case: result is one uint32_t, but we need to
2N/A/            * normalize
2N/A/            */
2N/A/           uint64_t    dt;
2N/A/           uint32_t        t0, t1, x2;
2N/A/
2N/A/           /* normalize y */
2N/A/           dt = (y << normshift);
2N/A/           y1 = HI(dt);
2N/A/           y0 = LO(dt);
2N/A/
2N/A/           /* normalize x (we need 3 uint32_ts!!!) */
2N/A/           x2 = (HI(x) >> (32 - normshift));
2N/A/           dt = (x << normshift);
2N/A/           x1 = HI(dt);
2N/A/           x0 = LO(dt);
2N/A/
2N/A/           /* estimate q0, and reduce x to a two uint32_t value */
2N/A/           A_DIV32(x1, x2, y1, q0, x1);
2N/A/
2N/A/           /* adjust q0 down if too high */
2N/A/           /*
2N/A/            * because of the limited range of x2 we can only be
2N/A/            * one off
2N/A/            */
2N/A/           A_MUL32(y0, q0, t0, t1);
2N/A/           if (t1 > x1 || (t1 == x1 && t0 > x0)) {
2N/A/               q0--;
2N/A/           }
2N/A/           /* return result */
2N/A/           return (q0);
2N/A/       }
2N/A/   }
2N/A/ }
2N/A    ENTRY(UDiv) /*
2N/A     * Unsigned 64-bit division, quotient only (internal entry point).
2N/A     * In:  x in %edx:%eax; on entry y is at 4(%esp)/8(%esp), i.e.
2N/A     *      56(%esp)/60(%esp) once the frame below is set up.
2N/A     * Out: quotient in %edx:%eax.
2N/A     * Ends with a plain "ret", so the stack arguments are left for the
2N/A     * caller to remove.  %ebp, %edi and %esi are saved and restored.
2N/A     */
2N/A    pushl   %ebp
2N/A    pushl   %edi
2N/A    pushl   %esi
2N/A    subl    $40, %esp   /* 40 bytes of locals; y is now at 56/60(%esp) */
2N/A    movl    %edx, 36(%esp)  / x, x
2N/A    movl    60(%esp), %edx  / y,
2N/A    testl   %edx, %edx  / tmp62
2N/A    movl    %eax, 32(%esp)  / x, x
2N/A    movl    %edx, %ecx  / tmp61, tmp62
2N/A    movl    %edx, %eax  /, tmp61
2N/A    jne .LL26   /* HI(y) != 0: divisor occupies both words */
2N/A    movl    36(%esp), %esi  / x,
2N/A    cmpl    56(%esp), %esi  / y, tmp67
2N/A    movl    %esi, %eax  /, tmp67
2N/A    movl    %esi, %edx  / tmp67, div_hi
2N/A    jb  .LL28   /* HI(x) < LO(y): one division, q1 = 0 (%ecx is HI(y) == 0) */
2N/A    movl    %ecx, %edx  / tmp62, div_hi
2N/A    divl    56(%esp)    / y
2N/A    movl    %eax, %ecx  /, q1
2N/A.LL28:  /* divide div_hi(%edx):LO(x) by LO(y); %ecx holds q1 */
2N/A    xorl    %esi, %esi  / <result>
2N/A    movl    %ecx, %edi  / <result>, <result>
2N/A    movl    32(%esp), %eax  / x, q0
2N/A    xorl    %ecx, %ecx  / q0
2N/A    divl    56(%esp)    / y
2N/A    addl    %eax, %esi  / q0, <result>
2N/A    adcl    %ecx, %edi  / q0, <result>
2N/A.LL25:  /* exit: return %edi:%esi in %edx:%eax */
2N/A    addl    $40, %esp
2N/A    movl    %esi, %eax  / <result>, <result>
2N/A    popl    %esi
2N/A    movl    %edi, %edx  / <result>, <result>
2N/A    popl    %edi
2N/A    popl    %ebp
2N/A    ret
2N/A    .align  16
2N/A.LL26:  /* HI(y) != 0 */
2N/A    movl    36(%esp), %esi  / x,
2N/A    xorl    %edi, %edi
2N/A    movl    %esi, 24(%esp)  / tmp1,
2N/A    movl    %edi, 28(%esp)
2N/A    xorl    %esi, %esi  / <result>
2N/A    xorl    %edi, %edi  / <result>
2N/A    cmpl    %eax, 24(%esp)  / tmp61,
2N/A    jb  .LL25   /* HI(x) < HI(y): quotient is 0 */
2N/A    bsrl    %eax,%ebp   / tmp61, normshift
2N/A    movl    $31, %eax   /, tmp85
2N/A    subl    %ebp, %eax  / normshift, normshift
2N/A    jne .LL32   /* normshift != 0: normalization required */
2N/A    movl    24(%esp), %eax  /, x1
2N/A    cmpl    %ecx, %eax  / tmp62, x1
2N/A    movl    56(%esp), %esi  / y, y0
2N/A    movl    32(%esp), %edx  / x, x0
2N/A    ja  .LL34   /* x1 > y1: q0 = 1 */
2N/A    xorl    %eax, %eax  / q0
2N/A    cmpl    %esi, %edx  / y0, x0
2N/A    jb  .LL35   /* x0 < y0: q0 stays 0 */
2N/A.LL34:
2N/A    movl    $1, %eax    /, q0
2N/A.LL35:  /* result = q0 with a zero high word */
2N/A    movl    %eax, %esi  / q0, <result>
2N/A    xorl    %edi, %edi  / <result>
2N/A.LL45:  /* exit: return %edi:%esi in %edx:%eax */
2N/A    addl    $40, %esp
2N/A    movl    %esi, %eax  / <result>, <result>
2N/A    popl    %esi
2N/A    movl    %edi, %edx  / <result>, <result>
2N/A    popl    %edi
2N/A    popl    %ebp
2N/A    ret
2N/A    .align  16
2N/A.LL32:  /* %eax = normshift: compute y << normshift in %edi:%esi */
2N/A    movb    %al, %cl
2N/A    movl    56(%esp), %esi  / y,
2N/A    movl    60(%esp), %edi  / y,
2N/A    shldl   %esi, %edi
2N/A    sall    %cl, %esi
2N/A    andl    $32, %ecx   /* test bit 5 of the shift count */
2N/A    jne .LL43
2N/A.LL40:  /* %edi:%esi = y1:y0 (normalized y) */
2N/A    movl    $32, %ecx   /, tmp96
2N/A    subl    %eax, %ecx  / normshift, tmp96
2N/A    movl    %edi, %edx  /* keep y1 in %edx; stored at 8(%esp) below */
2N/A    movl    %edi, 20(%esp)  /, dt
2N/A    movl    24(%esp), %ebp  /, x2
2N/A    xorl    %edi, %edi
2N/A    shrl    %cl, %ebp   / tmp96, x2
2N/A    movl    %esi, 16(%esp)  /, dt
2N/A    movb    %al, %cl    /* %cl = normshift */
2N/A    movl    32(%esp), %esi  / x, dt
2N/A    movl    %edi, 12(%esp)
2N/A    movl    36(%esp), %edi  / x, dt
2N/A    shldl   %esi, %edi  /, dt, dt
2N/A    sall    %cl, %esi   /, dt
2N/A    andl    $32, %ecx   /* test bit 5 of the shift count */
2N/A    movl    %edx, 8(%esp)   /* y1, used as the divisor below */
2N/A    je  .LL41
2N/A    movl    %esi, %edi  / dt, dt
2N/A    xorl    %esi, %esi  / dt
2N/A.LL41:  /* %edi:%esi = x1:x0 (normalized x), x2 in %ebp */
2N/A    xorl    %ecx, %ecx
2N/A    movl    %edi, %eax  / tmp1,
2N/A    movl    %ebp, %edx  / x2,
2N/A    divl    8(%esp)     /* x2:x1 / y1: q0 estimate in %eax, new x1 in %edx */
2N/A    movl    %edx, %ebp  /, x1
2N/A    movl    %ecx, 4(%esp)
2N/A    movl    %eax, %ecx  /, q0
2N/A    movl    16(%esp), %eax  / dt,
2N/A    mull    %ecx    / q0
2N/A    cmpl    %ebp, %edx  / x1, t1
2N/A    movl    %edi, (%esp)
2N/A    movl    %esi, %edi  / dt, x0
2N/A    ja  .LL38   /* t1 > x1: q0 is one too high */
2N/A    je  .LL44   /* t1 == x1: decide on the low words */
2N/A.LL39:
2N/A    movl    %ecx, %esi  / q0, <result>
2N/A.LL46:  /* zero the high word of the result and leave */
2N/A    xorl    %edi, %edi  / <result>
2N/A    jmp .LL45
2N/A.LL44:  /* t1 == x1: q0 is too high only if t0 > x0 */
2N/A    cmpl    %edi, %eax  / x0, t0
2N/A    jbe .LL39
2N/A.LL38:
2N/A    decl    %ecx        / q0
2N/A    movl    %ecx, %esi  / q0, <result>
2N/A    jmp .LL46
2N/A.LL43:  /* bit 5 of the count was set: low word moves up, low word cleared */
2N/A    movl    %esi, %edi
2N/A    xorl    %esi, %esi
2N/A    jmp .LL40
2N/A    SET_SIZE(UDiv)
2N/A
2N/A/*
2N/A * __udiv64
2N/A *
2N/A * Perform division of two unsigned 64-bit quantities, returning the
2N/A * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
2N/A */
2N/A    ENTRY(__udiv64) /*
2N/A     * Stack-argument wrapper around UDiv.  On entry x is at
2N/A     * 4(%esp)/8(%esp) and y at 12(%esp)/16(%esp).  x is moved into
2N/A     * %edx:%eax and y is re-pushed for UDiv; the quotient comes back in
2N/A     * %edx:%eax.
2N/A     */
2N/A    movl    4(%esp), %eax   / x, x
2N/A    movl    8(%esp), %edx   / x, x
2N/A    pushl   16(%esp)    / y
2N/A    pushl   16(%esp)    /* %esp moved by the push above, so this is the low word of y */
2N/A    call    UDiv
2N/A    addl    $8, %esp    /* discard the copy of y pushed for UDiv */
2N/A    ret     $16         /* return and pop the 16 bytes of incoming arguments */
2N/A    SET_SIZE(__udiv64)
2N/A
2N/A/*
2N/A * __urem64
2N/A *
2N/A * Perform division of two unsigned 64-bit quantities, returning the
2N/A * remainder in %edx:%eax.  __urem64 pops the arguments on return.
2N/A */
2N/A    ENTRY(__urem64) /*
2N/A     * Stack-argument wrapper around UDivRem that returns only the
2N/A     * remainder.  On entry x is at 4(%esp)/8(%esp) and y at
2N/A     * 12(%esp)/16(%esp).  A local at the bottom of the 12-byte frame
2N/A     * receives the remainder through the pmod argument of UDivRem.
2N/A     */
2N/A    subl    $12, %esp   /* local frame; x is now at 16/20(%esp) */
2N/A    movl    %esp, %ecx  /, tmp65
2N/A    movl    16(%esp), %eax  / x, x
2N/A    movl    20(%esp), %edx  / x, x
2N/A    pushl   %ecx        / tmp65
2N/A    pushl   32(%esp)    / y
2N/A    pushl   32(%esp)    /* %esp moved by the push above, so this is the low word of y */
2N/A    call    UDivRem
2N/A    movl    12(%esp), %eax  / rem, rem
2N/A    movl    16(%esp), %edx  / rem, rem
2N/A    addl    $24, %esp   /* drop the 3 pushed words and the 12-byte frame */
2N/A    ret $16             /* return and pop the 16 bytes of incoming arguments */
2N/A    SET_SIZE(__urem64)
2N/A
2N/A/*
2N/A * __div64
2N/A *
2N/A * Perform division of two signed 64-bit quantities, returning the
2N/A * quotient in %edx:%eax.  __div64 pops the arguments on return.
2N/A */
2N/A/ int64_t
2N/A/ __div64(int64_t x, int64_t y)
2N/A/ {
2N/A/   int     negative;
2N/A/   uint64_t    xt, yt, r;
2N/A/
2N/A/   if (x < 0) {
2N/A/       xt = -(uint64_t) x;
2N/A/       negative = 1;
2N/A/   } else {
2N/A/       xt = x;
2N/A/       negative = 0;
2N/A/   }
2N/A/   if (y < 0) {
2N/A/       yt = -(uint64_t) y;
2N/A/       negative ^= 1;
2N/A/   } else {
2N/A/       yt = y;
2N/A/   }
2N/A/   r = UDiv(xt, yt);
2N/A/   return (negative ? (int64_t) - r : r);
2N/A/ }
2N/A    ENTRY(__div64)  /*
2N/A     * Signed 64-bit division built on UDiv.  On entry x is at
2N/A     * 4(%esp)/8(%esp) and y at 12(%esp)/16(%esp); after the frame below
2N/A     * they are at 24/28(%esp) and 32/36(%esp).  Both operands are made
2N/A     * non-negative, %ebp records whether the quotient must be negated,
2N/A     * and the quotient is returned in %edx:%eax.
2N/A     * 64-bit negation is done as: negl lo; adcl $0, hi; negl hi.
2N/A     */
2N/A    pushl   %ebp
2N/A    pushl   %edi
2N/A    pushl   %esi
2N/A    subl    $8, %esp    /* 8-byte local holding xt at (%esp)/4(%esp) */
2N/A    movl    28(%esp), %edx  / x, x
2N/A    testl   %edx, %edx  / x
2N/A    movl    24(%esp), %eax  / x, x
2N/A    movl    32(%esp), %esi  / y, y
2N/A    movl    36(%esp), %edi  / y, y
2N/A    js  .LL84   /* x < 0 */
2N/A    xorl    %ebp, %ebp  / negative
2N/A    testl   %edi, %edi  / y
2N/A    movl    %eax, (%esp)    / x, xt
2N/A    movl    %edx, 4(%esp)   / x, xt
2N/A    movl    %esi, %eax  / y, yt
2N/A    movl    %edi, %edx  / y, yt
2N/A    js  .LL85   /* y < 0 */
2N/A.LL82:  /* xt is in the local, yt in %edx:%eax: call UDiv(xt, yt) */
2N/A    pushl   %edx        / yt
2N/A    pushl   %eax        / yt
2N/A    movl    8(%esp), %eax   / xt, xt
2N/A    movl    12(%esp), %edx  / xt, xt
2N/A    call    UDiv
2N/A    popl    %ecx        /* discard first pushed word of yt */
2N/A    testl   %ebp, %ebp  / negative
2N/A    popl    %esi        /* discard second pushed word of yt */
2N/A    je  .LL83   /* quotient is non-negative: return it unchanged */
2N/A    negl    %eax        / r
2N/A    adcl    $0, %edx    /, r
2N/A    negl    %edx        / r
2N/A.LL83:
2N/A    addl    $8, %esp
2N/A    popl    %esi
2N/A    popl    %edi
2N/A    popl    %ebp
2N/A    ret $16             /* return and pop the 16 bytes of incoming arguments */
2N/A    .align  16
2N/A.LL84:  /* x < 0: xt = -x, negative = 1 */
2N/A    negl    %eax        / x
2N/A    adcl    $0, %edx    /, x
2N/A    negl    %edx        / x
2N/A    testl   %edi, %edi  / y
2N/A    movl    %eax, (%esp)    / x, xt
2N/A    movl    %edx, 4(%esp)   / x, xt
2N/A    movl    $1, %ebp    /, negative
2N/A    movl    %esi, %eax  / y, yt
2N/A    movl    %edi, %edx  / y, yt
2N/A    jns .LL82   /* y >= 0; otherwise fall through to .LL85 */
2N/A    .align  16
2N/A.LL85:  /* y < 0: yt = -y and flip the sign flag */
2N/A    negl    %eax        / yt
2N/A    adcl    $0, %edx    /, yt
2N/A    negl    %edx        / yt
2N/A    xorl    $1, %ebp    /, negative
2N/A    jmp .LL82
2N/A    SET_SIZE(__div64)
2N/A
2N/A/*
2N/A * __rem64
2N/A *
2N/A * Perform division of two signed 64-bit quantities, returning the
2N/A * remainder in %edx:%eax.  __rem64 pops the arguments on return.
2N/A */
2N/A/ int64_t
2N/A/ __rem64(int64_t x, int64_t y)
2N/A/ {
2N/A/   uint64_t    xt, yt, rem;
2N/A/
2N/A/   if (x < 0) {
2N/A/       xt = -(uint64_t) x;
2N/A/   } else {
2N/A/       xt = x;
2N/A/   }
2N/A/   if (y < 0) {
2N/A/       yt = -(uint64_t) y;
2N/A/   } else {
2N/A/       yt = y;
2N/A/   }
2N/A/   (void) UDivRem(xt, yt, &rem);
2N/A/   return (x < 0 ? (int64_t) - rem : rem);
2N/A/ }
2N/A    ENTRY(__rem64)  /*
2N/A     * Signed 64-bit remainder built on UDivRem.  On entry x is at
2N/A     * 4(%esp)/8(%esp) and y at 12(%esp)/16(%esp); after the frame below
2N/A     * they are at 32/36(%esp) and 40/44(%esp).  Both operands are made
2N/A     * non-negative; the remainder is returned in %edx:%eax and is
2N/A     * negated when x was negative.
2N/A     * Locals: xt at (%esp)/4(%esp), rem at 8(%esp)/12(%esp).
2N/A     */
2N/A    pushl   %edi
2N/A    pushl   %esi
2N/A    subl    $20, %esp
2N/A    movl    36(%esp), %ecx  / x,
2N/A    movl    32(%esp), %esi  / x,
2N/A    movl    36(%esp), %edi  / x,
2N/A    testl   %ecx, %ecx
2N/A    movl    40(%esp), %eax  / y, y
2N/A    movl    44(%esp), %edx  / y, y
2N/A    movl    %esi, (%esp)    /, xt
2N/A    movl    %edi, 4(%esp)   /, xt
2N/A    js  .LL92   /* x < 0 */
2N/A    testl   %edx, %edx  / y
2N/A    movl    %eax, %esi  / y, yt
2N/A    movl    %edx, %edi  / y, yt
2N/A    js  .LL93   /* y < 0 */
2N/A.LL90:  /* xt is in the local, yt in %edi:%esi: call UDivRem(xt, yt, &rem) */
2N/A    leal    8(%esp), %eax   /, tmp66
2N/A    pushl   %eax        / tmp66
2N/A    pushl   %edi        / yt
2N/A    pushl   %esi        / yt
2N/A    movl    12(%esp), %eax  / xt, xt
2N/A    movl    16(%esp), %edx  / xt, xt
2N/A    call    UDivRem
2N/A    addl    $12, %esp   /* discard the three words pushed for UDivRem */
2N/A    movl    36(%esp), %edi  / x,
2N/A    testl   %edi, %edi
2N/A    movl    8(%esp), %eax   / rem, rem
2N/A    movl    12(%esp), %edx  / rem, rem
2N/A    js  .LL94   /* x < 0: remainder must be negated */
2N/A    addl    $20, %esp
2N/A    popl    %esi
2N/A    popl    %edi
2N/A    ret $16             /* return and pop the 16 bytes of incoming arguments */
2N/A    .align  16
2N/A.LL92:  /* x < 0: xt = -x */
2N/A    negl    %esi
2N/A    adcl    $0, %edi
2N/A    negl    %edi
2N/A    testl   %edx, %edx  / y
2N/A    movl    %esi, (%esp)    /, xt
2N/A    movl    %edi, 4(%esp)   /, xt
2N/A    movl    %eax, %esi  / y, yt
2N/A    movl    %edx, %edi  / y, yt
2N/A    jns .LL90   /* y >= 0; otherwise fall through to .LL93 */
2N/A    .align  16
2N/A.LL93:  /* y < 0: yt = -y */
2N/A    negl    %esi        / yt
2N/A    adcl    $0, %edi    /, yt
2N/A    negl    %edi        / yt
2N/A    jmp .LL90
2N/A    .align  16
2N/A.LL94:  /* negate rem, interleaved with the epilogue */
2N/A    negl    %eax        / rem
2N/A    adcl    $0, %edx    /, rem
2N/A    addl    $20, %esp
2N/A    popl    %esi
2N/A    negl    %edx        / rem
2N/A    popl    %edi
2N/A    ret $16             /* return and pop the 16 bytes of incoming arguments */
2N/A    SET_SIZE(__rem64)
2N/A
2N/A/*
2N/A * __udivrem64
2N/A *
2N/A * Perform division of two unsigned 64-bit quantities, returning the
2N/A * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
2N/A * pops the arguments on return.
2N/A */
2N/A    ENTRY(__udivrem64)  /*
2N/A     * Stack-argument wrapper around UDivRem.  On entry x is at
2N/A     * 4(%esp)/8(%esp) and y at 12(%esp)/16(%esp).  The quotient from
2N/A     * UDivRem is left in %edx:%eax and the remainder is loaded into
2N/A     * %ecx:%esi from a local at the bottom of the 12-byte frame.
2N/A     * Note that %esi is overwritten with the low word of the remainder
2N/A     * and is not restored.
2N/A     */
2N/A    subl    $12, %esp   /* local frame; x is now at 16/20(%esp) */
2N/A    movl    %esp, %ecx  /, tmp64
2N/A    movl    16(%esp), %eax  / x, x
2N/A    movl    20(%esp), %edx  / x, x
2N/A    pushl   %ecx        / tmp64
2N/A    pushl   32(%esp)    / y
2N/A    pushl   32(%esp)    /* %esp moved by the push above, so this is the low word of y */
2N/A    call    UDivRem
2N/A    movl    16(%esp), %ecx  / rem, tmp63
2N/A    movl    12(%esp), %esi  / rem
2N/A    addl    $24, %esp   /* drop the 3 pushed words and the 12-byte frame */
2N/A    ret $16             /* return and pop the 16 bytes of incoming arguments */
2N/A    SET_SIZE(__udivrem64)
2N/A
2N/A/*
2N/A * Signed division with remainder.
2N/A */
2N/A/ int64_t
2N/A/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
2N/A/ {
2N/A/   int     negative;
2N/A/   uint64_t    xt, yt, r, rem;
2N/A/
2N/A/   if (x < 0) {
2N/A/       xt = -(uint64_t) x;
2N/A/       negative = 1;
2N/A/   } else {
2N/A/       xt = x;
2N/A/       negative = 0;
2N/A/   }
2N/A/   if (y < 0) {
2N/A/       yt = -(uint64_t) y;
2N/A/       negative ^= 1;
2N/A/   } else {
2N/A/       yt = y;
2N/A/   }
2N/A/   r = UDivRem(xt, yt, &rem);
2N/A/   *pmod = (x < 0 ? (int64_t) - rem : rem);
2N/A/   return (negative ? (int64_t) - r : r);
2N/A/ }
2N/A    ENTRY(SDivRem)  /*
2N/A     * Signed 64-bit division with remainder (internal entry point),
2N/A     * built on UDivRem.
2N/A     * In:  x in %edx:%eax; on entry y is at 4(%esp)/8(%esp) and pmod at
2N/A     *      12(%esp), i.e. 40/44/48(%esp) once the frame below is set up.
2N/A     * Out: quotient in %edx:%eax; remainder stored through pmod, negated
2N/A     *      when x was negative.
2N/A     * Locals: r at (%esp)/4(%esp), xt at 8/12(%esp), rem at 16/20(%esp).
2N/A     * %edi keeps HI(x) and %ebp the quotient sign flag across the call.
2N/A     * Ends with a plain "ret", so the stack arguments are left for the
2N/A     * caller to remove.
2N/A     */
2N/A    pushl   %ebp
2N/A    pushl   %edi
2N/A    pushl   %esi
2N/A    subl    $24, %esp
2N/A    testl   %edx, %edx  / x
2N/A    movl    %edx, %edi  / x, x
2N/A    js  .LL73   /* x < 0 */
2N/A    movl    44(%esp), %esi  / y,
2N/A    xorl    %ebp, %ebp  / negative
2N/A    testl   %esi, %esi
2N/A    movl    %edx, 12(%esp)  / x, xt
2N/A    movl    %eax, 8(%esp)   / x, xt
2N/A    movl    40(%esp), %edx  / y, yt
2N/A    movl    44(%esp), %ecx  / y, yt
2N/A    js  .LL74   /* y < 0 */
2N/A.LL70:  /* xt is in the local, yt in %ecx:%edx: call UDivRem(xt, yt, &rem) */
2N/A    leal    16(%esp), %eax  /, tmp70
2N/A    pushl   %eax        / tmp70
2N/A    pushl   %ecx        / yt
2N/A    pushl   %edx        / yt
2N/A    movl    20(%esp), %eax  / xt, xt
2N/A    movl    24(%esp), %edx  / xt, xt
2N/A    call    UDivRem
2N/A    movl    %edx, 16(%esp)  /, r
2N/A    movl    %eax, 12(%esp)  /, r
2N/A    addl    $12, %esp   /* discard the pushed words; r is now at (%esp)/4(%esp) */
2N/A    testl   %edi, %edi  / x
2N/A    movl    16(%esp), %edx  / rem, rem
2N/A    movl    20(%esp), %ecx  / rem, rem
2N/A    js  .LL75   /* x < 0: remainder must be negated */
2N/A.LL71:  /* store rem (%ecx:%edx) through pmod, then fix the quotient sign */
2N/A    movl    48(%esp), %edi  / pmod, pmod
2N/A    testl   %ebp, %ebp  / negative
2N/A    movl    %edx, (%edi)    / rem,* pmod
2N/A    movl    %ecx, 4(%edi)   / rem,
2N/A    movl    (%esp), %eax    / r, r
2N/A    movl    4(%esp), %edx   / r, r
2N/A    je  .LL72   /* quotient is non-negative: return it unchanged */
2N/A    negl    %eax        / r
2N/A    adcl    $0, %edx    /, r
2N/A    negl    %edx        / r
2N/A.LL72:
2N/A    addl    $24, %esp
2N/A    popl    %esi
2N/A    popl    %edi
2N/A    popl    %ebp
2N/A    ret
2N/A    .align  16
2N/A.LL73:  /* x < 0: xt = -x, negative = 1 */
2N/A    negl    %eax
2N/A    adcl    $0, %edx
2N/A    movl    44(%esp), %esi  / y,
2N/A    negl    %edx
2N/A    testl   %esi, %esi
2N/A    movl    %edx, 12(%esp)  /, xt
2N/A    movl    %eax, 8(%esp)   /, xt
2N/A    movl    $1, %ebp    /, negative
2N/A    movl    40(%esp), %edx  / y, yt
2N/A    movl    44(%esp), %ecx  / y, yt
2N/A    jns .LL70   /* y >= 0; otherwise fall through to .LL74 */
2N/A    .align  16
2N/A.LL74:  /* y < 0: yt = -y and flip the sign flag */
2N/A    negl    %edx        / yt
2N/A    adcl    $0, %ecx    /, yt
2N/A    negl    %ecx        / yt
2N/A    xorl    $1, %ebp    /, negative
2N/A    jmp .LL70
2N/A    .align  16
2N/A.LL75:  /* rem = -rem */
2N/A    negl    %edx        / rem
2N/A    adcl    $0, %ecx    /, rem
2N/A    negl    %ecx        / rem
2N/A    jmp .LL71
2N/A    SET_SIZE(SDivRem)
2N/A
2N/A/*
2N/A * __divrem64
2N/A *
2N/A * Perform division of two signed 64-bit quantities, returning the
2N/A * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
2N/A * pops the arguments on return.
2N/A */
2N/A    ENTRY(__divrem64)   /*
2N/A     * Stack-argument wrapper around SDivRem.  On entry x is at
2N/A     * 4(%esp)/8(%esp) and y at 12(%esp)/16(%esp).  The quotient from
2N/A     * SDivRem is left in %edx:%eax and the remainder is loaded into
2N/A     * %ecx:%esi from a local at the bottom of the 20-byte frame.
2N/A     * Note that %esi is overwritten with the low word of the remainder
2N/A     * and is not restored.
2N/A     */
2N/A    subl    $20, %esp   /* local frame; x is now at 24/28(%esp) */
2N/A    movl    %esp, %ecx  /, tmp64
2N/A    movl    24(%esp), %eax  / x, x
2N/A    movl    28(%esp), %edx  / x, x
2N/A    pushl   %ecx        / tmp64
2N/A    pushl   40(%esp)    / y
2N/A    pushl   40(%esp)    /* %esp moved by the push above, so this is the low word of y */
2N/A    call    SDivRem
2N/A    movl    16(%esp), %ecx  /* high word of the remainder */
2N/A    movl    12(%esp),%esi   / rem
2N/A    addl    $32, %esp   /* drop the 3 pushed words and the 20-byte frame */
2N/A    ret $16             /* return and pop the 16 bytes of incoming arguments */
2N/A    SET_SIZE(__divrem64)