sgs/rtld.4.x/div.s

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

!   .seg    "data"
!   .asciz  "Copyr 1986 Sun Micro"
    .seg    "text"

#ident  "%Z%%M% %I% %E% SMI"

/*
 * Copyright 1986 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * divison/remainder
 *
 * Input is:
 *  dividend -- the thing being divided
 * divisor  -- how many ways to divide
 * Important parameters:
 *  N -- how many bits per iteration we try to get
 *      as our current guess:
 *  WORDSIZE -- how many bits altogether we're talking about:
 *      obviously:
 * A derived constant:
 *  TOPBITS -- how many bits are in the top "decade" of a number:
 *
 * Important variables are:
 *  Q -- the partial quotient under development -- initally 0
 *  R -- the remainder so far -- initially == the dividend
 *  ITER -- number of iterations of the main division loop will
 *      be required. Equal to CEIL( lg2(quotient)/4 )
 *      Note that this is log_base_(2^4) of the quotient.
 *  V -- the current comparand -- initially divisor*2^(ITER*4-1)
 * Cost:
 *  current estimate for non-large dividend is
 *      CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
 *  a large dividend is one greater than 2^(31-4 ) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */

#include <sys/trap.h>
#include <sys/asm_linkage.h>


    ! working variable


/*
 * this is the recursive definition of how we develop quotient digits.
 * it takes three important parameters:
 *  $1 -- the current depth, 1<=$1<=4
 *  $2 -- the current accumulation of quotient bits
 *  4  -- max depth
 * We add a new bit to $2 and either recurse or
 * insert the bits in the quotient.
 * Dynamic input:
 *  %o3 -- current remainder
 *  %o2 -- current quotient
 *  %o5 -- current comparand
 *  cc -- set on current value of %o3
 * Dynamic output:
 * %o3', %o2', %o5', cc'
 */


!   RTENTRY(.udiv)      ! unsigned divide
    .global .udiv
.udiv:
    b   divide
    mov 0,%g1       ! result always positive

!   RTENTRY(.div)       ! SIGNED DIVIDE
    .global .div
.div:
    orcc    %o1,%o0,%g0 ! are either %o0 or %o1 negative
    bge divide      ! if not, skip this junk
    xor %o1,%o0,%g1 ! record sign of result in sign of %g1
        tst %o1
        bge 2f
        tst %o0
    !   %o1 < 0
        bge divide
        neg %o1
    2:
    !   %o0 < 0
        neg %o0
    !   FALL THROUGH


divide:
!   compute size of quotient, scale comparand
    orcc    %o1,%g0,%o5 ! movcc %o1,%o5
    bnz 0f      ! if %o1 != 0
    mov %o0,%o3
    ba  zero_divide
    nop
0:
    cmp     %o3,%o5
    blu     got_result ! if %o3<%o5 already, there's no point in continuing
    mov 0,%o2
    sethi   %hi(1<<(32-4 -1)),%g2
    cmp %o3,%g2
    blu not_really_big
    mov 0,%o4
    !
    ! here, the %o0 is >= 2^(31-4) or so. We must be careful here, as
    ! our usual 4-at-a-shot divide step will cause overflow and havoc. The
    ! total number of bits in the result here is 4*%o4+%g3, where %g3 <= 4.
    ! compute %o4, in an unorthodox manner: know we need to Shift %o5 into
    !   the top decade: so don't even bother to compare to %o3.
    1:
        cmp %o5,%g2
        bgeu    3f
        mov 1,%g3
        sll %o5,4,%o5
        b   1b
        inc %o4
    ! now compute %g3
    2:  addcc   %o5,%o5,%o5
        bcc not_too_big ! bcc   not_too_big
        add %g3,1,%g3
            !
            ! here if the %o1 overflowed when Shifting
            ! this means that %o3 has the high-order bit set
            ! restore %o5 and subtract from %o3
            sll %g2,4 ,%g2 ! high order bit
            srl %o5,1,%o5 ! rest of %o5
            add %o5,%g2,%o5
            b   do_single_div
            sub %g3,1,%g3
    not_too_big:
    3:  cmp %o5,%o3
        blu 2b
        nop
        be  do_single_div
        nop
    ! %o5 > %o3: went too far: back up 1 step
    !   srl %o5,1,%o5
    !   dec %g3
    ! do single-bit divide steps
    !
    ! we have to be careful here. We know that %o3 >= %o5, so we can do the
    ! first divide step without thinking. BUT, the others are conditional,
    ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
    ! order bit set in the first step, just falling into the regular
    ! division loop will mess up the first time around.
    ! So we unroll slightly...
    do_single_div:
        deccc   %g3
        bl  end_regular_divide
        nop
        sub %o3,%o5,%o3
        mov 1,%o2
        b,a end_single_divloop
    single_divloop:
        sll %o2,1,%o2
        bl  1f
        srl %o5,1,%o5
        ! %o3 >= 0
        sub %o3,%o5,%o3
        b   2f
        inc %o2
    1:  ! %o3 < 0
        add %o3,%o5,%o3
        dec %o2
    2:
    end_single_divloop:
        deccc   %g3
        bge single_divloop
        tst %o3
        b,a end_regular_divide

not_really_big:
1:
    sll %o5,4,%o5
    cmp %o5,%o3
    bleu    1b
    inccc   %o4
    be  got_result
    dec %o4
do_regular_divide:

!   do the main division iteration
    tst %o3
!   fall through into divide loop
divloop:
    sll %o2,4,%o2
        !depth 1, accumulated bits 0
    bl  L.1.16
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
            !depth 2, accumulated bits 1
    bl  L.2.17
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
            !depth 3, accumulated bits 3
    bl  L.3.19
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
            !depth 4, accumulated bits 7
    bl  L.4.23
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
        b   9f
        add %o2, (7*2+1), %o2

L.4.23: ! remainder is negative
    addcc   %o3,%o5,%o3
        b   9f
        add %o2, (7*2-1), %o2


L.3.19: ! remainder is negative
    addcc   %o3,%o5,%o3
            !depth 4, accumulated bits 5
    bl  L.4.21
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
        b   9f
        add %o2, (5*2+1), %o2

L.4.21: ! remainder is negative
    addcc   %o3,%o5,%o3
        b   9f
        add %o2, (5*2-1), %o2


L.2.17: ! remainder is negative
    addcc   %o3,%o5,%o3
            !depth 3, accumulated bits 1
    bl  L.3.17
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
            !depth 4, accumulated bits 3
    bl  L.4.19
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
        b   9f
        add %o2, (3*2+1), %o2

L.4.19: ! remainder is negative
    addcc   %o3,%o5,%o3
        b   9f
        add %o2, (3*2-1), %o2


L.3.17: ! remainder is negative
    addcc   %o3,%o5,%o3
            !depth 4, accumulated bits 1
    bl  L.4.17
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
        b   9f
        add %o2, (1*2+1), %o2

L.4.17: ! remainder is negative
    addcc   %o3,%o5,%o3
        b   9f
        add %o2, (1*2-1), %o2


L.1.16: ! remainder is negative
    addcc   %o3,%o5,%o3
            !depth 2, accumulated bits -1
    bl  L.2.15
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
            !depth 3, accumulated bits -1
    bl  L.3.15
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
            !depth 4, accumulated bits -1
    bl  L.4.15
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
        b   9f
        add %o2, (-1*2+1), %o2

L.4.15: ! remainder is negative
    addcc   %o3,%o5,%o3
        b   9f
        add %o2, (-1*2-1), %o2


L.3.15: ! remainder is negative
    addcc   %o3,%o5,%o3
            !depth 4, accumulated bits -3
    bl  L.4.13
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
        b   9f
        add %o2, (-3*2+1), %o2

L.4.13: ! remainder is negative
    addcc   %o3,%o5,%o3
        b   9f
        add %o2, (-3*2-1), %o2


L.2.15: ! remainder is negative
    addcc   %o3,%o5,%o3
            !depth 3, accumulated bits -3
    bl  L.3.13
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
            !depth 4, accumulated bits -5
    bl  L.4.11
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
        b   9f
        add %o2, (-5*2+1), %o2

L.4.11: ! remainder is negative
    addcc   %o3,%o5,%o3
        b   9f
        add %o2, (-5*2-1), %o2


L.3.13: ! remainder is negative
    addcc   %o3,%o5,%o3
            !depth 4, accumulated bits -7
    bl  L.4.9
    srl %o5,1,%o5
    ! remainder is positive
    subcc   %o3,%o5,%o3
        b   9f
        add %o2, (-7*2+1), %o2

L.4.9:  ! remainder is negative
    addcc   %o3,%o5,%o3
        b   9f
        add %o2, (-7*2-1), %o2


    9:

end_regular_divide:
    deccc   %o4
    bge divloop
    tst %o3
    bl,a    got_result
    dec %o2


got_result:
    tst %g1
    bl,a    1f
    neg %o2 ! quotient  <- -%o2

1:
    retl
    mov %o2,%o0 ! quotient  <-  %o2


zero_divide:
    ta  ST_DIV0     ! divide by zero trap
    retl            ! if handled, ignored, return
    mov 0, %o0