!
! Copyright 2005 Sun Microsystems, Inc. All rights reserved.
! Use is subject to license terms.
!
! CDDL HEADER START
!
! The contents of this file are subject to the terms of the
! Common Development and Distribution License, Version 1.0 only
! (the "License"). You may not use this file except in compliance
! with the License.
!
! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
! or http://www.opensolaris.org/os/licensing.
! See the License for the specific language governing permissions
! and limitations under the License.
!
! When distributing Covered Code, include this CDDL HEADER in each
! file and include the License file at usr/src/OPENSOLARIS.LICENSE.
! If applicable, add the following below this CDDL HEADER, with the
! fields enclosed by brackets "[]" replaced with your own identifying
! information: Portions Copyright [yyyy] [name of copyright owner]
!
! CDDL HEADER END
!
.ident "%Z%%M% %I% %E% SMI"
! /*
! * This file contains __quad_mag_add and __quad_mag_sub, the core
! * of the quad precision add and subtract operations. This SPARC V9
! * version is hand-coded in assembly to use the 64-bit integer
! * registers.
! */
.file "__quad_mag64.s"
#include <sys/asm_linkage.h>
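!
! /*
! * For reference (SPARC V9 FSR layout; these FSR_* names match the
! * values assumed by the C versions quoted below):
! *
! *	fsr >> 30		rounding direction: FSR_RN = 0,
! *				FSR_RZ = 1, FSR_RP = 2, FSR_RM = 3
! *	fsr & 0x1f		current exceptions (cexc): FSR_NXC = 0x01,
! *				FSR_DZC = 0x02, FSR_UFC = 0x04,
! *				FSR_OFC = 0x08, FSR_NVC = 0x10
! *	(fsr >> 23) & 0x1f	trap enable mask (TEM); FSR_UFM enables
! *				the underflow trap (bit 25 of the FSR)
! */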
! union longdouble {
! struct {
! unsigned int msw;
! unsigned int frac2;
! unsigned int frac3;
! unsigned int frac4;
! } l;
! struct {
! unsigned long msll;
! unsigned long frac;
! } ll;
! long double d;
! };
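!
! /*
! * For example (a sketch, not part of the interface), a caller can
! * view a long double q through the union and pick apart its fields
! * the same way the routines below do; on SPARC the storage is
! * big-endian, so msll holds the sign, exponent, and the high-order
! * fraction bits:
! *
! *	union longdouble u;
! *	unsigned int ex;
! *	unsigned long lx;
! *
! *	u.d = q;
! *	ex = (u.ll.msll >> 48) & 0x7fff;	biased exponent
! *	lx = u.ll.msll & ~0xffff000000000000ul;	fraction bits 111..64
! */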
!
! /*
! * __quad_mag_add(x, y, z, fsr)
! *
! * Sets *z = *x + *y, rounded according to the rounding mode in *fsr,
! * and updates the current exceptions in *fsr. This routine assumes
! * *x and *y are finite, with the same sign (i.e., an addition of
! * magnitudes), |*x| >= |*y|, and *z already has its sign bit set.
! */
! void
! __quad_mag_add(const union longdouble *x, const union longdouble *y,
! union longdouble *z, unsigned int *fsr)
! {
! unsigned long lx, ly, frac, sticky;
! unsigned int ex, ey, round, rm;
! int e, uflo;
!
! /* get the leading significand double-words and exponents */
! ex = (x->ll.msll >> 48) & 0x7fff;
! lx = x->ll.msll & ~0xffff000000000000ul;
! if (ex == 0)
! ex = 1;
! else
! lx |= 0x0001000000000000ul;
!
! ey = (y->ll.msll >> 48) & 0x7fff;
! ly = y->ll.msll & ~0xffff000000000000ul;
! if (ey == 0)
! ey = 1;
! else
! ly |= 0x0001000000000000ul;
!
! /* prenormalize y */
! e = (int) ex - (int) ey;
! round = sticky = 0;
! if (e >= 114) {
! frac = x->ll.frac;
! sticky = ly | y->ll.frac;
! } else {
! frac = y->ll.frac;
! if (e >= 64) {
! sticky = frac & 0x7ffffffffffffffful;
! round = frac >> 63;
! frac = ly;
! ly = 0;
! e -= 64;
! }
! if (e) {
! sticky |= round | (frac & ((1ul << (e - 1)) - 1));
! round = (frac >> (e - 1)) & 1;
! frac = (frac >> e) | (ly << (64 - e));
! ly >>= e;
! }
!
! /* add, propagating carries */
! frac += x->ll.frac;
! lx += ly;
! if (frac < x->ll.frac)
! lx++;
!
! /* postnormalize */
! if (lx >= 0x0002000000000000ul) {
! sticky |= round;
! round = frac & 1;
! frac = (frac >> 1) | (lx << 63);
! lx >>= 1;
! ex++;
! }
! }
!
! /* keep track of whether the result before rounding is tiny */
! uflo = (lx < 0x0001000000000000ul);
!
! /* get the rounding mode, fudging directed rounding modes
! as though the result were positive */
! rm = *fsr >> 30;
! if (z->l.msw)
! rm ^= (rm >> 1);
!
! /* see if we need to round */
! if (round | sticky) {
! *fsr |= FSR_NXC;
!
! /* round up if necessary */
! if (rm == FSR_RP || (rm == FSR_RN && round &&
! (sticky || (frac & 1)))) {
! if (++frac == 0)
! if (++lx >= 0x0002000000000000ul) {
! lx >>= 1;
! ex++;
! }
! }
! }
!
! /* check for overflow */
! if (ex >= 0x7fff) {
! /* store the default overflowed result */
! *fsr |= FSR_OFC | FSR_NXC;
! if (rm == FSR_RN || rm == FSR_RP) {
! z->l.msw |= 0x7fff0000;
! z->l.frac2 = 0;
! z->ll.frac = 0;
! } else {
! z->l.msw |= 0x7ffeffff;
! z->l.frac2 = 0xffffffff;
! z->ll.frac = 0xfffffffffffffffful;
! }
! } else {
! /* store the result */
! if (lx >= 0x0001000000000000ul)
! z->l.msw |= (ex << 16);
! z->l.msw |= (lx >> 32) & 0xffff;
! z->l.frac2 = (lx & 0xffffffff);
! z->ll.frac = frac;
!
! /* if the pre-rounded result was tiny and underflow trapping
! is enabled, simulate underflow */
! if (uflo && (*fsr & FSR_UFM))
! *fsr |= FSR_UFC;
! }
! }
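!
! /*
! * The "add, propagating carries" step above uses the standard
! * double-word idiom: after an unsigned add, the low word is less
! * than an addend exactly when the add wrapped around. In isolation
! * (a minimal sketch):
! *
! *	zlo = xlo + ylo;
! *	zhi = xhi + yhi;
! *	if (zlo < xlo)		carry out of the low 64 bits
! *		zhi++;
! */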
ENTRY(__quad_mag_add)
save %sp,-SA(MINFRAME),%sp
sethi %hi(0xffff0000),%g1
sllx %g1,32,%g1 ! g1 = 0xffff000000000000
sethi %hi(0x7fff),%l7
or %l7,%lo(0x7fff),%l7 ! l7 = 0x7fff
ldx [%i0],%o0
srlx %o0,48,%l0
andcc %l0,%l7,%l0 ! l0 = ex
beq,pn %icc,1f
andn %o0,%g1,%o0 ! o0 = lx
ba,pt %icc,2f
sub %o0,%g1,%o0
1:
mov 1,%l0
2:
ldx [%i1],%o1
srlx %o1,48,%l1
andcc %l1,%l7,%l1 ! l1 = ey
beq,pn %icc,1f
andn %o1,%g1,%o1 ! o1 = ly
ba,pt %icc,2f
sub %o1,%g1,%o1
1:
mov 1,%l1
2:
sub %l0,%l1,%l1 ! l1 = e = ex - ey
cmp %l1,114 ! see if we need to prenormalize y
bge,pn %icc,1f
mov 0,%l6 ! l6 = round
mov 0,%o7 ! o7 = sticky
cmp %l1,64
bl,pt %icc,3f
ldx [%i1+8],%o2 ! o2 = frac
sllx %o2,1,%o7 ! lop off high order bit
srlx %o2,63,%l6
mov %o1,%o2
mov 0,%o1
sub %l1,64,%l1
3:
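! if e != 0, shift y right e more bits, collecting round and sticky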
tst %l1
beq,pn %icc,4f
sub %l1,1,%l2
mov 1,%o3
sllx %o3,%l2,%o3
sub %o3,1,%o3
and %o3,%o2,%o3
or %o3,%l6,%o3
or %o7,%o3,%o7
srlx %o2,%l2,%o4
and %o4,1,%l6
srlx %o2,%l1,%o2
mov 64,%l3
sub %l3,%l1,%l3
sllx %o1,%l3,%o5
or %o2,%o5,%o2
srlx %o1,%l1,%o1
4:
ldx [%i0+8],%o3
add %o2,%o3,%o2 ! add, propagating carry
cmp %o2,%o3
bgeu,pt %xcc,5f
add %o0,%o1,%o0
add %o0,1,%o0
5:
srlx %o0,49,%o5 ! if sum carried out, postnormalize
tst %o5
beq,pt %icc,2f
nop
or %o7,%l6,%o7
and %o2,1,%l6
srlx %o2,1,%o2
sllx %o0,63,%o3
or %o2,%o3,%o2
srlx %o0,1,%o0
ba,pt %icc,2f
add %l0,1,%l0
1:
ldx [%i0+8],%o2 ! (full prenormalization shift case)
ldx [%i1+8],%o3
or %o1,%o3,%o7
2:
add %o0,%g1,%o1 ! see if sum is tiny
srlx %o1,63,%l2 ! l2 = uflo
ld [%i3],%i4 ! get the rounding mode
srl %i4,30,%l3 ! l3 = rm
ld [%i2],%l4 ! l4 = z->l.msw
tst %l4
beq,pn %icc,1f
srl %l3,1,%l5
xor %l3,%l5,%l3
1:
orcc %o7,%l6,%g0 ! see if we need to round
beq,pn %xcc,1f
andcc %l3,1,%g0
or %i4,1,%i4
bne,pn %icc,1f
tst %l3
bne,pn %icc,2f
tst %l6
beq,pn %icc,1f
and %o2,1,%o3
orcc %o3,%o7,%g0
beq,pn %xcc,1f
nop
2:
addcc %o2,1,%o2 ! round up and check for carry out
bne,pt %xcc,1f
nop
add %o0,1,%o0
srlx %o0,49,%o1
tst %o1
beq,pt %icc,1f
nop
srlx %o0,1,%o0
add %l0,1,%l0
1:
cmp %l0,%l7 ! check for overflow
bge,pn %icc,1f
addcc %o0,%g1,%g0
bl,pn %xcc,2f
sll %l0,16,%l1
or %l4,%l1,%l4
2:
sllx %o0,16,%o1
srlx %o1,48,%o1
or %l4,%o1,%l4
st %l4,[%i2]
st %o0,[%i2+4]
stx %o2,[%i2+8]
tst %l2 ! see if we need to raise underflow
beq,pt %icc,3f
srl %i4,23,%i5
andcc %i5,4,%i5
ba,pt %icc,3f
or %i4,%i5,%i4
1:
andcc %l3,1,%g0
bne,pn %icc,2f
or %i4,9,%i4 ! overflow
sll %l7,16,%l7 ! 7fff00000...
or %l4,%l7,%l4
st %l4,[%i2]
st %g0,[%i2+4]
ba,pt %icc,3f
stx %g0,[%i2+8]
2:
mov -1,%o0 ! 7ffeffff...
sll %l7,16,%l7
add %o0,%l7,%l7
or %l4,%l7,%l4
st %l4,[%i2]
st %o0,[%i2+4]
stx %o0,[%i2+8]
3:
st %i4,[%i3]
ret
restore
SET_SIZE(__quad_mag_add)
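!
! /*
! * Example invocation from C (a sketch; assumes the union shown at
! * the top of this file and a caller that has already screened out
! * NaNs and infinities and ordered the operands so |x| >= |y|):
! *
! *	union longdouble x, y, z;
! *	unsigned int fsr;
! *
! *	x.d = 1.0L;
! *	y.d = 1.0e-30L;
! *	fsr = 0;			round to nearest, no traps
! *	z.l.msw = 0;			sign of the result: positive
! *	__quad_mag_add(&x, &y, &z, &fsr);
! *	now fsr & FSR_NXC indicates whether the sum was rounded
! */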
! /*
! * __quad_mag_sub(x, y, z, fsr)
! *
! * Sets *z = *x - *y, rounded according to the rounding mode in *fsr,
! * and updates the current exceptions in *fsr. This routine assumes
! * *x and *y are finite, with opposite signs (i.e., a subtraction of
! * magnitudes), |*x| >= |*y|, and *z already has its sign bit set.
! */
! void
! __quad_mag_sub(const union longdouble *x, const union longdouble *y,
! union longdouble *z, unsigned int *fsr)
! {
! unsigned long lx, ly, frac, sticky;
! unsigned int ex, ey, gr, borrow, rm;
! int e;
!
! /* get the leading significand double-words and exponents */
! ex = (x->ll.msll >> 48) & 0x7fff;
! lx = x->ll.msll & ~0xffff000000000000ul;
! if (ex == 0)
! ex = 1;
! else
! lx |= 0x0001000000000000ul;
!
! ey = (y->ll.msll >> 48) & 0x7fff;
! ly = y->ll.msll & ~0xffff000000000000ul;
! if (ey == 0)
! ey = 1;
! else
! ly |= 0x0001000000000000ul;
!
! /* prenormalize y */
! e = (int) ex - (int) ey;
! gr = sticky = 0;
! if (e > 114) {
! sticky = ly | y->ll.frac;
! ly = frac = 0;
! } else {
! frac = y->ll.frac;
! if (e >= 64) {
! gr = frac >> 62;
! sticky = frac << 2;
! frac = ly;
! ly = 0;
! e -= 64;
! }
! if (e > 1) {
! sticky |= gr | (frac & ((1ul << (e - 2)) - 1));
! gr = (frac >> (e - 2)) & 3;
! frac = (frac >> e) | (ly << (64 - e));
! ly >>= e;
! } else if (e == 1) {
! sticky |= (gr & 1);
! gr = (gr >> 1) | ((frac & 1) << 1);
! frac = (frac >> 1) | (ly << 63);
! ly >>= 1;
! }
! }
!
! /* complement guard, round, and sticky as need be */
! gr <<= 1;
! if (sticky)
! gr |= 1;
! gr = (-gr & 7);
! if (gr)
! if (++frac == 0)
! ly++;
!
! /* subtract, propagating borrows */
! frac = x->ll.frac - frac;
! lx -= ly;
! if (frac > x->ll.frac)
! lx--;
!
! /* get the rounding mode */
! rm = *fsr >> 30;
!
! /* handle zero result */
! if (!(lx | frac | gr)) {
! z->l.msw = ((rm == FSR_RM)? 0x80000000 : 0);
! z->l.frac2 = z->l.frac3 = z->l.frac4 = 0;
! return;
! }
!
! /* postnormalize */
! if (lx < 0x0001000000000000ul) {
! /* if cancellation occurred or the exponent is 1,
! the result is exact */
! if (lx < 0x0000800000000000ul || ex == 1) {
! if ((lx | (frac & 0xfffe000000000000ul)) == 0 &&
! ex > 64) {
! lx = frac;
! frac = (unsigned long) gr << 61;
! gr = 0;
! ex -= 64;
! }
! while (lx < 0x0001000000000000ul && ex > 1) {
! lx = (lx << 1) | (frac >> 63);
! frac = (frac << 1) | (gr >> 2);
! gr = 0;
! ex--;
! }
! if (lx >= 0x0001000000000000ul)
! z->l.msw |= (ex << 16);
! z->l.msw |= ((lx >> 32) & 0xffff);
! z->l.frac2 = (lx & 0xffffffff);
! z->ll.frac = frac;
!
! /* if the result is tiny and underflow trapping is
! enabled, simulate underflow */
! if (lx < 0x0001000000000000ul && (*fsr & FSR_UFM))
! *fsr |= FSR_UFC;
! return;
! }
!
! /* otherwise we only borrowed one place */
! lx = (lx << 1) | (frac >> 63);
! frac = (frac << 1) | (gr >> 2);
! gr &= 3;
! ex--;
! }
! else
! gr = (gr >> 1) | (gr & 1);
!
! /* fudge directed rounding modes as though the result were positive */
! if (z->l.msw)
! rm ^= (rm >> 1);
!
! /* see if we need to round */
! if (gr) {
! *fsr |= FSR_NXC;
!
! /* round up if necessary */
! if (rm == FSR_RP || (rm == FSR_RN && (gr & 2) &&
! ((gr & 1) || (frac & 1)))) {
! if (++frac == 0)
! if (++lx >= 0x0002000000000000ul) {
! lx >>= 1;
! ex++;
! }
! }
! }
!
! /* store the result */
! z->l.msw |= (ex << 16) | ((lx >> 32) & 0xffff);
! z->l.frac2 = (lx & 0xffffffff);
! z->ll.frac = frac;
! }
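!
! /*
! * Note on the "complement guard, round, and sticky" step above:
! * rather than borrowing y's discarded bits out of frac one at a
! * time, the three bits g,r,s are negated modulo 8 and, when they
! * are nonzero, one extra unit is borrowed from frac up front,
! * using the identity (for grs != 0)
! *
! *	x - (frac + grs/8) == x - (frac + 1) + (8 - grs)/8
! */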
ENTRY(__quad_mag_sub)
save %sp,-SA(MINFRAME),%sp
sethi %hi(0xffff0000),%g1
sllx %g1,32,%g1 ! g1 = 0xffff000000000000
sethi %hi(0x7fff),%l7
or %l7,%lo(0x7fff),%l7 ! l7 = 0x7fff
ldx [%i0],%o0
srlx %o0,48,%l0
andcc %l0,%l7,%l0 ! l0 = ex
beq,pn %icc,1f
andn %o0,%g1,%o0 ! o0 = lx
ba,pt %icc,2f
sub %o0,%g1,%o0
1:
mov 1,%l0
2:
ldx [%i1],%o1
srlx %o1,48,%l1
andcc %l1,%l7,%l1 ! l1 = ey
beq,pn %icc,1f
andn %o1,%g1,%o1 ! o1 = ly
ba,pt %icc,2f
sub %o1,%g1,%o1
1:
mov 1,%l1
2:
sub %l0,%l1,%l1 ! l1 = e = ex - ey
cmp %l1,114 ! see if we need to prenormalize y
bg,pn %icc,1f
mov 0,%l6 ! l6 = gr
mov 0,%o7 ! o7 = sticky
cmp %l1,64
bl,pt %icc,3f
ldx [%i1+8],%o2 ! o2 = frac
srlx %o2,62,%l6
sllx %o2,2,%o7 ! lop off top two bits
mov %o1,%o2
mov 0,%o1
sub %l1,64,%l1
3:
cmp %l1,1
ble,pn %icc,4f
sub %l1,2,%l2 ! shift more than one bit
mov 1,%o3
sllx %o3,%l2,%o3
sub %o3,1,%o3
and %o3,%o2,%o3
or %o3,%l6,%o3
or %o7,%o3,%o7
srlx %o2,%l2,%o4
and %o4,3,%l6
srlx %o2,%l1,%o2
mov 64,%l3
sub %l3,%l1,%l3
sllx %o1,%l3,%o5
or %o2,%o5,%o2
ba,pt %icc,2f
srlx %o1,%l1,%o1
4:
bne,pn %icc,2f
and %l6,1,%o3 ! shift one bit
or %o7,%o3,%o7
and %o2,1,%o4
sllx %o4,1,%o4
srl %l6,1,%l6
or %l6,%o4,%l6
srlx %o2,1,%o2
sllx %o1,63,%o5
or %o2,%o5,%o2
ba,pt %icc,2f
srlx %o1,1,%o1
1:
ldx [%i1+8],%o3 ! (full prenormalization shift case)
or %o1,%o3,%o7
mov 0,%o1
mov 0,%o2
2:
tst %o7 ! complement guard, round, and
beq,pn %xcc,1f ! sticky as need be
sll %l6,1,%l6
or %l6,1,%l6
1:
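! negate gr mod 8; if the shifted-out bits were nonzero, carry into frac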
subcc %g0,%l6,%l6
beq,pn %icc,1f
and %l6,7,%l6
addcc %o2,1,%o2
beq,a,pn %xcc,1f
add %o1,1,%o1
1:
ldx [%i0+8],%o3 ! subtract, propagating borrows
sub %o3,%o2,%o2
cmp %o3,%o2
bgeu,pt %xcc,5f
sub %o0,%o1,%o0
sub %o0,1,%o0
5:
ld [%i3],%i4 ! get the rounding mode
srl %i4,30,%l3 ! l3 = rm
or %o0,%o2,%o1 ! look for zero result
orcc %o1,%l6,%g0
bne,pt %xcc,1f
srl %l3,1,%l4
and %l3,%l4,%l4
sll %l4,31,%l4
st %l4,[%i2]
st %g0,[%i2+4]
stx %g0,[%i2+8]
ret
restore
1:
addcc %o0,%g1,%g0 ! postnormalize
bl,pt %xcc,1f
ld [%i2],%l4 ! l4 = z->l.msw
and %l6,1,%l5 ! (no cancellation or borrow case)
srl %l6,1,%l6
ba,pt %icc,2f
or %l6,%l5,%l6
1:
srax %g1,1,%o7
addcc %o0,%o7,%g0
bl,pn %xcc,1f
cmp %l0,1
beq,pt %icc,1f
srlx %o2,63,%o3 ! borrowed one place
sllx %o0,1,%o0
or %o0,%o3,%o0
srl %l6,2,%o4
sllx %o2,1,%o2
or %o2,%o4,%o2
and %l6,3,%l6
ba,pt %icc,2f
sub %l0,1,%l0
1:
srlx %o2,49,%o3 ! cancellation or tiny result
orcc %o0,%o3,%g0
bne,pt %xcc,1f
cmp %l0,64
ble,pn %icc,1f
nop
mov %o2,%o0
sllx %l6,61,%o2
mov 0,%l6
sub %l0,64,%l0
1:
addcc %o0,%g1,%g0 ! normalization loop
bge,pn %xcc,1f
cmp %l0,1
ble,pn %icc,1f
srl %l6,2,%l6
srlx %o2,63,%o3
sllx %o0,1,%o0
or %o0,%o3,%o0
sllx %o2,1,%o2
or %o2,%l6,%o2
ba,pt %icc,1b
sub %l0,1,%l0
1:
sllx %o0,16,%o1
srlx %o1,48,%l5
or %l4,%l5,%l4
addcc %o0,%g1,%g0 ! see if result is tiny
bl,pn %xcc,1f
sll %l0,16,%l5
or %l4,%l5,%l4
1:
st %l4,[%i2]
st %o0,[%i2+4]
bge,pt %xcc,1f
stx %o2,[%i2+8]
srl %i4,23,%i5
andcc %i5,4,%g0 ! see if we need to raise underflow
beq,pt %icc,1f
or %i4,4,%i4
st %i4,[%i3]
1:
ret
restore
2:
tst %l4 ! fudge directed rounding modes
beq,pn %icc,1f
srl %l3,1,%l5
xor %l3,%l5,%l3
1:
tst %l6 ! see if we need to round
beq,pn %icc,1f
or %i4,1,%i4
st %i4,[%i3]
andcc %l3,1,%g0
bne,pn %icc,1f
tst %l3
bne,pn %icc,2f
andcc %l6,2,%g0
beq,pn %icc,1f
or %l6,%o2,%o3
andcc %o3,1,%o3
beq,pn %xcc,1f
nop
2:
addcc %o2,1,%o2 ! round up and check for carry
bne,pt %xcc,1f
nop
add %o0,1,%o0
srlx %o0,49,%o1
tst %o1
beq,pt %icc,1f
nop
srlx %o0,1,%o0
add %l0,1,%l0
1:
sllx %o0,16,%o1
srlx %o1,48,%o1
or %l4,%o1,%l4
sll %l0,16,%l5
or %l4,%l5,%l4
st %l4,[%i2]
st %o0,[%i2+4]
stx %o2,[%i2+8]
ret
restore
SET_SIZE(__quad_mag_sub)
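!
! /*
! * As with __quad_mag_add, the caller is expected to have screened
! * out NaNs and infinities, ordered the operands so that |x| >= |y|,
! * and stored the result's sign in z->l.msw. For example (a sketch):
! *
! *	union longdouble x, y, z;
! *	unsigned int fsr = 0;		round to nearest, no traps
! *
! *	x.d = 2.0L;
! *	y.d = 1.0L;
! *	z.l.msw = 0;			magnitudes subtract to +1.0L
! *	__quad_mag_sub(&x, &y, &z, &fsr);
! */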