__vsin.S revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
.file "__vsin.S"
#include "libm.h"
/*
 * Read-only constant pool.  Every entry is 8 bytes, written as a
 * (high word, low word) pair.  The p4 ... thresh macros defined right
 * after this table give the byte offset of each entry from the
 * constants label; the offsets are repeated here for orientation.
 */
RO_DATA
.align 64
constants:
/* 0x00-0x38: p4, q4, p3, q3, p2, q2, p1, q1 (polynomial coefficients) */
.word 0x3ec718e3,0xa6972785
.word 0x3ef9fd39,0x94293940
.word 0xbf2a019f,0x75ee4be1
.word 0xbf56c16b,0xba552569
.word 0x3f811111,0x1108c703
.word 0x3fa55555,0x554f5b35
.word 0xbfc55555,0x555554d0
.word 0xbfdfffff,0xffffff85
/* 0x40: one (IEEE double bit pattern of 1.0) */
.word 0x3ff00000,0x00000000
/* 0x48-0x60: pp1, pp2, qq1, qq2 (short polynomials used with the table) */
.word 0xbfc55555,0x5551fc28
.word 0x3f811107,0x62eacc9d
.word 0xbfdfffff,0xffff6328
.word 0x3fa55551,0x5f7acf0c
/* 0x68: invpio2;  0x70: round (bit pattern of 1.5 * 2**52) */
.word 0x3fe45f30,0x6dc9c883
.word 0x43380000,0x00000000
/* 0x78-0x90: pio2_1, pio2_2, pio2_3, pio2_3t */
.word 0x3ff921fb,0x54400000
.word 0x3dd0b461,0x1a600000
.word 0x3ba3198a,0x2e000000
.word 0x397b839a,0x252049c1
/* 0x98: f30val (high word 0x80000000 goes to f30, low word 0x4000 to f31) */
.word 0x80000000,0x00004000
/* 0xa0: mask;  0xa8 and 0xb0: the two thresh entries */
.word 0xffff8000,0x00000000 ! N.B.: low-order words used
.word 0x3fc90000,0x80000000 ! for sign bit hacking; see
.word 0x3fc40000,0x00000000 ! references to "thresh" below
/*
 * Byte offsets of the entries of the constants table, in table order.
 * The medium-range code indexes constants+0 or constants+8 with
 * (n & 1) * 8, then adds 0x10, 0x20 and 0x30, which walks either the
 * p4,p3,p2,p1 or the q4,q3,q2,q1 column of the interleaved layout.
 */
#define p4 0x0
#define q4 0x08
#define p3 0x10
#define q3 0x18
#define p2 0x20
#define q2 0x28
#define p1 0x30
#define q1 0x38
#define one 0x40
#define pp1 0x48
#define pp2 0x50
#define qq1 0x58
#define qq2 0x60
#define invpio2 0x68
#define round 0x70
#define pio2_1 0x78
#define pio2_2 0x80
#define pio2_3 0x88
#define pio2_3t 0x90
#define f30val 0x98
#define mask 0xa0
#define thresh 0xa8
/*
 * Frame-pointer-relative temporaries.
 *   xsave, ysave, nsave, sxsave, sysave: copies of the five incoming
 *       arguments, written once at function entry.
 *   biguns: not referenced in the part of the file reviewed here;
 *       the medium-range comments mention a biguns flag kept in l6.
 *   n0, n1, n2: 4-byte slots that receive the low word of
 *       x*invpio2 + round for each of the three pipelined arguments.
 *   x0_1, x1_1, x2_1: 16-byte slots; +0 gets the reduced |x|,
 *       +8 is preset to the constant one by the medium-range setup.
 *   y0_0, y1_0, y2_0: 16-byte slots; +0 gets the reduction tail,
 *       +8 is preset to zero by the medium-range setup.
 */
! local storage indices
#define xsave STACK_BIAS-0x8
#define ysave STACK_BIAS-0x10
#define nsave STACK_BIAS-0x14
#define sxsave STACK_BIAS-0x18
#define sysave STACK_BIAS-0x1c
#define biguns STACK_BIAS-0x20
#define n2 STACK_BIAS-0x24
#define n1 STACK_BIAS-0x28
#define n0 STACK_BIAS-0x2c
#define x2_1 STACK_BIAS-0x40
#define x1_1 STACK_BIAS-0x50
#define x0_1 STACK_BIAS-0x60
#define y2_0 STACK_BIAS-0x70
#define y1_0 STACK_BIAS-0x80
#define y0_0 STACK_BIAS-0x90
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps 0x90
!--------------------------------------------------------------
! Some defines to keep code more readable
#define LIM_l6 %l6
! in primary range, contains the |x| limit below which sin(x) is returned as x.
! in transferring to medium range, denotes what loop was active.
!--------------------------------------------------------------
/*
 * __vsin entry point and primary-range setup.
 *
 * Incoming registers (see the register-use list below):
 *   i0 = n, i1 = x, i2 = stridex, i3 = y, i4 = stridey.
 *
 * The code below
 *   - opens a frame with tmps bytes of temporaries,
 *   - uses the PIC_SETUP / PIC_SET macros (from libm.h, not shown here)
 *     to fill l3, l4 and l5; per the register-use list l3 and l4 are
 *     the bases of the two sincos tables, and l5 is copied to g1 to
 *     serve as the base for the constant loads,
 *   - saves the arguments in the frame, loads the integer thresholds
 *     and the floating-point constants used by the primary range,
 *   - scales both strides by 8 (shift left by 3); presumably the
 *     strides are passed as element counts of 8-byte doubles,
 *   - loads the first argument and enters the pipelined loop.
 */
ENTRY(__vsin)
save %sp,-SA(MINFRAME)-tmps,%sp
PIC_SETUP(g5)
PIC_SET(g5,__vlibm_TBL_sincos_hi,l3)
PIC_SET(g5,__vlibm_TBL_sincos_lo,l4)
PIC_SET(g5,constants,l5)
/* l5 is reused for an integer threshold below, so keep the base in g1 */
mov %l5,%g1
wr %g0,0x82,%asi ! set %asi for non-faulting loads
! ========== primary range ==========
! register use
! i0 n
! i1 x
! i2 stridex
! i3 y
! i4 stridey
! i5 0x80000000
! l0 hx0
! l1 hx1
! l2 hx2
! l3 __vlibm_TBL_sincos_hi
! l4 __vlibm_TBL_sincos_lo
! l5 0x3fc90000
! l6 0x3e400000
! l7 0x3fe921fb
! the following are 64-bit registers in both V8+ and V9
! g1 scratch
! g5
! o0 py0
! o1 py1
! o2 py2
! o3 oy0
! o4 oy1
! o5 oy2
! o7 scratch
! f0 x0
! f2
! f4
! f6
! f8 scratch for table base
! f9 signbit0
! f10 x1
! f12
! f14
! f16
! f18 scratch for table base
! f19 signbit1
! f20 x2
! f22
! f24
! f26
! f28 scratch for table base
! f29 signbit2
! f30 0x80000000
! f31 0x4000
! f32
! f34
! f36
! f38
! f40
! f42
! f44 0xffff800000000000
! f46 p1
! f48 p2
! f50 p3
! f52 p4
! f54 one
! f56 pp1
! f58 pp2
! f60 qq1
! f62 qq2
#ifdef __sparcv9
stx %i1,[%fp+xsave] ! save arguments
stx %i3,[%fp+ysave]
#else
st %i1,[%fp+xsave] ! save arguments
st %i3,[%fp+ysave]
#endif
st %i0,[%fp+nsave]
st %i2,[%fp+sxsave]
st %i4,[%fp+sysave]
sethi %hi(0x80000000),%i5 ! load/set up constants
sethi %hi(0x3fc90000),%l5
sethi %hi(0x3e400000),LIM_l6
sethi %hi(0x3fe921fb),%l7
or %l7,%lo(0x3fe921fb),%l7
ldd [%g1+f30val],%f30
ldd [%g1+mask],%f44
ldd [%g1+p1],%f46
ldd [%g1+p2],%f48
ldd [%g1+p3],%f50
ldd [%g1+p4],%f52
ldd [%g1+one],%f54
ldd [%g1+pp1],%f56
ldd [%g1+pp2],%f58
ldd [%g1+qq1],%f60
ldd [%g1+qq2],%f62
sll %i2,3,%i2 ! scale strides
sll %i4,3,%i4
/*
 * .loop2 stores the results of the previous triple through o3/o4/o5
 * before it computes anything.  Point all three at the x0_1 stack slot
 * so that those stores are harmless on the first pass.
 */
add %fp,x0_1,%o3 ! precondition loop
add %fp,x0_1,%o4
add %fp,x0_1,%o5
/* first argument: high word to l0 and f0, low word to f1 */
ld [%i1],%l0 ! hx = *x
ld [%i1],%f0
ld [%i1+4],%f1
andn %l0,%i5,%l0 ! hx &= ~0x80000000
add %i1,%i2,%i1 ! x += stridex
ba,pt %icc,.loop0
! delay slot
nop
/*
 * .loop0 - first stage of the three-way pipelined primary loop.
 * On entry l0 = hx0 (sign bit cleared) and f0/f1 = x0.
 *   - preloads the next argument into l1 and f10/f11 through %asi
 *     (set up at entry for non-faulting loads),
 *   - records the sign bit of x0 in f9,
 *   - leaves for .range0 when hx0 is outside [0x3e400000, 0x3fe921fb]:
 *     both differences are OR-ed and the sign of the result is tested,
 *   - counts the element; when n is exhausted only x0 is pending and
 *     control goes to .endloop1,
 *   - otherwise falls through to .loop1 with x0 replaced by |x0|.
 */
.align 32
.loop0:
lda [%i1]%asi,%l1 ! preload next argument
sub %l0,LIM_l6,%g1
sub %l7,%l0,%o7
fands %f0,%f30,%f9 ! save signbit
lda [%i1]%asi,%f10
orcc %o7,%g1,%g0
mov %i3,%o0 ! py0 = y
bl,pn %icc,.range0 ! if hx < 0x3e400000 or > 0x3fe921fb
! delay slot
lda [%i1+4]%asi,%f11
addcc %i0,-1,%i0
add %i3,%i4,%i3 ! y += stridey
ble,pn %icc,.endloop1
! delay slot
andn %l1,%i5,%l1
add %i1,%i2,%i1 ! x += stridex
fabsd %f0,%f0
fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only
/*
 * .loop1 - second stage; same shape as .loop0 but for x1.
 * On entry l1 = hx1 (sign bit cleared) and f10/f11 = x1.
 * Preloads the third argument into l2 and f20/f21, saves the sign of
 * x1 in f19, goes to .range1 when hx1 is out of the primary range and
 * to .endloop2 when n is exhausted (x0 and x1 pending).
 */
.loop1:
lda [%i1]%asi,%l2 ! preload next argument
sub %l1,LIM_l6,%g1
sub %l7,%l1,%o7
fands %f10,%f30,%f19 ! save signbit
lda [%i1]%asi,%f20
orcc %o7,%g1,%g0
mov %i3,%o1 ! py1 = y
bl,pn %icc,.range1 ! if hx < 0x3e400000 or > 0x3fe921fb
! delay slot
lda [%i1+4]%asi,%f21
addcc %i0,-1,%i0
add %i3,%i4,%i3 ! y += stridey
ble,pn %icc,.endloop2
! delay slot
andn %l2,%i5,%l2
add %i1,%i2,%i1 ! x += stridex
fabsd %f10,%f10
fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only
/*
 * .loop2 - third stage, dispatch, and the all-table evaluation path.
 *
 * Deferred stores: the results of the previous triple (f6, f16, f26)
 * are written through o3, o4, o5 here, interleaved with the range
 * checks.  Some of these stores sit in branch delay slots; the .caseN
 * targets finish whichever stores have not been issued yet.
 *
 * Dispatch: each hx is compared with l5 (0x3fc90000).  An argument
 * below that value is evaluated with the p1..p4 polynomial only; the
 * case number is a bit mask of such arguments (4 = x0, 2 = x1, 1 = x2).
 * Falling through all three tests means all three use the table.
 *
 * Table path, per argument (shown for x0):
 *   f2  = (|x0| with 0x4000 added to its high word) AND mask
 *   d   = |x0| - f2
 *   idx = ((hx0 - 0x3fc3c000) >> 10) with the low five bits cleared
 *   z   = d*d
 *   f6  = d*(one + z*(pp1 + z*pp2)) * hi[idx+8]
 *       + z*(qq1 + z*qq2) * hi[idx] + lo[idx] + hi[idx]
 * where hi and lo are the tables addressed by l3 and l4.  This has the
 * shape sin(a)*cos(d) + cos(a)*sin(d), so presumably hi[idx] and
 * hi[idx+8] hold the sine and cosine of the breakpoint f2 and lo[idx]
 * a low-order correction; the tables are not shown in this file.
 *
 * The saved sign bits (f9, f19, f29) are OR-ed onto the results, the
 * next argument is preloaded into l0 and f0/f1, and the loop repeats
 * while n > 0.
 */
.loop2:
st %f6,[%o3]
sub %l2,LIM_l6,%g1
sub %l7,%l2,%o7
fands %f20,%f30,%f29 ! save signbit
st %f7,[%o3+4]
orcc %g1,%o7,%g0
mov %i3,%o2 ! py2 = y
bl,pn %icc,.range2 ! if hx < 0x3e400000 or > 0x3fe921fb
! delay slot
add %i3,%i4,%i3 ! y += stridey
cmp %l0,%l5
fabsd %f20,%f20
bl,pn %icc,.case4
! delay slot
st %f16,[%o4]
cmp %l1,%l5
fpadd32s %f0,%f31,%f8
bl,pn %icc,.case2
! delay slot
st %f17,[%o4+4]
cmp %l2,%l5
fpadd32s %f10,%f31,%f18
bl,pn %icc,.case1
! delay slot
st %f26,[%o5]
/* all three arguments take the table path from here on */
mov %o0,%o3
sethi %hi(0x3fc3c000),%o7
fpadd32s %f20,%f31,%f28
st %f27,[%o5+4]
fand %f8,%f44,%f2
mov %o1,%o4
fand %f18,%f44,%f12
mov %o2,%o5
sub %l0,%o7,%l0
fand %f28,%f44,%f22
sub %l1,%o7,%l1
sub %l2,%o7,%l2
fsubd %f0,%f2,%f0
srl %l0,10,%l0
add %l3,8,%g1
fsubd %f10,%f12,%f10
srl %l1,10,%l1
fsubd %f20,%f22,%f20
srl %l2,10,%l2
fmuld %f0,%f0,%f2
andn %l0,0x1f,%l0
fmuld %f10,%f10,%f12
andn %l1,0x1f,%l1
fmuld %f20,%f20,%f22
andn %l2,0x1f,%l2
fmuld %f2,%f58,%f6
ldd [%l3+%l0],%f32
fmuld %f12,%f58,%f16
ldd [%l3+%l1],%f36
fmuld %f22,%f58,%f26
ldd [%l3+%l2],%f40
faddd %f6,%f56,%f6
fmuld %f2,%f62,%f4
ldd [%g1+%l0],%f34
faddd %f16,%f56,%f16
fmuld %f12,%f62,%f14
ldd [%g1+%l1],%f38
faddd %f26,%f56,%f26
fmuld %f22,%f62,%f24
ldd [%g1+%l2],%f42
fmuld %f2,%f6,%f6
faddd %f4,%f60,%f4
fmuld %f12,%f16,%f16
faddd %f14,%f60,%f14
fmuld %f22,%f26,%f26
faddd %f24,%f60,%f24
faddd %f6,%f54,%f6
fmuld %f2,%f4,%f4
faddd %f16,%f54,%f16
fmuld %f12,%f14,%f14
faddd %f26,%f54,%f26
fmuld %f22,%f24,%f24
fmuld %f0,%f6,%f6
ldd [%l4+%l0],%f2
fmuld %f10,%f16,%f16
ldd [%l4+%l1],%f12
fmuld %f20,%f26,%f26
ldd [%l4+%l2],%f22
fmuld %f4,%f32,%f4
lda [%i1]%asi,%l0 ! preload next argument
fmuld %f14,%f36,%f14
lda [%i1]%asi,%f0
fmuld %f24,%f40,%f24
lda [%i1+4]%asi,%f1
fmuld %f6,%f34,%f6
add %i1,%i2,%i1 ! x += stridex
fmuld %f16,%f38,%f16
fmuld %f26,%f42,%f26
faddd %f6,%f4,%f6
faddd %f16,%f14,%f16
faddd %f26,%f24,%f26
faddd %f6,%f2,%f6
faddd %f16,%f12,%f16
faddd %f26,%f22,%f26
faddd %f6,%f32,%f6
faddd %f16,%f36,%f16
faddd %f26,%f40,%f26
andn %l0,%i5,%l0 ! hx &= ~0x80000000
fors %f6,%f9,%f6
addcc %i0,-1,%i0
fors %f16,%f19,%f16
bg,pt %icc,.loop0
! delay slot
fors %f26,%f29,%f26
ba,pt %icc,.endloop0
! delay slot
nop
/*
 * .case1 - x2 is below 0x3fc90000; x0 and x1 are not.
 * x0 and x1 use the table path described at .loop2.  x2 uses the
 * polynomial only:
 *   z = x2*x2;  f26 = x2 + x2*z*(p1 + z*(p2 + z*(p3 + z*p4)))
 * The first instruction completes the deferred store of the previous
 * f26/f27 pair (the high word was stored in the delay slot of the
 * branch that came here).
 */
.align 32
.case1:
st %f27,[%o5+4]
sethi %hi(0x3fc3c000),%o7
add %l3,8,%g1
fand %f8,%f44,%f2
sub %l0,%o7,%l0
sub %l1,%o7,%l1
fand %f18,%f44,%f12
fmuld %f20,%f20,%f22
fsubd %f0,%f2,%f0
srl %l0,10,%l0
mov %o0,%o3
fsubd %f10,%f12,%f10
srl %l1,10,%l1
mov %o1,%o4
fmuld %f22,%f52,%f24
mov %o2,%o5
fmuld %f0,%f0,%f2
andn %l0,0x1f,%l0
fmuld %f10,%f10,%f12
andn %l1,0x1f,%l1
faddd %f24,%f50,%f24
fmuld %f2,%f58,%f6
ldd [%l3+%l0],%f32
fmuld %f12,%f58,%f16
ldd [%l3+%l1],%f36
fmuld %f22,%f24,%f24
faddd %f6,%f56,%f6
fmuld %f2,%f62,%f4
ldd [%g1+%l0],%f34
faddd %f16,%f56,%f16
fmuld %f12,%f62,%f14
ldd [%g1+%l1],%f38
faddd %f24,%f48,%f24
fmuld %f2,%f6,%f6
faddd %f4,%f60,%f4
fmuld %f12,%f16,%f16
faddd %f14,%f60,%f14
fmuld %f22,%f24,%f24
faddd %f6,%f54,%f6
fmuld %f2,%f4,%f4
faddd %f16,%f54,%f16
fmuld %f12,%f14,%f14
faddd %f24,%f46,%f24
fmuld %f0,%f6,%f6
ldd [%l4+%l0],%f2
fmuld %f10,%f16,%f16
ldd [%l4+%l1],%f12
fmuld %f4,%f32,%f4
lda [%i1]%asi,%l0 ! preload next argument
fmuld %f14,%f36,%f14
lda [%i1]%asi,%f0
fmuld %f6,%f34,%f6
lda [%i1+4]%asi,%f1
fmuld %f16,%f38,%f16
add %i1,%i2,%i1 ! x += stridex
fmuld %f22,%f24,%f24
faddd %f6,%f4,%f6
faddd %f16,%f14,%f16
fmuld %f20,%f24,%f24
faddd %f6,%f2,%f6
faddd %f16,%f12,%f16
faddd %f20,%f24,%f26
faddd %f6,%f32,%f6
faddd %f16,%f36,%f16
andn %l0,%i5,%l0 ! hx &= ~0x80000000
fors %f26,%f29,%f26
addcc %i0,-1,%i0
fors %f6,%f9,%f6
bg,pt %icc,.loop0
! delay slot
fors %f16,%f19,%f16
ba,pt %icc,.endloop0
! delay slot
nop
/*
 * .case2 - x1 is below 0x3fc90000 and x0 is not.
 * Finishes the deferred store of the previous f26/f27, then tests x2:
 * if x2 is also below the threshold control moves on to .case3.
 * Otherwise x0 and x2 use the table path (see .loop2) and x1 uses
 *   z = x1*x1;  f16 = x1 + x1*z*(p1 + z*(p2 + z*(p3 + z*p4)))
 */
.align 32
.case2:
st %f26,[%o5]
cmp %l2,%l5
fpadd32s %f20,%f31,%f28
bl,pn %icc,.case3
! delay slot
st %f27,[%o5+4]
sethi %hi(0x3fc3c000),%o7
add %l3,8,%g1
fand %f8,%f44,%f2
sub %l0,%o7,%l0
sub %l2,%o7,%l2
fand %f28,%f44,%f22
fmuld %f10,%f10,%f12
fsubd %f0,%f2,%f0
srl %l0,10,%l0
mov %o0,%o3
fsubd %f20,%f22,%f20
srl %l2,10,%l2
mov %o2,%o5
fmuld %f12,%f52,%f14
mov %o1,%o4
fmuld %f0,%f0,%f2
andn %l0,0x1f,%l0
fmuld %f20,%f20,%f22
andn %l2,0x1f,%l2
faddd %f14,%f50,%f14
fmuld %f2,%f58,%f6
ldd [%l3+%l0],%f32
fmuld %f22,%f58,%f26
ldd [%l3+%l2],%f40
fmuld %f12,%f14,%f14
faddd %f6,%f56,%f6
fmuld %f2,%f62,%f4
ldd [%g1+%l0],%f34
faddd %f26,%f56,%f26
fmuld %f22,%f62,%f24
ldd [%g1+%l2],%f42
faddd %f14,%f48,%f14
fmuld %f2,%f6,%f6
faddd %f4,%f60,%f4
fmuld %f22,%f26,%f26
faddd %f24,%f60,%f24
fmuld %f12,%f14,%f14
faddd %f6,%f54,%f6
fmuld %f2,%f4,%f4
faddd %f26,%f54,%f26
fmuld %f22,%f24,%f24
faddd %f14,%f46,%f14
fmuld %f0,%f6,%f6
ldd [%l4+%l0],%f2
fmuld %f20,%f26,%f26
ldd [%l4+%l2],%f22
fmuld %f4,%f32,%f4
lda [%i1]%asi,%l0 ! preload next argument
fmuld %f24,%f40,%f24
lda [%i1]%asi,%f0
fmuld %f6,%f34,%f6
lda [%i1+4]%asi,%f1
fmuld %f26,%f42,%f26
add %i1,%i2,%i1 ! x += stridex
fmuld %f12,%f14,%f14
faddd %f6,%f4,%f6
faddd %f26,%f24,%f26
fmuld %f10,%f14,%f14
faddd %f6,%f2,%f6
faddd %f26,%f22,%f26
faddd %f10,%f14,%f16
faddd %f6,%f32,%f6
faddd %f26,%f40,%f26
andn %l0,%i5,%l0 ! hx &= ~0x80000000
fors %f16,%f19,%f16
addcc %i0,-1,%i0
fors %f6,%f9,%f6
bg,pt %icc,.loop0
! delay slot
fors %f26,%f29,%f26
ba,pt %icc,.endloop0
! delay slot
nop
/*
 * .case3 - x1 and x2 are below 0x3fc90000; x0 is not.
 * x0 uses the table path (see .loop2); x1 and x2 each use
 *   z = x*x;  result = x + x*z*(p1 + z*(p2 + z*(p3 + z*p4)))
 * All deferred stores of the previous triple were already issued on
 * the way here.
 */
.align 32
.case3:
sethi %hi(0x3fc3c000),%o7
add %l3,8,%g1
fand %f8,%f44,%f2
fmuld %f10,%f10,%f12
sub %l0,%o7,%l0
fmuld %f20,%f20,%f22
fsubd %f0,%f2,%f0
srl %l0,10,%l0
mov %o0,%o3
fmuld %f12,%f52,%f14
mov %o1,%o4
fmuld %f22,%f52,%f24
mov %o2,%o5
fmuld %f0,%f0,%f2
andn %l0,0x1f,%l0
faddd %f14,%f50,%f14
faddd %f24,%f50,%f24
fmuld %f2,%f58,%f6
ldd [%l3+%l0],%f32
fmuld %f12,%f14,%f14
fmuld %f22,%f24,%f24
faddd %f6,%f56,%f6
fmuld %f2,%f62,%f4
ldd [%g1+%l0],%f34
faddd %f14,%f48,%f14
faddd %f24,%f48,%f24
fmuld %f2,%f6,%f6
faddd %f4,%f60,%f4
fmuld %f12,%f14,%f14
fmuld %f22,%f24,%f24
faddd %f6,%f54,%f6
fmuld %f2,%f4,%f4
faddd %f14,%f46,%f14
faddd %f24,%f46,%f24
fmuld %f0,%f6,%f6
ldd [%l4+%l0],%f2
fmuld %f4,%f32,%f4
lda [%i1]%asi,%l0 ! preload next argument
fmuld %f12,%f14,%f14
lda [%i1]%asi,%f0
fmuld %f6,%f34,%f6
lda [%i1+4]%asi,%f1
fmuld %f22,%f24,%f24
add %i1,%i2,%i1 ! x += stridex
fmuld %f10,%f14,%f14
faddd %f6,%f4,%f6
fmuld %f20,%f24,%f24
faddd %f10,%f14,%f16
faddd %f6,%f2,%f6
faddd %f20,%f24,%f26
fors %f16,%f19,%f16
andn %l0,%i5,%l0 ! hx &= ~0x80000000
faddd %f6,%f32,%f6
addcc %i0,-1,%i0
fors %f26,%f29,%f26
bg,pt %icc,.loop0
! delay slot
fors %f6,%f9,%f6
ba,pt %icc,.endloop0
! delay slot
nop
/*
 * .case4 - x0 is below 0x3fc90000.
 * Continues the deferred stores of the previous triple while testing
 * x1 (below threshold: go to .case6) and x2 (below threshold: go to
 * .case5).  If neither is, x1 and x2 use the table path (see .loop2)
 * and x0 uses the polynomial, with |x0| copied to f6 first:
 *   z = x0*x0;  f6 = x0 + x0*z*(p1 + z*(p2 + z*(p3 + z*p4)))
 */
.align 32
.case4:
st %f17,[%o4+4]
cmp %l1,%l5
fpadd32s %f10,%f31,%f18
bl,pn %icc,.case6
! delay slot
st %f26,[%o5]
cmp %l2,%l5
fpadd32s %f20,%f31,%f28
bl,pn %icc,.case5
! delay slot
st %f27,[%o5+4]
sethi %hi(0x3fc3c000),%o7
add %l3,8,%g1
fand %f18,%f44,%f12
sub %l1,%o7,%l1
sub %l2,%o7,%l2
fand %f28,%f44,%f22
fmuld %f0,%f0,%f2
fsubd %f10,%f12,%f10
srl %l1,10,%l1
mov %o1,%o4
fsubd %f20,%f22,%f20
srl %l2,10,%l2
mov %o2,%o5
fmovd %f0,%f6
fmuld %f2,%f52,%f4
mov %o0,%o3
fmuld %f10,%f10,%f12
andn %l1,0x1f,%l1
fmuld %f20,%f20,%f22
andn %l2,0x1f,%l2
faddd %f4,%f50,%f4
fmuld %f12,%f58,%f16
ldd [%l3+%l1],%f36
fmuld %f22,%f58,%f26
ldd [%l3+%l2],%f40
fmuld %f2,%f4,%f4
faddd %f16,%f56,%f16
fmuld %f12,%f62,%f14
ldd [%g1+%l1],%f38
faddd %f26,%f56,%f26
fmuld %f22,%f62,%f24
ldd [%g1+%l2],%f42
faddd %f4,%f48,%f4
fmuld %f12,%f16,%f16
faddd %f14,%f60,%f14
fmuld %f22,%f26,%f26
faddd %f24,%f60,%f24
fmuld %f2,%f4,%f4
faddd %f16,%f54,%f16
fmuld %f12,%f14,%f14
faddd %f26,%f54,%f26
fmuld %f22,%f24,%f24
faddd %f4,%f46,%f4
fmuld %f10,%f16,%f16
ldd [%l4+%l1],%f12
fmuld %f20,%f26,%f26
ldd [%l4+%l2],%f22
fmuld %f14,%f36,%f14
lda [%i1]%asi,%l0 ! preload next argument
fmuld %f24,%f40,%f24
lda [%i1]%asi,%f0
fmuld %f16,%f38,%f16
lda [%i1+4]%asi,%f1
fmuld %f26,%f42,%f26
add %i1,%i2,%i1 ! x += stridex
fmuld %f2,%f4,%f4
faddd %f16,%f14,%f16
faddd %f26,%f24,%f26
fmuld %f6,%f4,%f4
faddd %f16,%f12,%f16
faddd %f26,%f22,%f26
faddd %f6,%f4,%f6
faddd %f16,%f36,%f16
faddd %f26,%f40,%f26
andn %l0,%i5,%l0 ! hx &= ~0x80000000
fors %f6,%f9,%f6
addcc %i0,-1,%i0
fors %f16,%f19,%f16
bg,pt %icc,.loop0
! delay slot
fors %f26,%f29,%f26
ba,pt %icc,.endloop0
! delay slot
nop
/*
 * .case5 - x0 and x2 are below 0x3fc90000; x1 is not.
 * x1 uses the table path (see .loop2); x0 and x2 each use
 *   z = x*x;  result = x + x*z*(p1 + z*(p2 + z*(p3 + z*p4)))
 * |x0| is copied to f6 because f0 is overwritten by the preload of the
 * next argument before the final add.
 */
.align 32
.case5:
sethi %hi(0x3fc3c000),%o7
add %l3,8,%g1
fand %f18,%f44,%f12
fmuld %f0,%f0,%f2
sub %l1,%o7,%l1
fmuld %f20,%f20,%f22
fsubd %f10,%f12,%f10
srl %l1,10,%l1
mov %o1,%o4
fmovd %f0,%f6
fmuld %f2,%f52,%f4
mov %o0,%o3
fmuld %f22,%f52,%f24
mov %o2,%o5
fmuld %f10,%f10,%f12
andn %l1,0x1f,%l1
faddd %f4,%f50,%f4
faddd %f24,%f50,%f24
fmuld %f12,%f58,%f16
ldd [%l3+%l1],%f36
fmuld %f2,%f4,%f4
fmuld %f22,%f24,%f24
faddd %f16,%f56,%f16
fmuld %f12,%f62,%f14
ldd [%g1+%l1],%f38
faddd %f4,%f48,%f4
faddd %f24,%f48,%f24
fmuld %f12,%f16,%f16
faddd %f14,%f60,%f14
fmuld %f2,%f4,%f4
fmuld %f22,%f24,%f24
faddd %f16,%f54,%f16
fmuld %f12,%f14,%f14
faddd %f4,%f46,%f4
faddd %f24,%f46,%f24
fmuld %f10,%f16,%f16
ldd [%l4+%l1],%f12
fmuld %f14,%f36,%f14
lda [%i1]%asi,%l0 ! preload next argument
fmuld %f2,%f4,%f4
lda [%i1]%asi,%f0
fmuld %f16,%f38,%f16
lda [%i1+4]%asi,%f1
fmuld %f22,%f24,%f24
add %i1,%i2,%i1 ! x += stridex
fmuld %f6,%f4,%f4
faddd %f16,%f14,%f16
fmuld %f20,%f24,%f24
faddd %f6,%f4,%f6
faddd %f16,%f12,%f16
faddd %f20,%f24,%f26
fors %f6,%f9,%f6
andn %l0,%i5,%l0 ! hx &= ~0x80000000
faddd %f16,%f36,%f16
addcc %i0,-1,%i0
fors %f26,%f29,%f26
bg,pt %icc,.loop0
! delay slot
fors %f16,%f19,%f16
ba,pt %icc,.endloop0
! delay slot
nop
/*
 * .case6 - x0 and x1 are below 0x3fc90000.
 * Finishes the deferred store of the previous f26/f27 and tests x2;
 * if x2 is below the threshold as well, control goes to .case7.
 * Otherwise x2 uses the table path (see .loop2) and x0, x1 each use
 *   z = x*x;  result = x + x*z*(p1 + z*(p2 + z*(p3 + z*p4)))
 */
.align 32
.case6:
st %f27,[%o5+4]
cmp %l2,%l5
fpadd32s %f20,%f31,%f28
bl,pn %icc,.case7
! delay slot
sethi %hi(0x3fc3c000),%o7
add %l3,8,%g1
fand %f28,%f44,%f22
fmuld %f0,%f0,%f2
sub %l2,%o7,%l2
fmuld %f10,%f10,%f12
fsubd %f20,%f22,%f20
srl %l2,10,%l2
mov %o2,%o5
fmovd %f0,%f6
fmuld %f2,%f52,%f4
mov %o0,%o3
fmuld %f12,%f52,%f14
mov %o1,%o4
fmuld %f20,%f20,%f22
andn %l2,0x1f,%l2
faddd %f4,%f50,%f4
faddd %f14,%f50,%f14
fmuld %f22,%f58,%f26
ldd [%l3+%l2],%f40
fmuld %f2,%f4,%f4
fmuld %f12,%f14,%f14
faddd %f26,%f56,%f26
fmuld %f22,%f62,%f24
ldd [%g1+%l2],%f42
faddd %f4,%f48,%f4
faddd %f14,%f48,%f14
fmuld %f22,%f26,%f26
faddd %f24,%f60,%f24
fmuld %f2,%f4,%f4
fmuld %f12,%f14,%f14
faddd %f26,%f54,%f26
fmuld %f22,%f24,%f24
faddd %f4,%f46,%f4
faddd %f14,%f46,%f14
fmuld %f20,%f26,%f26
ldd [%l4+%l2],%f22
fmuld %f24,%f40,%f24
lda [%i1]%asi,%l0 ! preload next argument
fmuld %f2,%f4,%f4
lda [%i1]%asi,%f0
fmuld %f26,%f42,%f26
lda [%i1+4]%asi,%f1
fmuld %f12,%f14,%f14
add %i1,%i2,%i1 ! x += stridex
fmuld %f6,%f4,%f4
faddd %f26,%f24,%f26
fmuld %f10,%f14,%f14
faddd %f6,%f4,%f6
faddd %f26,%f22,%f26
faddd %f10,%f14,%f16
fors %f6,%f9,%f6
andn %l0,%i5,%l0 ! hx &= ~0x80000000
faddd %f26,%f40,%f26
addcc %i0,-1,%i0
fors %f16,%f19,%f16
bg,pt %icc,.loop0
! delay slot
fors %f26,%f29,%f26
ba,pt %icc,.endloop0
! delay slot
nop
/*
 * .case7 - all three arguments are below 0x3fc90000.
 * No table lookup; each result is
 *   z = x*x;  result = x + x*z*(p1 + z*(p2 + z*(p3 + z*p4)))
 * with p4, p3, p2, p1 in f52, f50, f48, f46.  |x0| is copied to f6
 * because f0 receives the preloaded next argument part way through.
 */
.align 32
.case7:
fmuld %f0,%f0,%f2
fmovd %f0,%f6
mov %o0,%o3
fmuld %f10,%f10,%f12
mov %o1,%o4
fmuld %f20,%f20,%f22
mov %o2,%o5
fmuld %f2,%f52,%f4
lda [%i1]%asi,%l0 ! preload next argument
fmuld %f12,%f52,%f14
lda [%i1]%asi,%f0
fmuld %f22,%f52,%f24
lda [%i1+4]%asi,%f1
faddd %f4,%f50,%f4
add %i1,%i2,%i1 ! x += stridex
faddd %f14,%f50,%f14
faddd %f24,%f50,%f24
fmuld %f2,%f4,%f4
fmuld %f12,%f14,%f14
fmuld %f22,%f24,%f24
faddd %f4,%f48,%f4
faddd %f14,%f48,%f14
faddd %f24,%f48,%f24
fmuld %f2,%f4,%f4
fmuld %f12,%f14,%f14
fmuld %f22,%f24,%f24
faddd %f4,%f46,%f4
faddd %f14,%f46,%f14
faddd %f24,%f46,%f24
fmuld %f2,%f4,%f4
fmuld %f12,%f14,%f14
fmuld %f22,%f24,%f24
fmuld %f6,%f4,%f4
fmuld %f10,%f14,%f14
fmuld %f20,%f24,%f24
faddd %f6,%f4,%f6
faddd %f10,%f14,%f16
faddd %f20,%f24,%f26
andn %l0,%i5,%l0 ! hx &= ~0x80000000
fors %f6,%f9,%f6
addcc %i0,-1,%i0
fors %f16,%f19,%f16
bg,pt %icc,.loop0
! delay slot
fors %f26,%f29,%f26
ba,pt %icc,.endloop0
! delay slot
nop
/*
 * .endloop2 - n ran out with x0 and x1 still to be evaluated.
 * Computes sin for x1 only, using f20 as the result register so that
 * the pending results in f6, f16 and f26 stay intact:
 *   hx1 <  0x3fc90000: polynomial path (local label 1)
 *   hx1 >= 0x3fc90000: table path, same formula as in .loop2
 * The sign saved in f19 is OR-ed on, the result is stored through o1,
 * and control falls through to .endloop1 for x0.
 */
.align 32
.endloop2:
cmp %l1,%l5
bl,pn %icc,1f
! delay slot
fabsd %f10,%f10
sethi %hi(0x3fc3c000),%o7
fpadd32s %f10,%f31,%f18
add %l3,8,%g1
fand %f18,%f44,%f12
sub %l1,%o7,%l1
fsubd %f10,%f12,%f10
srl %l1,10,%l1
fmuld %f10,%f10,%f12
andn %l1,0x1f,%l1
fmuld %f12,%f58,%f20
ldd [%l3+%l1],%f36
faddd %f20,%f56,%f20
fmuld %f12,%f62,%f14
ldd [%g1+%l1],%f38
fmuld %f12,%f20,%f20
faddd %f14,%f60,%f14
faddd %f20,%f54,%f20
fmuld %f12,%f14,%f14
fmuld %f10,%f20,%f20
ldd [%l4+%l1],%f12
fmuld %f14,%f36,%f14
fmuld %f20,%f38,%f20
faddd %f20,%f14,%f20
faddd %f20,%f12,%f20
ba,pt %icc,2f
! delay slot
faddd %f20,%f36,%f20
1:
fmuld %f10,%f10,%f12
fmuld %f12,%f52,%f14
faddd %f14,%f50,%f14
fmuld %f12,%f14,%f14
faddd %f14,%f48,%f14
fmuld %f12,%f14,%f14
faddd %f14,%f46,%f14
fmuld %f12,%f14,%f14
fmuld %f10,%f14,%f14
faddd %f10,%f14,%f20
2:
fors %f20,%f19,%f20
st %f20,[%o1]
st %f21,[%o1+4]
/*
 * .endloop1 - evaluates the last pending x0 (reached directly when n
 * ran out after one argument, or by falling through from .endloop2).
 * Same structure as .endloop2: polynomial path at local label 1 when
 * hx0 < 0x3fc90000, table path otherwise.  The result is built in f20,
 * gets the sign saved in f9, is stored through o0, and control falls
 * through to .endloop0.
 */
.endloop1:
cmp %l0,%l5
bl,pn %icc,1f
! delay slot
fabsd %f0,%f0
sethi %hi(0x3fc3c000),%o7
fpadd32s %f0,%f31,%f8
add %l3,8,%g1
fand %f8,%f44,%f2
sub %l0,%o7,%l0
fsubd %f0,%f2,%f0
srl %l0,10,%l0
fmuld %f0,%f0,%f2
andn %l0,0x1f,%l0
fmuld %f2,%f58,%f20
ldd [%l3+%l0],%f32
faddd %f20,%f56,%f20
fmuld %f2,%f62,%f4
ldd [%g1+%l0],%f34
fmuld %f2,%f20,%f20
faddd %f4,%f60,%f4
faddd %f20,%f54,%f20
fmuld %f2,%f4,%f4
fmuld %f0,%f20,%f20
ldd [%l4+%l0],%f2
fmuld %f4,%f32,%f4
fmuld %f20,%f34,%f20
faddd %f20,%f4,%f20
faddd %f20,%f2,%f20
ba,pt %icc,2f
! delay slot
faddd %f20,%f32,%f20
1:
fmuld %f0,%f0,%f2
fmuld %f2,%f52,%f4
faddd %f4,%f50,%f4
fmuld %f2,%f4,%f4
faddd %f4,%f48,%f4
fmuld %f2,%f4,%f4
faddd %f4,%f46,%f4
fmuld %f2,%f4,%f4
fmuld %f0,%f4,%f4
faddd %f0,%f4,%f20
2:
fors %f20,%f9,%f20
st %f20,[%o0]
st %f21,[%o0+4]
/*
 * .endloop0 - primary-range exit.
 * Writes the results of the last complete triple (f6, f16, f26) through
 * o3, o4, o5 and returns.  When no triple has been computed, o3/o4/o5
 * still point at the x0_1 stack slot set up at entry, so the stores
 * land in scratch space.
 */
.endloop0:
st %f6,[%o3]
st %f7,[%o3+4]
st %f16,[%o4]
st %f17,[%o4+4]
st %f26,[%o5]
st %f27,[%o5+4]
! return. finished off with only primary range arguments.
ret
restore
/*
 * .range0 - x0 failed the primary range test in .loop0.
 *   hx0 > 0x3e400000: the argument is too large for the primary range;
 *       go to .MEDIUM with LIM_l6 = 1 (the annulled delay slot runs
 *       only when the branch is taken).
 *   otherwise: x0 is tiny; store x0 itself as the result.  The fdtoi
 *       result in f2 is not used afterwards; per the existing comment
 *       the conversion is there to signal inexact for nonzero x.
 * After the tiny case the element is counted, y is advanced, the
 * preloaded x1 (l1, f10) becomes the new x0 and .loop0 is re-entered.
 */
.align 32
.range0:
cmp %l0,LIM_l6
bg,a,pt %icc,.MEDIUM ! branch if x is not tiny
! delay slot, annulled if branch not taken
mov 0x1,LIM_l6 ! set "processing loop0"
st %f0,[%o0] ! *y = *x with inexact if x nonzero
st %f1,[%o0+4]
fdtoi %f0,%f2
addcc %i0,-1,%i0
ble,pn %icc,.endloop0
! delay slot, harmless if branch taken
add %i3,%i4,%i3 ! y += stridey
andn %l1,%i5,%l0 ! hx &= ~0x80000000
fmovd %f10,%f0
ba,pt %icc,.loop0
! delay slot
add %i1,%i2,%i1 ! x += stridex
/*
 * .range1 - x1 failed the primary range test in .loop1.
 * Mirrors .range0: a large x1 goes to .MEDIUM with LIM_l6 = 2; a tiny
 * x1 is stored unchanged through o1.  Afterwards the preloaded x2
 * (l2, f20) becomes the new x1 and .loop1 is re-entered, or, when n is
 * exhausted, .endloop1 finishes the pending x0.
 */
.align 32
.range1:
cmp %l1,LIM_l6
bg,a,pt %icc,.MEDIUM ! branch if x is not tiny
! delay slot, annulled if branch not taken
mov 0x2,LIM_l6 ! set "processing loop1"
st %f10,[%o1] ! *y = *x with inexact if x nonzero
st %f11,[%o1+4]
fdtoi %f10,%f12
addcc %i0,-1,%i0
ble,pn %icc,.endloop1
! delay slot, harmless if branch taken
add %i3,%i4,%i3 ! y += stridey
andn %l2,%i5,%l1 ! hx &= ~0x80000000
fmovd %f20,%f10
ba,pt %icc,.loop1
! delay slot
add %i1,%i2,%i1 ! x += stridex
/*
 * .range2 - x2 failed the primary range test in .loop2.
 * A large x2 goes to .MEDIUM with LIM_l6 = 3; a tiny x2 is stored
 * unchanged through o2.  y was already advanced in the delay slot of
 * the branch that came here, so it is not advanced again.  Since
 * .loop2 does not preload, the next argument is fetched here with
 * ordinary loads (n > 0 has just been checked) before re-entering
 * .loop2; when n is exhausted .endloop2 finishes x1 and x0.
 * No reference to the local label 1 is visible in this part of the
 * file.
 */
.align 32
.range2:
cmp %l2,LIM_l6
bg,a,pt %icc,.MEDIUM ! branch if x is not tiny
! delay slot, annulled if branch not taken
mov 0x3,LIM_l6 ! set "processing loop2"
st %f20,[%o2] ! *y = *x with inexact if x nonzero
st %f21,[%o2+4]
fdtoi %f20,%f22
1:
addcc %i0,-1,%i0
ble,pn %icc,.endloop2
! delay slot
nop
ld [%i1],%l2
ld [%i1],%f20
ld [%i1+4],%f21
andn %l2,%i5,%l2 ! hx &= ~0x80000000
ba,pt %icc,.loop2
! delay slot
add %i1,%i2,%i1 ! x += stridex
/*
 * .MEDIUM - switch from the primary-range loop to the medium-range
 * loop.  Entered from .range0/.range1/.range2 with LIM_l6 = 1, 2 or 3
 * naming the pipeline stage that met the large argument.
 *
 * Steps:
 *   1. reload l5 with the address of the constants table (l5 held an
 *      integer threshold in the primary range),
 *   2. flush the pending primary-range results f6/f16/f26 through
 *      o3/o4/o5, and set l7 to the medium-range limit 0x413921fb,
 *   3. load invpio2, round and the pio2 pieces, replacing the p1..p4
 *      values in f46..f52,
 *   4. preset the stack slots: one at x?_1+8 and zero at y?_0+8 (the
 *      medium CASE blocks select between +0 and +8 with n & 1),
 *   5. resume at the matching stage:
 *        LIM_l6 == 1: .LOOP0, nothing to adjust;
 *        LIM_l6 == 2: x0 was already accepted, so restore its sign
 *                     (f0 was replaced by |x0|) and form x0*invpio2
 *                     before entering .LOOP1;
 *        LIM_l6 == 3: additionally undo the y advance made in the
 *                     delay slot in .loop2, restore the sign of x1,
 *                     form x1*invpio2 and add round to f2, matching
 *                     what .LOOP1 would have done, then enter .LOOP2.
 *      LIM_l6 is cleared in the annulled delay slot of the branch that
 *      is taken.  Only the values 1, 2 and 3 are assigned in the code
 *      reviewed here, so the fall-through into .LOOP0 after the last
 *      test is not expected to be reached.
 */
.align 32
.MEDIUM:
! ========== medium range ==========
! register use
! i0 n
! i1 x
! i2 stridex
! i3 y
! i4 stridey
! i5 0x80000000
! l0 hx0
! l1 hx1
! l2 hx2
! l3 __vlibm_TBL_sincos_hi
! l4 __vlibm_TBL_sincos_lo
! l5 constants
! l6 in transition from pri-range and here, use for biguns
! l7 0x413921fb
! the following are 64-bit registers in both V8+ and V9
! g1 scratch
! g5
! o0 py0
! o1 py1
! o2 py2
! o3 n0
! o4 n1
! o5 n2
! o7 scratch
! f0 x0
! f2 n0,y0
! f4
! f6
! f8 scratch for table base
! f9 signbit0
! f10 x1
! f12 n1,y1
! f14
! f16
! f18 scratch for table base
! f19 signbit1
! f20 x2
! f22 n2,y2
! f24
! f26
! f28 scratch for table base
! f29 signbit2
! f30 0x80000000
! f31 0x4000
! f32
! f34
! f36
! f38
! f40 invpio2
! f42 round
! f44 0xffff800000000000
! f46 pio2_1
! f48 pio2_2
! f50 pio2_3
! f52 pio2_3t
! f54 one
! f56 pp1
! f58 pp2
! f60 qq1
! f62 qq2
PIC_SET(g5,constants,l5)
! %o3,%o4,%o5 need to be stored
st %f6,[%o3]
sethi %hi(0x413921fb),%l7
st %f7,[%o3+4]
or %l7,%lo(0x413921fb),%l7
st %f16,[%o4]
st %f17,[%o4+4]
st %f26,[%o5]
st %f27,[%o5+4]
ldd [%l5+invpio2],%f40
ldd [%l5+round],%f42
ldd [%l5+pio2_1],%f46
ldd [%l5+pio2_2],%f48
ldd [%l5+pio2_3],%f50
ldd [%l5+pio2_3t],%f52
std %f54,[%fp+x0_1+8] ! set up stack data
std %f54,[%fp+x1_1+8]
std %f54,[%fp+x2_1+8]
stx %g0,[%fp+y0_0+8]
stx %g0,[%fp+y1_0+8]
stx %g0,[%fp+y2_0+8]
! branched here in the middle of the array. Need to adjust
! for the members of the triple that were selected in the primary
! loop.
! no adjustment since all three selected here
subcc LIM_l6,0x1,%g0 ! continue in LOOP0?
bz,a %icc,.LOOP0
mov 0x0,LIM_l6 ! delay slot set biguns=0
! ajust 1st triple since 2d and 3d done here
subcc LIM_l6,0x2,%g0 ! continue in LOOP1?
fors %f0,%f9,%f0 ! restore sign bit
fmuld %f0,%f40,%f2 ! adj LOOP0
bz,a %icc,.LOOP1
mov 0x0,LIM_l6 ! delay slot set biguns=0
! ajust 1st and 2d triple since 3d done here
subcc LIM_l6,0x3,%g0 ! continue in LOOP2?
!done fmuld %f0,%f40,%f2 ! adj LOOP0
sub %i3,%i4,%i3 ! adjust to not double increment
fors %f10,%f19,%f10 ! restore sign bit
fmuld %f10,%f40,%f12 ! adj LOOP1
faddd %f2,%f42,%f2 ! adj LOOP1
bz,a %icc,.LOOP2
mov 0x0,LIM_l6 ! delay slot set biguns=0
/*
 * .LOOP0 - first stage of the medium-range pipelined loop.
 * On entry l0 = hx0 (sign bit cleared) and f0/f1 = x0 with its sign.
 * Preloads the next argument into l1 and f10/f11, records py0,
 * branches to .BIG0 when hx0 exceeds l7 (0x413921fb) and to .ENDLOOP1
 * when n is exhausted; both targets lie outside the part of the file
 * reviewed here.  Otherwise starts the reduction with
 * f2 = x0 * invpio2 and falls through to .LOOP1.
 */
.align 32
.LOOP0:
lda [%i1]%asi,%l1 ! preload next argument
mov %i3,%o0 ! py0 = y
lda [%i1]%asi,%f10
cmp %l0,%l7
add %i3,%i4,%i3 ! y += stridey
bg,pn %icc,.BIG0 ! if hx > 0x413921fb
! delay slot
lda [%i1+4]%asi,%f11
addcc %i0,-1,%i0
add %i1,%i2,%i1 ! x += stridex
ble,pn %icc,.ENDLOOP1
! delay slot
andn %l1,%i5,%l1
nop
fmuld %f0,%f40,%f2
fabsd %f54,%f54 ! a nop for alignment only
/*
 * .LOOP1 - second stage of the medium-range loop, for x1.
 * Preloads the third argument into l2 and f20/f21, records py1,
 * branches to .BIG1 when hx1 exceeds l7 and to .ENDLOOP2 when n is
 * exhausted (both outside the part of the file reviewed here).
 * Otherwise forms f12 = x1 * invpio2 and adds round to f2, so that the
 * low word of f2 can be stored as the integer n0 in .LOOP2.
 */
.LOOP1:
lda [%i1]%asi,%l2 ! preload next argument
mov %i3,%o1 ! py1 = y
lda [%i1]%asi,%f20
cmp %l1,%l7
add %i3,%i4,%i3 ! y += stridey
bg,pn %icc,.BIG1 ! if hx > 0x413921fb
! delay slot
lda [%i1+4]%asi,%f21
addcc %i0,-1,%i0
add %i1,%i2,%i1 ! x += stridex
ble,pn %icc,.ENDLOOP2
! delay slot
andn %l2,%i5,%l2
nop
fmuld %f10,%f40,%f12
faddd %f2,%f42,%f2
/*
 * .LOOP2 - third stage of the medium-range loop: argument reduction
 * for the whole triple, dispatch, and the all-table evaluation path.
 *
 * Reduction, per argument (shown for x0):
 *   t  = x0*invpio2 + round;  the low word of t is stored as n0
 *   n  = t - round            (n as a double)
 *   x0 is reduced by n*pio2_1, n*pio2_2, n*pio2_3 and n*pio2_3t in
 *   turn, with the rounding error of each subtraction folded into the
 *   next term.  The outcome is a head in f0 and a tail in f2.
 *   The sign of the head is saved in f9, the head is replaced by its
 *   absolute value and stored at x0_1, and the tail has the same sign
 *   bit XOR-ed in so that it goes with |head|.
 *
 * Use of n & 1 (o3, o4, o5 = (n & 1) * 8):
 *   - selects the thresh entry whose high word (0x3fc90000 or
 *     0x3fc40000) is compared with the high word of |head|,
 *   - selects the low word of the same entry (0x80000000 or 0), which
 *     is AND-ed into the saved sign, clearing it when n is odd,
 *   - is added to the table index on the table path, so the loads are
 *     taken 8 bytes further into each table row when n is odd.
 *
 * Dispatch: bit 1 of each fcmpgt32 result is the comparison of the
 * high words.  An argument whose high word is below its threshold is
 * evaluated by polynomial; .CASE4/.CASE2/.CASE1 correspond to
 * arguments 0/1/2 as in the primary range.  Falling through all three
 * tests means all three use the table, with the same formula as the
 * primary .loop2 except that the tail is added to the table remainder
 * first.  All paths end in .FIXSIGN.
 */
.LOOP2:
st %f3,[%fp+n0]
mov %i3,%o2 ! py2 = y
cmp %l2,%l7
add %i3,%i4,%i3 ! y += stridey
fmuld %f20,%f40,%f22
bg,pn %icc,.BIG2 ! if hx > 0x413921fb
! delay slot
add %l5,thresh+4,%o7
faddd %f12,%f42,%f12
st %f13,[%fp+n1]
! -
add %l5,thresh,%g1
faddd %f22,%f42,%f22
st %f23,[%fp+n2]
fsubd %f2,%f42,%f2 ! n
fsubd %f12,%f42,%f12 ! n
fsubd %f22,%f42,%f22 ! n
fmuld %f2,%f46,%f4
fmuld %f12,%f46,%f14
fmuld %f22,%f46,%f24
fsubd %f0,%f4,%f4
fmuld %f2,%f48,%f6
fsubd %f10,%f14,%f14
fmuld %f12,%f48,%f16
fsubd %f20,%f24,%f24
fmuld %f22,%f48,%f26
fsubd %f4,%f6,%f0
ld [%fp+n0],%o3
fsubd %f14,%f16,%f10
ld [%fp+n1],%o4
fsubd %f24,%f26,%f20
ld [%fp+n2],%o5
fsubd %f4,%f0,%f32
and %o3,1,%o3
fsubd %f14,%f10,%f34
and %o4,1,%o4
fsubd %f24,%f20,%f36
and %o5,1,%o5
fsubd %f32,%f6,%f32
fmuld %f2,%f50,%f8
sll %o3,3,%o3
fsubd %f34,%f16,%f34
fmuld %f12,%f50,%f18
sll %o4,3,%o4
fsubd %f36,%f26,%f36
fmuld %f22,%f50,%f28
sll %o5,3,%o5
fsubd %f8,%f32,%f8
ld [%g1+%o3],%f6
fsubd %f18,%f34,%f18
ld [%g1+%o4],%f16
fsubd %f28,%f36,%f28
ld [%g1+%o5],%f26
fsubd %f0,%f8,%f4
fsubd %f10,%f18,%f14
fsubd %f20,%f28,%f24
fsubd %f0,%f4,%f32
fsubd %f10,%f14,%f34
fsubd %f20,%f24,%f36
fsubd %f32,%f8,%f32
fmuld %f2,%f52,%f2
fsubd %f34,%f18,%f34
fmuld %f12,%f52,%f12
fsubd %f36,%f28,%f36
fmuld %f22,%f52,%f22
fsubd %f2,%f32,%f2
ld [%o7+%o3],%f8
fsubd %f12,%f34,%f12
ld [%o7+%o4],%f18
fsubd %f22,%f36,%f22
ld [%o7+%o5],%f28
fsubd %f4,%f2,%f0 ! x
fsubd %f14,%f12,%f10 ! x
fsubd %f24,%f22,%f20 ! x
fsubd %f4,%f0,%f4
fsubd %f14,%f10,%f14
fsubd %f24,%f20,%f24
fands %f0,%f30,%f9 ! save signbit
fands %f10,%f30,%f19 ! save signbit
fands %f20,%f30,%f29 ! save signbit
fabsd %f0,%f0
std %f0,[%fp+x0_1]
fabsd %f10,%f10
std %f10,[%fp+x1_1]
fabsd %f20,%f20
std %f20,[%fp+x2_1]
fsubd %f4,%f2,%f2 ! y
fsubd %f14,%f12,%f12 ! y
fsubd %f24,%f22,%f22 ! y
fcmpgt32 %f6,%f0,%l0
fcmpgt32 %f16,%f10,%l1
fcmpgt32 %f26,%f20,%l2
! -- 16 byte aligned
fxors %f2,%f9,%f2
fxors %f12,%f19,%f12
fxors %f22,%f29,%f22
fands %f9,%f8,%f9 ! if (n & 1) clear sign bit
andcc %l0,2,%g0
bne,pn %icc,.CASE4
! delay slot
fands %f19,%f18,%f19 ! if (n & 1) clear sign bit
andcc %l1,2,%g0
bne,pn %icc,.CASE2
! delay slot
fands %f29,%f28,%f29 ! if (n & 1) clear sign bit
andcc %l2,2,%g0
bne,pn %icc,.CASE1
! delay slot
fpadd32s %f0,%f31,%f8
/* all three arguments take the table path from here on */
sethi %hi(0x3fc3c000),%o7
ld [%fp+x0_1],%l0
fpadd32s %f10,%f31,%f18
add %l3,8,%g1
ld [%fp+x1_1],%l1
fpadd32s %f20,%f31,%f28
ld [%fp+x2_1],%l2
fand %f8,%f44,%f4
sub %l0,%o7,%l0
fand %f18,%f44,%f14
sub %l1,%o7,%l1
fand %f28,%f44,%f24
sub %l2,%o7,%l2
fsubd %f0,%f4,%f0
srl %l0,10,%l0
fsubd %f10,%f14,%f10
srl %l1,10,%l1
fsubd %f20,%f24,%f20
srl %l2,10,%l2
faddd %f0,%f2,%f0
andn %l0,0x1f,%l0
faddd %f10,%f12,%f10
andn %l1,0x1f,%l1
faddd %f20,%f22,%f20
andn %l2,0x1f,%l2
fmuld %f0,%f0,%f2
add %l0,%o3,%l0
fmuld %f10,%f10,%f12
add %l1,%o4,%l1
fmuld %f20,%f20,%f22
add %l2,%o5,%l2
fmuld %f2,%f58,%f6
ldd [%l3+%l0],%f32
fmuld %f12,%f58,%f16
ldd [%l3+%l1],%f34
fmuld %f22,%f58,%f26
ldd [%l3+%l2],%f36
faddd %f6,%f56,%f6
fmuld %f2,%f62,%f4
faddd %f16,%f56,%f16
fmuld %f12,%f62,%f14
faddd %f26,%f56,%f26
fmuld %f22,%f62,%f24
fmuld %f2,%f6,%f6
faddd %f4,%f60,%f4
fmuld %f12,%f16,%f16
faddd %f14,%f60,%f14
fmuld %f22,%f26,%f26
faddd %f24,%f60,%f24
faddd %f6,%f54,%f6
fmuld %f2,%f4,%f4
faddd %f16,%f54,%f16
fmuld %f12,%f14,%f14
faddd %f26,%f54,%f26
fmuld %f22,%f24,%f24
fmuld %f0,%f6,%f6
ldd [%g1+%l0],%f2
fmuld %f10,%f16,%f16
ldd [%g1+%l1],%f12
fmuld %f20,%f26,%f26
ldd [%g1+%l2],%f22
fmuld %f4,%f32,%f4
ldd [%l4+%l0],%f0
fmuld %f14,%f34,%f14
ldd [%l4+%l1],%f10
fmuld %f24,%f36,%f24
ldd [%l4+%l2],%f20
fmuld %f6,%f2,%f6
fmuld %f16,%f12,%f16
fmuld %f26,%f22,%f26
faddd %f6,%f4,%f6
faddd %f16,%f14,%f16
faddd %f26,%f24,%f26
faddd %f6,%f0,%f6
faddd %f16,%f10,%f16
faddd %f26,%f20,%f26
faddd %f6,%f32,%f6
faddd %f16,%f34,%f16
faddd %f26,%f36,%f26
/*
 * .FIXSIGN - common tail of the medium-range iteration.
 * For each argument, (n & 2) << 2 is 0 or 8 and indexes from
 * thresh-4: offset 0 reads the low word of the mask entry (zero) and
 * offset 8 reads the low word of the first thresh entry (0x80000000).
 * That word is XOR-ed into the saved sign, so the sign is flipped when
 * bit 1 of n is set.  The sign is then OR-ed onto the result and the
 * three results are stored through o0, o1 and o2.
 * The next argument is preloaded into l0 and f0/f1 on the way; the
 * loop continues at .LOOP0 while n > 0, otherwise at .ENDLOOP0 (which
 * lies outside the part of the file reviewed here).
 */
.FIXSIGN:
ld [%fp+n0],%o3
add %l5,thresh-4,%g1
ld [%fp+n1],%o4
ld [%fp+n2],%o5
and %o3,2,%o3
sll %o3,2,%o3
and %o4,2,%o4
lda [%i1]%asi,%l0 ! preload next argument
sll %o4,2,%o4
and %o5,2,%o5
ld [%g1+%o3],%f8
sll %o5,2,%o5
ld [%g1+%o4],%f18
ld [%g1+%o5],%f28
fxors %f9,%f8,%f9
lda [%i1]%asi,%f0
fxors %f29,%f28,%f29
lda [%i1+4]%asi,%f1
fxors %f19,%f18,%f19
fors %f6,%f9,%f6 ! tack on sign
add %i1,%i2,%i1 ! x += stridex
st %f6,[%o0]
fors %f26,%f29,%f26 ! tack on sign
st %f7,[%o0+4]
fors %f16,%f19,%f16 ! tack on sign
st %f26,[%o2]
st %f27,[%o2+4]
addcc %i0,-1,%i0
st %f16,[%o1]
andn %l0,%i5,%l0 ! hx &= ~0x80000000
bg,pt %icc,.LOOP0
! delay slot
st %f17,[%o1+4]
ba,pt %icc,.ENDLOOP0
! delay slot
nop
/*
 * .CASE1 (medium range) - argument 2 is below its threshold and uses a
 * polynomial; arguments 0 and 1 use the table path of .LOOP2.
 *
 * Polynomial path, with k = (n2 & 1) * 8 held in o5 and z = x2*x2:
 *   c4..c1 = constants[k], [k+0x10], [k+0x20], [k+0x30]
 *            (p4,p3,p2,p1 for k = 0;  q4,q3,q2,q1 for k = 8)
 *   h      = [x2_1 + k]   (reduced |x2| for k = 0, one for k = 8)
 *   t      = [y2_0 + k]   (reduction tail for k = 0, zero for k = 8)
 *   f26    = h + (h * z*(c1 + z*(c2 + z*(c3 + z*c4))) + t)
 * The tail in f22 is written to y2_0 first so the indexed load can
 * pick either it or the preset zero.
 */
.align 32
.CASE1:
fpadd32s %f10,%f31,%f18
sethi %hi(0x3fc3c000),%o7
ld [%fp+x0_1],%l0
fand %f8,%f44,%f4
add %l3,8,%g1
ld [%fp+x1_1],%l1
fand %f18,%f44,%f14
sub %l0,%o7,%l0
fsubd %f0,%f4,%f0
srl %l0,10,%l0
sub %l1,%o7,%l1
fsubd %f10,%f14,%f10
srl %l1,10,%l1
fmuld %f20,%f20,%f20
ldd [%l5+%o5],%f36
add %l5,%o5,%l2
faddd %f0,%f2,%f0
andn %l0,0x1f,%l0
faddd %f10,%f12,%f10
andn %l1,0x1f,%l1
fmuld %f20,%f36,%f24
ldd [%l2+0x10],%f26
add %fp,%o5,%o5
fmuld %f0,%f0,%f2
add %l0,%o3,%l0
fmuld %f10,%f10,%f12
add %l1,%o4,%l1
faddd %f24,%f26,%f24
ldd [%l2+0x20],%f36
fmuld %f2,%f58,%f6
ldd [%l3+%l0],%f32
fmuld %f12,%f58,%f16
ldd [%l3+%l1],%f34
fmuld %f20,%f24,%f24
ldd [%l2+0x30],%f26
faddd %f6,%f56,%f6
fmuld %f2,%f62,%f4
faddd %f16,%f56,%f16
fmuld %f12,%f62,%f14
faddd %f24,%f36,%f24
ldd [%o5+x2_1],%f36
fmuld %f2,%f6,%f6
faddd %f4,%f60,%f4
fmuld %f12,%f16,%f16
faddd %f14,%f60,%f14
fmuld %f20,%f24,%f24
faddd %f6,%f54,%f6
fmuld %f2,%f4,%f4
ldd [%g1+%l0],%f2
faddd %f16,%f54,%f16
fmuld %f12,%f14,%f14
ldd [%g1+%l1],%f12
faddd %f24,%f26,%f24
fmuld %f0,%f6,%f6
ldd [%l4+%l0],%f0
fmuld %f10,%f16,%f16
ldd [%l4+%l1],%f10
fmuld %f4,%f32,%f4
std %f22,[%fp+y2_0]
fmuld %f14,%f34,%f14
fmuld %f6,%f2,%f6
fmuld %f16,%f12,%f16
fmuld %f20,%f24,%f24
faddd %f6,%f4,%f6
faddd %f16,%f14,%f16
fmuld %f36,%f24,%f24
ldd [%o5+y2_0],%f22
faddd %f6,%f0,%f6
faddd %f16,%f10,%f16
faddd %f24,%f22,%f24
faddd %f6,%f32,%f6
faddd %f16,%f34,%f16
ba,pt %icc,.FIXSIGN
! delay slot
faddd %f36,%f24,%f26
/*
 * .CASE2 (medium range) - argument 1 is below its threshold and
 * argument 0 is not.  Tests argument 2 first; if it is below its
 * threshold as well, control goes to .CASE3 (o7 is set in the delay
 * slot and is used there).
 * Otherwise arguments 0 and 2 use the table path of .LOOP2, and
 * argument 1 uses the polynomial with k = (n1 & 1) * 8 held in o4 and
 * z = x1*x1:
 *   f16 = h + (h * z*(c1 + z*(c2 + z*(c3 + z*c4))) + t)
 * where c4..c1 come from constants[k], [k+0x10], [k+0x20], [k+0x30],
 * h = [x1_1 + k] and t = [y1_0 + k].
 */
.align 32
.CASE2:
fpadd32s %f0,%f31,%f8
ld [%fp+x0_1],%l0
andcc %l2,2,%g0
bne,pn %icc,.CASE3
! delay slot
sethi %hi(0x3fc3c000),%o7
fpadd32s %f20,%f31,%f28
ld [%fp+x2_1],%l2
fand %f8,%f44,%f4
sub %l0,%o7,%l0
add %l3,8,%g1
fand %f28,%f44,%f24
sub %l2,%o7,%l2
fsubd %f0,%f4,%f0
srl %l0,10,%l0
fsubd %f20,%f24,%f20
srl %l2,10,%l2
fmuld %f10,%f10,%f10
ldd [%l5+%o4],%f34
add %l5,%o4,%l1
faddd %f0,%f2,%f0
andn %l0,0x1f,%l0
faddd %f20,%f22,%f20
andn %l2,0x1f,%l2
fmuld %f10,%f34,%f14
ldd [%l1+0x10],%f16
add %fp,%o4,%o4
fmuld %f0,%f0,%f2
add %l0,%o3,%l0
fmuld %f20,%f20,%f22
add %l2,%o5,%l2
faddd %f14,%f16,%f14
ldd [%l1+0x20],%f34
fmuld %f2,%f58,%f6
ldd [%l3+%l0],%f32
fmuld %f22,%f58,%f26
ldd [%l3+%l2],%f36
fmuld %f10,%f14,%f14
ldd [%l1+0x30],%f16
faddd %f6,%f56,%f6
fmuld %f2,%f62,%f4
faddd %f26,%f56,%f26
fmuld %f22,%f62,%f24
faddd %f14,%f34,%f14
ldd [%o4+x1_1],%f34
fmuld %f2,%f6,%f6
faddd %f4,%f60,%f4
fmuld %f22,%f26,%f26
faddd %f24,%f60,%f24
fmuld %f10,%f14,%f14
faddd %f6,%f54,%f6
fmuld %f2,%f4,%f4
ldd [%g1+%l0],%f2
faddd %f26,%f54,%f26
fmuld %f22,%f24,%f24
ldd [%g1+%l2],%f22
faddd %f14,%f16,%f14
fmuld %f0,%f6,%f6
ldd [%l4+%l0],%f0
fmuld %f20,%f26,%f26
ldd [%l4+%l2],%f20
fmuld %f4,%f32,%f4
std %f12,[%fp+y1_0]
fmuld %f24,%f36,%f24
fmuld %f6,%f2,%f6
fmuld %f26,%f22,%f26
fmuld %f10,%f14,%f14
faddd %f6,%f4,%f6
faddd %f26,%f24,%f26
fmuld %f34,%f14,%f14
ldd [%o4+y1_0],%f12
faddd %f6,%f0,%f6
faddd %f26,%f20,%f26
faddd %f14,%f12,%f14
faddd %f6,%f32,%f6
faddd %f26,%f36,%f26
ba,pt %icc,.FIXSIGN
! delay slot
faddd %f34,%f14,%f16
/*
 * .CASE3 (medium range) - arguments 1 and 2 are below their thresholds;
 * argument 0 uses the table path of .LOOP2.
 * Entered from .CASE2 with f8 (rounded high word of x0), l0 (high word
 * of the reduced |x0|) and o7 (0x3fc3c000) already set up.
 * Arguments 1 and 2 each use, with k = (n & 1) * 8 and z = x*x,
 *   result = h + (h * z*(c1 + z*(c2 + z*(c3 + z*c4))) + t)
 * where c4..c1 come from constants[k], [k+0x10], [k+0x20], [k+0x30],
 * h from the x?_1 slot at offset k and t from the y?_0 slot at
 * offset k.
 */
.align 32
.CASE3:
fand %f8,%f44,%f4
add %l3,8,%g1
sub %l0,%o7,%l0
fmuld %f10,%f10,%f10
ldd [%l5+%o4],%f34
add %l5,%o4,%l1
fsubd %f0,%f4,%f0
srl %l0,10,%l0
fmuld %f20,%f20,%f20
ldd [%l5+%o5],%f36
add %l5,%o5,%l2
fmuld %f10,%f34,%f14
ldd [%l1+0x10],%f16
add %fp,%o4,%o4
faddd %f0,%f2,%f0
andn %l0,0x1f,%l0
fmuld %f20,%f36,%f24
ldd [%l2+0x10],%f26
add %fp,%o5,%o5
faddd %f14,%f16,%f14
ldd [%l1+0x20],%f34
fmuld %f0,%f0,%f2
add %l0,%o3,%l0
faddd %f24,%f26,%f24
ldd [%l2+0x20],%f36
fmuld %f10,%f14,%f14
ldd [%l1+0x30],%f16
fmuld %f2,%f58,%f6
ldd [%l3+%l0],%f32
fmuld %f20,%f24,%f24
ldd [%l2+0x30],%f26
faddd %f14,%f34,%f14
ldd [%o4+x1_1],%f34
faddd %f6,%f56,%f6
fmuld %f2,%f62,%f4
faddd %f24,%f36,%f24
ldd [%o5+x2_1],%f36
fmuld %f10,%f14,%f14
std %f12,[%fp+y1_0]
fmuld %f2,%f6,%f6
faddd %f4,%f60,%f4
fmuld %f20,%f24,%f24
std %f22,[%fp+y2_0]
faddd %f14,%f16,%f14
faddd %f6,%f54,%f6
fmuld %f2,%f4,%f4
ldd [%g1+%l0],%f2
faddd %f24,%f26,%f24
fmuld %f10,%f14,%f14
fmuld %f0,%f6,%f6
ldd [%l4+%l0],%f0
fmuld %f4,%f32,%f4
fmuld %f20,%f24,%f24
fmuld %f6,%f2,%f6
fmuld %f34,%f14,%f14
ldd [%o4+y1_0],%f12
fmuld %f36,%f24,%f24
ldd [%o5+y2_0],%f22
faddd %f6,%f4,%f6
faddd %f14,%f12,%f14
faddd %f24,%f22,%f24
faddd %f6,%f0,%f6
faddd %f34,%f14,%f16
faddd %f36,%f24,%f26
ba,pt %icc,.FIXSIGN
! delay slot
faddd %f6,%f32,%f6
.align 32
! .CASE4: element 0 takes the polynomial path; dispatch on elements 1
! and 2.  Bit 1 of %l1 / %l2 selects the polynomial path for element
! 1 / 2 (NOTE(review): presumably fcmpgt32 results computed before this
! view, as in .ENDLOOP1/.ENDLOOP2 -- confirm):
!   %l1 & 2 set                 -> .CASE6 (which re-tests %l2 for .CASE7)
!   %l1 & 2 clear, %l2 & 2 set  -> .CASE5
!   both clear                  -> fall through: elements 1 and 2 both
!                                  take the table-driven path here.
! Fall-through arithmetic:
!   element 0:  r0 = X + (X * (z*(((c[0]*z + c[0x10])*z + c[0x20])*z
!                    + c[0x30])) + Y), z = x0*x0, c = %l5 + %o3,
!               X = [%fp+%o3+x0_1], Y = [%fp+%o3+y0_0]
!   element k in {1,2}:  d = (x - ((x + %f31) & %f44)) + y, z = d*d,
!               idx = (((hx - 0x3fc3c000) >> 10) & ~0x1f) + %o4 / %o5,
!               r = ((d*((z*%f58 + %f56)*z + %f54)) * [%l3+8+idx]
!                    + (z*(z*%f62 + %f60)) * [%l3+idx])
!                   + [%l4+idx] + [%l3+idx]
! Results are left in %f6, %f16 and %f26 before branching to .FIXSIGN.
.CASE4:
fands %f29,%f28,%f29 ! if (n & 1) clear sign bit
sethi %hi(0x3fc3c000),%o7
andcc %l1,2,%g0
bne,pn %icc,.CASE6
! delay slot: sets the condition codes tested by the next bne
! (here for .CASE5, or in .CASE6 for .CASE7)
andcc %l2,2,%g0
fpadd32s %f10,%f31,%f18
ld [%fp+x1_1],%l1
bne,pn %icc,.CASE5
! delay slot
add %l3,8,%g1
ld [%fp+x2_1],%l2
fpadd32s %f20,%f31,%f28
fand %f18,%f44,%f14
sub %l1,%o7,%l1
fand %f28,%f44,%f24
sub %l2,%o7,%l2
fsubd %f10,%f14,%f10
srl %l1,10,%l1
fsubd %f20,%f24,%f20
srl %l2,10,%l2
fmuld %f0,%f0,%f0
ldd [%l5+%o3],%f32
add %l5,%o3,%l0
faddd %f10,%f12,%f10
andn %l1,0x1f,%l1
faddd %f20,%f22,%f20
andn %l2,0x1f,%l2
fmuld %f0,%f32,%f4
ldd [%l0+0x10],%f6
! from here on %o3 is a frame-relative address, not a small offset
add %fp,%o3,%o3
fmuld %f10,%f10,%f12
add %l1,%o4,%l1
fmuld %f20,%f20,%f22
add %l2,%o5,%l2
faddd %f4,%f6,%f4
ldd [%l0+0x20],%f32
fmuld %f12,%f58,%f16
ldd [%l3+%l1],%f34
fmuld %f22,%f58,%f26
ldd [%l3+%l2],%f36
fmuld %f0,%f4,%f4
ldd [%l0+0x30],%f6
faddd %f16,%f56,%f16
fmuld %f12,%f62,%f14
faddd %f26,%f56,%f26
fmuld %f22,%f62,%f24
faddd %f4,%f32,%f4
ldd [%o3+x0_1],%f32
fmuld %f12,%f16,%f16
faddd %f14,%f60,%f14
fmuld %f22,%f26,%f26
faddd %f24,%f60,%f24
fmuld %f0,%f4,%f4
faddd %f16,%f54,%f16
fmuld %f12,%f14,%f14
ldd [%g1+%l1],%f12
faddd %f26,%f54,%f26
fmuld %f22,%f24,%f24
ldd [%g1+%l2],%f22
faddd %f4,%f6,%f4
fmuld %f10,%f16,%f16
ldd [%l4+%l1],%f10
fmuld %f20,%f26,%f26
ldd [%l4+%l2],%f20
fmuld %f14,%f34,%f14
! spill y0 so it can be re-read through the %o3-relative slot below
std %f2,[%fp+y0_0]
fmuld %f24,%f36,%f24
fmuld %f0,%f4,%f4
fmuld %f16,%f12,%f16
fmuld %f26,%f22,%f26
fmuld %f32,%f4,%f4
ldd [%o3+y0_0],%f2
faddd %f16,%f14,%f16
faddd %f26,%f24,%f26
faddd %f4,%f2,%f4
faddd %f16,%f10,%f16
faddd %f26,%f20,%f26
faddd %f32,%f4,%f6
faddd %f16,%f34,%f16
ba,pt %icc,.FIXSIGN
! delay slot: final add for element 2 completes while branching
faddd %f26,%f36,%f26
.align 32
! .CASE5: reached from .CASE4 with %f18 = x1 + %f31 (fpadd32s), %l1 =
! word loaded from [%fp+x1_1], %o7 = 0x3fc3c000 and %g1 = %l3 + 8.
! Elements 0 and 2 take the polynomial path; element 1 takes the
! table-driven path.  The three computations are interleaved.
!   elements 0, 2:  r = X + (X * (z*(((c[0]*z + c[0x10])*z + c[0x20])*z
!                       + c[0x30])) + Y), z = x*x,
!                   c = %l5 + %o3 (element 0) or %l5 + %o5 (element 2),
!                   X/Y re-read from [%fp+%o3+x0_1]/[%fp+%o3+y0_0]
!                   (x2_1/y2_0 with %o5)
!   element 1:      d = (x1 - (%f18 & %f44)) + y1, z = d*d,
!                   idx = (((%l1 - %o7) >> 10) & ~0x1f) + %o4,
!                   r1 = ((d*((z*%f58 + %f56)*z + %f54)) * [%g1+idx]
!                         + (z*(z*%f62 + %f60)) * [%l3+idx])
!                        + [%l4+idx] + [%l3+idx]
! Results are left in %f6, %f16 and %f26 before branching to .FIXSIGN.
.CASE5:
fand %f18,%f44,%f14
sub %l1,%o7,%l1
fmuld %f0,%f0,%f0
ldd [%l5+%o3],%f32
add %l5,%o3,%l0
fsubd %f10,%f14,%f10
srl %l1,10,%l1
fmuld %f20,%f20,%f20
ldd [%l5+%o5],%f36
add %l5,%o5,%l2
fmuld %f0,%f32,%f4
ldd [%l0+0x10],%f6
! from here on %o3 is a frame-relative address, not a small offset
add %fp,%o3,%o3
faddd %f10,%f12,%f10
andn %l1,0x1f,%l1
fmuld %f20,%f36,%f24
ldd [%l2+0x10],%f26
! likewise %o5 becomes a frame-relative address
add %fp,%o5,%o5
faddd %f4,%f6,%f4
ldd [%l0+0x20],%f32
fmuld %f10,%f10,%f12
add %l1,%o4,%l1
faddd %f24,%f26,%f24
ldd [%l2+0x20],%f36
fmuld %f0,%f4,%f4
ldd [%l0+0x30],%f6
fmuld %f12,%f58,%f16
ldd [%l3+%l1],%f34
fmuld %f20,%f24,%f24
ldd [%l2+0x30],%f26
faddd %f4,%f32,%f4
ldd [%o3+x0_1],%f32
faddd %f16,%f56,%f16
fmuld %f12,%f62,%f14
faddd %f24,%f36,%f24
ldd [%o5+x2_1],%f36
fmuld %f0,%f4,%f4
! spill y0 so it can be re-read through the %o3-relative slot below
std %f2,[%fp+y0_0]
fmuld %f12,%f16,%f16
faddd %f14,%f60,%f14
fmuld %f20,%f24,%f24
! spill y2 so it can be re-read through the %o5-relative slot below
std %f22,[%fp+y2_0]
faddd %f4,%f6,%f4
faddd %f16,%f54,%f16
fmuld %f12,%f14,%f14
ldd [%g1+%l1],%f12
faddd %f24,%f26,%f24
fmuld %f0,%f4,%f4
fmuld %f10,%f16,%f16
ldd [%l4+%l1],%f10
fmuld %f14,%f34,%f14
fmuld %f20,%f24,%f24
fmuld %f16,%f12,%f16
fmuld %f32,%f4,%f4
ldd [%o3+y0_0],%f2
fmuld %f36,%f24,%f24
ldd [%o5+y2_0],%f22
faddd %f16,%f14,%f16
faddd %f4,%f2,%f4
faddd %f24,%f22,%f24
faddd %f16,%f10,%f16
faddd %f32,%f4,%f6
faddd %f36,%f24,%f26
ba,pt %icc,.FIXSIGN
! delay slot: final add for element 1 completes while branching
faddd %f16,%f34,%f16
.align 32
! .CASE6: reached from .CASE4 when bit 1 of %l1 is set; the condition
! codes on entry come from "andcc %l2,2,%g0" in the delay slot of that
! branch.  If bit 1 of %l2 is also set, go to .CASE7 (all three
! elements polynomial).  Otherwise elements 0 and 1 take the polynomial
! path and element 2 takes the table-driven path:
!   elements 0, 1:  r = X + (X * (z*(((c[0]*z + c[0x10])*z + c[0x20])*z
!                       + c[0x30])) + Y), z = x*x,
!                   c = %l5 + %o3 (element 0) or %l5 + %o4 (element 1),
!                   X/Y re-read from [%fp+%o3+x0_1]/[%fp+%o3+y0_0]
!                   (x1_1/y1_0 with %o4)
!   element 2:      d = (x2 - ((x2 + %f31) & %f44)) + y2, z = d*d,
!                   idx = (((%l2 - %o7) >> 10) & ~0x1f) + %o5,
!                   r2 = ((d*((z*%f58 + %f56)*z + %f54)) * [%g1+idx]
!                         + (z*(z*%f62 + %f60)) * [%l3+idx])
!                        + [%l4+idx] + [%l3+idx]
! Results are left in %f6, %f16 and %f26 before branching to .FIXSIGN.
.CASE6:
ld [%fp+x2_1],%l2
add %l3,8,%g1
bne,pn %icc,.CASE7
! delay slot: executed on both paths (branch is not annulled)
fpadd32s %f20,%f31,%f28
fand %f28,%f44,%f24
ldd [%l5+%o3],%f32
add %l5,%o3,%l0
fmuld %f0,%f0,%f0
sub %l2,%o7,%l2
fsubd %f20,%f24,%f20
srl %l2,10,%l2
fmuld %f10,%f10,%f10
ldd [%l5+%o4],%f34
add %l5,%o4,%l1
fmuld %f0,%f32,%f4
ldd [%l0+0x10],%f6
! from here on %o3 is a frame-relative address, not a small offset
add %fp,%o3,%o3
faddd %f20,%f22,%f20
andn %l2,0x1f,%l2
fmuld %f10,%f34,%f14
ldd [%l1+0x10],%f16
! likewise %o4 becomes a frame-relative address
add %fp,%o4,%o4
faddd %f4,%f6,%f4
ldd [%l0+0x20],%f32
fmuld %f20,%f20,%f22
add %l2,%o5,%l2
faddd %f14,%f16,%f14
ldd [%l1+0x20],%f34
fmuld %f0,%f4,%f4
ldd [%l0+0x30],%f6
fmuld %f22,%f58,%f26
ldd [%l3+%l2],%f36
fmuld %f10,%f14,%f14
ldd [%l1+0x30],%f16
faddd %f4,%f32,%f4
ldd [%o3+x0_1],%f32
faddd %f26,%f56,%f26
fmuld %f22,%f62,%f24
faddd %f14,%f34,%f14
ldd [%o4+x1_1],%f34
fmuld %f0,%f4,%f4
! spill y0 so it can be re-read through the %o3-relative slot below
std %f2,[%fp+y0_0]
fmuld %f22,%f26,%f26
faddd %f24,%f60,%f24
fmuld %f10,%f14,%f14
! spill y1 so it can be re-read through the %o4-relative slot below
std %f12,[%fp+y1_0]
faddd %f4,%f6,%f4
faddd %f26,%f54,%f26
fmuld %f22,%f24,%f24
ldd [%g1+%l2],%f22
faddd %f14,%f16,%f14
fmuld %f0,%f4,%f4
fmuld %f20,%f26,%f26
ldd [%l4+%l2],%f20
fmuld %f24,%f36,%f24
fmuld %f10,%f14,%f14
fmuld %f26,%f22,%f26
fmuld %f32,%f4,%f4
ldd [%o3+y0_0],%f2
fmuld %f34,%f14,%f14
ldd [%o4+y1_0],%f12
faddd %f26,%f24,%f26
faddd %f4,%f2,%f4
faddd %f14,%f12,%f14
faddd %f26,%f20,%f26
faddd %f32,%f4,%f6
faddd %f34,%f14,%f16
ba,pt %icc,.FIXSIGN
! delay slot: final add for element 2 completes while branching
faddd %f26,%f36,%f26
.align 32
! .CASE7: all three elements take the polynomial path.  The same
! sequence is issued for elements 0, 1 and 2 in lock step, using
! register groups %f0-%f6/%f32, %f10-%f16/%f34 and %f20-%f26/%f36 and
! selectors %o3, %o4, %o5 respectively.  For each element:
!   z = x*x
!   c = %l5 + selector               (coefficients at c+0, 0x10, 0x20, 0x30)
!   p = z*(((c[0]*z + c[0x10])*z + c[0x20])*z + c[0x30])
!   r = X + (X*p + Y)
! where X and Y are re-read from the frame at [%fp+selector+xN_1] and
! [%fp+selector+yN_0] after y has been spilled to [%fp+yN_0].
! NOTE(review): with a zero selector X/Y are the slots written for x/y;
! what a non-zero selector addresses is set up outside this view.
! Results are left in %f6, %f16 and %f26 before branching to .FIXSIGN.
.CASE7:
fmuld %f0,%f0,%f0
ldd [%l5+%o3],%f32
add %l5,%o3,%l0
fmuld %f10,%f10,%f10
ldd [%l5+%o4],%f34
add %l5,%o4,%l1
fmuld %f20,%f20,%f20
ldd [%l5+%o5],%f36
add %l5,%o5,%l2
fmuld %f0,%f32,%f4
ldd [%l0+0x10],%f6
! from here on %o3/%o4/%o5 are frame-relative addresses
add %fp,%o3,%o3
fmuld %f10,%f34,%f14
ldd [%l1+0x10],%f16
add %fp,%o4,%o4
fmuld %f20,%f36,%f24
ldd [%l2+0x10],%f26
add %fp,%o5,%o5
faddd %f4,%f6,%f4
ldd [%l0+0x20],%f32
faddd %f14,%f16,%f14
ldd [%l1+0x20],%f34
faddd %f24,%f26,%f24
ldd [%l2+0x20],%f36
fmuld %f0,%f4,%f4
ldd [%l0+0x30],%f6
fmuld %f10,%f14,%f14
ldd [%l1+0x30],%f16
fmuld %f20,%f24,%f24
ldd [%l2+0x30],%f26
faddd %f4,%f32,%f4
ldd [%o3+x0_1],%f32
faddd %f14,%f34,%f14
ldd [%o4+x1_1],%f34
faddd %f24,%f36,%f24
ldd [%o5+x2_1],%f36
fmuld %f0,%f4,%f4
! spill y0, y1, y2 so they can be re-read through the selector-relative
! slots below
std %f2,[%fp+y0_0]
fmuld %f10,%f14,%f14
std %f12,[%fp+y1_0]
fmuld %f20,%f24,%f24
std %f22,[%fp+y2_0]
faddd %f4,%f6,%f4
faddd %f14,%f16,%f14
faddd %f24,%f26,%f24
fmuld %f0,%f4,%f4
fmuld %f10,%f14,%f14
fmuld %f20,%f24,%f24
fmuld %f32,%f4,%f4
ldd [%o3+y0_0],%f2
fmuld %f34,%f14,%f14
ldd [%o4+y1_0],%f12
fmuld %f36,%f24,%f24
ldd [%o5+y2_0],%f22
faddd %f4,%f2,%f4
faddd %f14,%f12,%f14
faddd %f24,%f22,%f24
faddd %f32,%f4,%f6
faddd %f34,%f14,%f16
ba,pt %icc,.FIXSIGN
! delay slot: final add for element 2 completes while branching
faddd %f36,%f24,%f26
.align 32
! .ENDLOOP2: reached from .SKIP2/.BIG2 when the element count runs out.
! Finishes, without interleaving, the pending element whose argument is
! in %f10 and whose result goes to [%o1]; then falls through into
! .ENDLOOP1 for the element destined for [%o0].
!
! Steps, as read from the instructions below:
!   1. t = x*%f40 + %f42; the low word of t is stored to [%fp+n1] as the
!      integer n, and t - %f42 gives n as a double.
!      NOTE(review): %f40/%f42 are loaded outside this view; presumably
!      the 2/pi and 0x43380000... rounding constants from the table.
!   2. x is reduced by n*%f46, n*%f48, n*%f50 and n*%f52 in turn; the
!      rounding error of each subtraction is folded into the next term,
!      giving a head x in %f10 and a tail y in %f12.
!   3. The sign of x is saved in %f19, x is replaced by |x|, and y has
!      its sign flipped to match.
!   4. %o4 = (n & 1) << 3 selects a threshold word from [%l5+thresh+%o4]
!      and a mask word from [%l5+thresh+4+%o4]; fcmpgt32 compares the
!      threshold with the high word of |x|.  If the threshold is greater
!      (bit 1 of %l1), the polynomial path at 1: is taken; otherwise the
!      table-driven path follows.
!   5. At 2:, a word selected by n & 2 from [%l5+thresh-4 + 0 or 8] is
!      xor-ed into the saved sign, which is then or-ed into the high
!      word of the result before it is stored.
.ENDLOOP2:
fmuld %f10,%f40,%f12
add %l5,thresh,%g1
faddd %f12,%f42,%f12
! low word of the biased sum is the integer n
st %f13,[%fp+n1]
fsubd %f12,%f42,%f12 ! n
fmuld %f12,%f46,%f14
fsubd %f10,%f14,%f14
fmuld %f12,%f48,%f16
fsubd %f14,%f16,%f10
ld [%fp+n1],%o4
fsubd %f14,%f10,%f34
and %o4,1,%o4
fsubd %f34,%f16,%f34
fmuld %f12,%f50,%f18
! %o4 = (n & 1) * 8: byte offset selecting the alternate entry
sll %o4,3,%o4
fsubd %f18,%f34,%f18
ld [%g1+%o4],%f16
fsubd %f10,%f18,%f14
fsubd %f10,%f14,%f34
add %l5,thresh+4,%o7
fsubd %f34,%f18,%f34
fmuld %f12,%f52,%f12
fsubd %f12,%f34,%f12
ld [%o7+%o4],%f18
fsubd %f14,%f12,%f10 ! x
fsubd %f14,%f10,%f14
fands %f10,%f30,%f19 ! save signbit
fabsd %f10,%f10
std %f10,[%fp+x1_1]
fsubd %f14,%f12,%f12 ! y
! bit 1 of %l1 is set when the threshold word exceeds the high word of |x|
fcmpgt32 %f16,%f10,%l1
fxors %f12,%f19,%f12
fands %f19,%f18,%f19 ! if (n & 1) clear sign bit
andcc %l1,2,%g0
bne,pn %icc,1f
! delay slot
nop
! table-driven path:
!   d = (x - ((x + %f31) & %f44)) + y, z = d*d,
!   idx = (((hx - 0x3fc3c000) >> 10) & ~0x1f) + %o4,
!   r = ((d*((z*%f58 + %f56)*z + %f54)) * [%l3+8+idx]
!        + (z*(z*%f62 + %f60)) * [%l3+idx]) + [%l4+idx] + [%l3+idx]
fpadd32s %f10,%f31,%f18
ld [%fp+x1_1],%l1
fand %f18,%f44,%f14
sethi %hi(0x3fc3c000),%o7
add %l3,8,%g1
fsubd %f10,%f14,%f10
sub %l1,%o7,%l1
srl %l1,10,%l1
faddd %f10,%f12,%f10
andn %l1,0x1f,%l1
fmuld %f10,%f10,%f12
add %l1,%o4,%l1
fmuld %f12,%f58,%f16
ldd [%l3+%l1],%f34
faddd %f16,%f56,%f16
fmuld %f12,%f62,%f14
fmuld %f12,%f16,%f16
faddd %f14,%f60,%f14
faddd %f16,%f54,%f16
fmuld %f12,%f14,%f14
ldd [%g1+%l1],%f12
fmuld %f10,%f16,%f16
ldd [%l4+%l1],%f10
fmuld %f14,%f34,%f14
fmuld %f16,%f12,%f16
faddd %f16,%f14,%f16
faddd %f16,%f10,%f16
ba,pt %icc,2f
! delay slot: final add of the table path
faddd %f16,%f34,%f16
! polynomial path:
!   z = x*x, c = %l5 + %o4,
!   r = X + (X * (z*(((c[0]*z + c[0x10])*z + c[0x20])*z + c[0x30])) + Y)
!   with X = [%fp+%o4+x1_1] and Y = [%fp+%o4+y1_0]
1:
fmuld %f10,%f10,%f10
ldd [%l5+%o4],%f34
add %l5,%o4,%l1
fmuld %f10,%f34,%f14
ldd [%l1+0x10],%f16
add %fp,%o4,%o4
faddd %f14,%f16,%f14
ldd [%l1+0x20],%f34
fmuld %f10,%f14,%f14
ldd [%l1+0x30],%f16
faddd %f14,%f34,%f14
ldd [%o4+x1_1],%f34
fmuld %f10,%f14,%f14
! spill y so it can be re-read through the %o4-relative slot below
std %f12,[%fp+y1_0]
faddd %f14,%f16,%f14
fmuld %f10,%f14,%f14
fmuld %f34,%f14,%f14
ldd [%o4+y1_0],%f12
faddd %f14,%f12,%f14
faddd %f34,%f14,%f16
! sign fix-up and store of the result (%f16/%f17) to [%o1]
2:
add %l5,thresh-4,%g1
ld [%fp+n1],%o4
and %o4,2,%o4
! %o4 = (n & 2) * 4: byte offset 0 or 8 from thresh-4
sll %o4,2,%o4
ld [%g1+%o4],%f18
fxors %f19,%f18,%f19
fors %f16,%f19,%f16 ! tack on sign
st %f16,[%o1]
st %f17,[%o1+4]
! .ENDLOOP1: reached from .SKIP1/.BIG1 when the element count runs out,
! or by falling through from .ENDLOOP2.  Finishes, without
! interleaving, the pending element whose argument is in %f0 and whose
! result goes to [%o0]; then falls through into .ENDLOOP0.
!
! Steps, as read from the instructions below:
!   1. t = x*%f40 + %f42; the low word of t is stored to [%fp+n0] as the
!      integer n, and t - %f42 gives n as a double.
!      NOTE(review): %f40/%f42 are loaded outside this view; presumably
!      the 2/pi and 0x43380000... rounding constants from the table.
!   2. x is reduced by n*%f46, n*%f48, n*%f50 and n*%f52 in turn; the
!      rounding error of each subtraction is folded into the next term,
!      giving a head x in %f0 and a tail y in %f2.
!   3. The sign of x is saved in %f9, x is replaced by |x|, and y has
!      its sign flipped to match.
!   4. %o3 = (n & 1) << 3 selects a threshold word from [%l5+thresh+%o3]
!      and a mask word from [%l5+thresh+4+%o3]; fcmpgt32 compares the
!      threshold with the high word of |x|.  If the threshold is greater
!      (bit 1 of %l0), the polynomial path at 1: is taken; otherwise the
!      table-driven path follows.
!   5. At 2:, a word selected by n & 2 from [%l5+thresh-4 + 0 or 8] is
!      xor-ed into the saved sign, which is then or-ed into the high
!      word of the result before it is stored.
.ENDLOOP1:
fmuld %f0,%f40,%f2
add %l5,thresh,%g1
faddd %f2,%f42,%f2
! low word of the biased sum is the integer n
st %f3,[%fp+n0]
fsubd %f2,%f42,%f2 ! n
fmuld %f2,%f46,%f4
fsubd %f0,%f4,%f4
fmuld %f2,%f48,%f6
fsubd %f4,%f6,%f0
ld [%fp+n0],%o3
fsubd %f4,%f0,%f32
and %o3,1,%o3
fsubd %f32,%f6,%f32
fmuld %f2,%f50,%f8
! %o3 = (n & 1) * 8: byte offset selecting the alternate entry
sll %o3,3,%o3
fsubd %f8,%f32,%f8
ld [%g1+%o3],%f6
fsubd %f0,%f8,%f4
fsubd %f0,%f4,%f32
add %l5,thresh+4,%o7
fsubd %f32,%f8,%f32
fmuld %f2,%f52,%f2
fsubd %f2,%f32,%f2
ld [%o7+%o3],%f8
fsubd %f4,%f2,%f0 ! x
fsubd %f4,%f0,%f4
fands %f0,%f30,%f9 ! save signbit
fabsd %f0,%f0
std %f0,[%fp+x0_1]
fsubd %f4,%f2,%f2 ! y
! bit 1 of %l0 is set when the threshold word exceeds the high word of |x|
fcmpgt32 %f6,%f0,%l0
fxors %f2,%f9,%f2
fands %f9,%f8,%f9 ! if (n & 1) clear sign bit
andcc %l0,2,%g0
bne,pn %icc,1f
! delay slot
nop
! table-driven path:
!   d = (x - ((x + %f31) & %f44)) + y, z = d*d,
!   idx = (((hx - 0x3fc3c000) >> 10) & ~0x1f) + %o3,
!   r = ((d*((z*%f58 + %f56)*z + %f54)) * [%l3+8+idx]
!        + (z*(z*%f62 + %f60)) * [%l3+idx]) + [%l4+idx] + [%l3+idx]
fpadd32s %f0,%f31,%f8
ld [%fp+x0_1],%l0
fand %f8,%f44,%f4
sethi %hi(0x3fc3c000),%o7
add %l3,8,%g1
fsubd %f0,%f4,%f0
sub %l0,%o7,%l0
srl %l0,10,%l0
faddd %f0,%f2,%f0
andn %l0,0x1f,%l0
fmuld %f0,%f0,%f2
add %l0,%o3,%l0
fmuld %f2,%f58,%f6
ldd [%l3+%l0],%f32
faddd %f6,%f56,%f6
fmuld %f2,%f62,%f4
fmuld %f2,%f6,%f6
faddd %f4,%f60,%f4
faddd %f6,%f54,%f6
fmuld %f2,%f4,%f4
ldd [%g1+%l0],%f2
fmuld %f0,%f6,%f6
ldd [%l4+%l0],%f0
fmuld %f4,%f32,%f4
fmuld %f6,%f2,%f6
faddd %f6,%f4,%f6
faddd %f6,%f0,%f6
ba,pt %icc,2f
! delay slot: final add of the table path
faddd %f6,%f32,%f6
! polynomial path:
!   z = x*x, c = %l5 + %o3,
!   r = X + (X * (z*(((c[0]*z + c[0x10])*z + c[0x20])*z + c[0x30])) + Y)
!   with X = [%fp+%o3+x0_1] and Y = [%fp+%o3+y0_0]
1:
fmuld %f0,%f0,%f0
ldd [%l5+%o3],%f32
add %l5,%o3,%l0
fmuld %f0,%f32,%f4
ldd [%l0+0x10],%f6
add %fp,%o3,%o3
faddd %f4,%f6,%f4
ldd [%l0+0x20],%f32
fmuld %f0,%f4,%f4
ldd [%l0+0x30],%f6
faddd %f4,%f32,%f4
ldd [%o3+x0_1],%f32
fmuld %f0,%f4,%f4
! spill y so it can be re-read through the %o3-relative slot below
std %f2,[%fp+y0_0]
faddd %f4,%f6,%f4
fmuld %f0,%f4,%f4
fmuld %f32,%f4,%f4
ldd [%o3+y0_0],%f2
faddd %f4,%f2,%f4
faddd %f32,%f4,%f6
! sign fix-up and store of the result (%f6/%f7) to [%o0]
2:
add %l5,thresh-4,%g1
ld [%fp+n0],%o3
and %o3,2,%o3
! %o3 = (n & 2) * 4: byte offset 0 or 8 from thresh-4
sll %o3,2,%o3
ld [%g1+%o3],%f8
fxors %f9,%f8,%f9
fors %f6,%f9,%f6 ! tack on sign
st %f6,[%o0]
st %f7,[%o0+4]
! .ENDLOOP0: common exit path.  Reached from .SKIP0/.BIG0 when the
! element count runs out, or by falling through from .ENDLOOP1.
! If LIM_l6 is zero, return immediately.  Otherwise (the .BIG0/.BIG1/
! .BIG2 handlers copy %l7 into LIM_l6 when they meet a finite argument)
! reload the values saved in the frame and call __vlibm_vsin_big:
!   %o0 = [%fp+nsave]     %o1 = [%fp+xsave]    %o2 = [%fp+sxsave]
!   %o3 = [%fp+ysave]     %o4 = [%fp+sysave]   %o5 = %l7
! NOTE(review): the slot names suggest the original n, x, stridex, y,
! stridey arguments; they are stored outside this view -- confirm.
.ENDLOOP0:
! check for huge arguments remaining
tst LIM_l6
be,pt %icc,.exit
! delay slot
nop
! ========== huge range (use C code) ==========
! pointer-sized slots: 64-bit loads under __sparcv9, 32-bit otherwise
#ifdef __sparcv9
ldx [%fp+xsave],%o1
ldx [%fp+ysave],%o3
#else
ld [%fp+xsave],%o1
ld [%fp+ysave],%o3
#endif
ld [%fp+nsave],%o0
ld [%fp+sxsave],%o2
ld [%fp+sysave],%o4
sra %o2,0,%o2 ! sign-extend for V9
sra %o4,0,%o4
call __vlibm_vsin_big
mov %l7,%o5 ! delay slot
! the call returns here and falls into the normal return sequence
.exit:
ret
restore
.align 32
! .SKIP0: drop the element currently in pipeline slot 0 and refill the
! slot from the element already held for slot 1 (high word in %l1 and
! %f10).  Decrements the remaining count in %i0; if it is now <= 0,
! goes to .ENDLOOP0.  Otherwise re-enters the main loop at .LOOP0 with
!   %l0 = %l1 & ~%i5, %f0 = %f10, %f1 = word at [%i1+4],
! and advances the x pointer.  The y pointer is advanced on both paths.
! NOTE(review): the branch into this label is outside this view.
.SKIP0:
addcc %i0,-1,%i0
ble,pn %icc,.ENDLOOP0
! delay slot, harmless if branch taken
add %i3,%i4,%i3 ! y += stridey
andn %l1,%i5,%l0 ! hx &= ~0x80000000
! high word comes from %f10; the low word is fetched from memory
fmovs %f10,%f0
ld [%i1+4],%f1
ba,pt %icc,.LOOP0
! delay slot
add %i1,%i2,%i1 ! x += stridex
.align 32
! .SKIP1: drop the element currently in pipeline slot 1 and refill the
! slot from the element already held for slot 2 (high word in %l2 and
! %f20).  Decrements the remaining count in %i0; if it is now <= 0,
! goes to .ENDLOOP1.  Otherwise re-enters the main loop at .LOOP1 with
!   %l1 = %l2 & ~%i5, %f10 = %f20, %f11 = word at [%i1+4],
! and advances the x pointer.  The y pointer is advanced on both paths.
! NOTE(review): the branch into this label is outside this view.
.SKIP1:
addcc %i0,-1,%i0
ble,pn %icc,.ENDLOOP1
! delay slot, harmless if branch taken
add %i3,%i4,%i3 ! y += stridey
andn %l2,%i5,%l1 ! hx &= ~0x80000000
! high word comes from %f20; the low word is fetched from memory
fmovs %f20,%f10
ld [%i1+4],%f11
ba,pt %icc,.LOOP1
! delay slot
add %i1,%i2,%i1 ! x += stridex
.align 32
! .SKIP2: drop the element currently in pipeline slot 2 and refill the
! slot directly from memory.  Decrements the remaining count in %i0; if
! it is now <= 0, goes to .ENDLOOP2.  Otherwise loads the next argument
! from [%i1] (high word into both %l2 and %f20, low word into %f21),
! masks %l2 with ~%i5, and re-enters the main loop at .LOOP2 while
! advancing the x pointer.  The y pointer is advanced on both paths.
! NOTE(review): the branch into this label is outside this view.
.SKIP2:
addcc %i0,-1,%i0
ble,pn %icc,.ENDLOOP2
! delay slot, harmless if branch taken
add %i3,%i4,%i3 ! y += stridey
! same high word is needed in an integer and a floating-point register
ld [%i1],%l2
ld [%i1],%f20
ld [%i1+4],%f21
andn %l2,%i5,%l2 ! hx &= ~0x80000000
ba,pt %icc,.LOOP2
! delay slot
add %i1,%i2,%i1 ! x += stridex
.align 32
! .BIG0: handler for a slot-0 argument routed here from outside this
! view.  %l0 holds its high word with the sign already masked off.
!   - hx < 0x7ff00000 (finite): copy %l7 into LIM_l6 so that .ENDLOOP0
!     later calls __vlibm_vsin_big; nothing is stored for this element
!     here.
!   - otherwise (infinity or NaN exponent): store x - x to [%o0].
! Then decrement the remaining count in %i0; if it is now <= 0 go to
! .ENDLOOP0, else refill slot 0 from slot 1 (%l1, %f10/%f11) and
! re-enter the main loop at .LOOP0 while advancing the x pointer.
.BIG0:
sethi %hi(0x7ff00000),%o7
cmp %l0,%o7
bl,a,pt %icc,1f ! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
mov %l7,LIM_l6 ! set biguns flag or
fsubd %f0,%f0,%f0 ! y = x - x
st %f0,[%o0]
st %f1,[%o0+4]
1:
addcc %i0,-1,%i0
ble,pn %icc,.ENDLOOP0
! delay slot, harmless if branch taken
andn %l1,%i5,%l0 ! hx &= ~0x80000000
! both halves of the next argument are already in %f10/%f11
fmovd %f10,%f0
ba,pt %icc,.LOOP0
! delay slot
add %i1,%i2,%i1 ! x += stridex
.align 32
! .BIG1: handler for a slot-1 argument routed here from outside this
! view.  %l1 holds its high word with the sign already masked off.
!   - hx < 0x7ff00000 (finite): copy %l7 into LIM_l6 so that .ENDLOOP0
!     later calls __vlibm_vsin_big; nothing is stored for this element
!     here.
!   - otherwise (infinity or NaN exponent): store x - x to [%o1].
! Then decrement the remaining count in %i0; if it is now <= 0 go to
! .ENDLOOP1, else refill slot 1 from slot 2 (%l2, %f20/%f21) and
! re-enter the main loop at .LOOP1 while advancing the x pointer.
.BIG1:
sethi %hi(0x7ff00000),%o7
cmp %l1,%o7
bl,a,pt %icc,1f ! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
mov %l7,LIM_l6 ! set biguns flag or
fsubd %f10,%f10,%f10 ! y = x - x
st %f10,[%o1]
st %f11,[%o1+4]
1:
addcc %i0,-1,%i0
ble,pn %icc,.ENDLOOP1
! delay slot, harmless if branch taken
andn %l2,%i5,%l1 ! hx &= ~0x80000000
! both halves of the next argument are already in %f20/%f21
fmovd %f20,%f10
ba,pt %icc,.LOOP1
! delay slot
add %i1,%i2,%i1 ! x += stridex
.align 32
! .BIG2: handler for a slot-2 argument routed here from outside this
! view.  %l2 holds its high word with the sign already masked off.
!   - hx < 0x7ff00000 (finite): copy %l7 into LIM_l6 so that .ENDLOOP0
!     later calls __vlibm_vsin_big; nothing is stored for this element
!     here.
!   - otherwise (infinity or NaN exponent): store x - x to [%o2].
! Then decrement the remaining count in %i0; if it is now <= 0 go to
! .ENDLOOP2, else load the next argument from [%i1] into %l2 and
! %f20/%f21 and re-enter the main loop at .LOOP2 while advancing the
! x pointer.
.BIG2:
sethi %hi(0x7ff00000),%o7
cmp %l2,%o7
bl,a,pt %icc,1f ! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
mov %l7,LIM_l6 ! set biguns flag or
fsubd %f20,%f20,%f20 ! y = x - x
st %f20,[%o2]
st %f21,[%o2+4]
1:
addcc %i0,-1,%i0
ble,pn %icc,.ENDLOOP2
! delay slot
nop
! same high word is needed in an integer and a floating-point register
ld [%i1],%l2
ld [%i1],%f20
ld [%i1+4],%f21
andn %l2,%i5,%l2 ! hx &= ~0x80000000
ba,pt %icc,.LOOP2
! delay slot
add %i1,%i2,%i1 ! x += stridex
SET_SIZE(__vsin)