__vcos_ultra3.S revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
.file "__vcos_ultra3.S"
#include "libm.h"
/*
 * When LIBMVEC_SO_BUILD is defined, also publish the name __vcos as a
 * weak symbol of type function, set equal to __vcos_ultra3 (the routine
 * defined in this file).
 */
#if defined(LIBMVEC_SO_BUILD)
.weak __vcos
.type __vcos,#function
__vcos = __vcos_ultra3
#endif
/*
 * Read-only table of twelve double-precision constants, aligned to 64
 * bytes.  Each entry is written as two 32-bit words, high-order word
 * first.  The prologue of __vcos_ultra3 loads the entries with one ldd
 * each from byte offsets 0x00 through 0x58, in table order, so the order
 * of the entries must stay in step with that ldd sequence.
 *
 * How the entries are used in the code that follows:
 *   3 * 2^44    added to |x|; the low word of the sum is then read back
 *               as the table index k (scaled by 32 to address
 *               __vlibm_TBL_sincos2)
 *   3 * 2^51    added to x * invpio2 in .medium; the low word of the sum
 *               is read back as the integer n, and subtracting the
 *               constant again leaves n as a double
 *   invpio2     multiplied with x for the medium-range reduction
 *   pio2_1..3   multiplied by n and subtracted one after another from x
 *               in .medium
 *   pp1..pp3    polynomial coefficients; evaluations that start from pp3
 *               are annotated sin(x) in the per-case code
 *   qq1..qq3    polynomial coefficients; evaluations that start from qq3
 *               are annotated cos(x) in the per-case code
 */
RO_DATA
.align 64
constants:
.word 0x42c80000,0x00000000 ! 3 * 2^44
.word 0x43380000,0x00000000 ! 3 * 2^51
.word 0x3fe45f30,0x6dc9c883 ! invpio2
.word 0x3ff921fb,0x54442c00 ! pio2_1
.word 0x3d318469,0x898cc400 ! pio2_2
.word 0x3a71701b,0x839a2520 ! pio2_3
.word 0xbfc55555,0x55555533 ! pp1
.word 0x3f811111,0x10e7d53b ! pp2
.word 0xbf2a0167,0xe6b3cf9b ! pp3
.word 0xbfdfffff,0xffffff65 ! qq1
.word 0x3fa55555,0x54f88ed0 ! qq2
.word 0xbf56c12c,0xdd185f60 ! qq3
/*
 * Offsets of the temporaries of __vcos_ultra3, always used in the form
 * [%fp+name]:
 *
 *   xsave, ysave    saved x and y pointer arguments (%i1, %i3); written
 *                   with stx when __sparcv9 is defined, with st otherwise
 *   nsave           saved n (%i0)
 *   sxsave, sysave  saved stridex and stridey (%i2, %i4), stored before
 *                   the prologue scales the strides by 8
 *   biguns          32-bit word, set to zero in the prologue
 *   nk0..nk3        32-bit slots through which the low word of an FP
 *                   register pair is moved into an integer register
 *                   (st of the odd FP register, followed by ld)
 *   junk            8-byte scratch area; %o1-%o3 (py1-py3) are pointed
 *                   at it in the loop prologue, before any real output
 *                   address has been assigned to them
 *
 * tmps is the number of bytes added to the frame for these slots by the
 * save instruction at function entry; the lowest slot, junk, sits at
 * offset -0x38 and so fits inside 0x40 bytes.
 *
 * Keep comments off the #define lines themselves: text after the macro
 * name is part of the replacement and would be pasted into every use.
 */
! local storage indices
#define xsave STACK_BIAS-0x8
#define ysave STACK_BIAS-0x10
#define nsave STACK_BIAS-0x14
#define sxsave STACK_BIAS-0x18
#define sysave STACK_BIAS-0x1c
#define biguns STACK_BIAS-0x20
#define nk3 STACK_BIAS-0x24
#define nk2 STACK_BIAS-0x28
#define nk1 STACK_BIAS-0x2c
#define nk0 STACK_BIAS-0x30
#define junk STACK_BIAS-0x38
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps 0x40
! register use
! i0 n
! i1 x
! i2 stridex
! i3 y
! i4 stridey
! i5 0x80000000
! l0 hx0
! l1 hx1
! l2 hx2
! l3 hx3
! l4 k0
! l5 k1
! l6 k2
! l7 k3
! the following are 64-bit registers in both V8+ and V9
! g1 __vlibm_TBL_sincos2
! g5 scratch
! o0 py0
! o1 py1
! o2 py2
! o3 py3
! o4 0x3e400000
! o5 0x3fe921fb,0x4099251e
! o7 scratch
! f0 hx0
! f2
! f4
! f6
! f8 hx1
! f10
! f12
! f14
! f16 hx2
! f18
! f20
! f22
! f24 hx3
! f26
! f28
! f30
! f32
! f34
! f36
! f38
/*
 * Names for the FP register pairs that hold the entries of the constants
 * table.  The prologue of __vcos_ultra3 fills them with one ldd each, in
 * this order, from byte offsets 0x00, 0x08, ... 0x58 of the table, so the
 * order of these defines, of the table entries and of the ldd sequence
 * must agree.  In the part of the routine reviewed here the registers
 * are only read after that initial load.
 */
#define c3two44 %f40
#define c3two51 %f42
#define invpio2 %f44
#define pio2_1 %f46
#define pio2_2 %f48
#define pio2_3 %f50
#define pp1 %f52
#define pp2 %f54
#define pp3 %f56
#define qq1 %f58
#define qq2 %f60
#define qq3 %f62
ENTRY(__vcos_ultra3)
save %sp,-SA(MINFRAME)-tmps,%sp
PIC_SETUP(l7)
PIC_SET(l7,constants,o0)
PIC_SET(l7,__vlibm_TBL_sincos2,o1)
mov %o1,%g1
wr %g0,0x82,%asi ! set %asi for non-faulting loads
#ifdef __sparcv9
stx %i1,[%fp+xsave] ! save arguments
stx %i3,[%fp+ysave]
#else
st %i1,[%fp+xsave] ! save arguments
st %i3,[%fp+ysave]
#endif
st %i0,[%fp+nsave]
st %i2,[%fp+sxsave]
st %i4,[%fp+sysave]
st %g0,[%fp+biguns] ! biguns = 0
ldd [%o0+0x00],c3two44 ! load/set up constants
ldd [%o0+0x08],c3two51
ldd [%o0+0x10],invpio2
ldd [%o0+0x18],pio2_1
ldd [%o0+0x20],pio2_2
ldd [%o0+0x28],pio2_3
ldd [%o0+0x30],pp1
ldd [%o0+0x38],pp2
ldd [%o0+0x40],pp3
ldd [%o0+0x48],qq1
ldd [%o0+0x50],qq2
ldd [%o0+0x58],qq3
sethi %hi(0x80000000),%i5
sethi %hi(0x3e400000),%o4
sethi %hi(0x3fe921fb),%o5
or %o5,%lo(0x3fe921fb),%o5
sllx %o5,32,%o5
sethi %hi(0x4099251e),%o7
or %o7,%lo(0x4099251e),%o7
or %o5,%o7,%o5
sll %i2,3,%i2 ! scale strides
sll %i4,3,%i4
add %fp,junk,%o1 ! loop prologue
add %fp,junk,%o2
add %fp,junk,%o3
ld [%i1],%l0 ! *x
ld [%i1],%f0
ld [%i1+4],%f3
andn %l0,%i5,%l0 ! mask off sign
add %i1,%i2,%i1 ! x += stridex
ba .loop0
nop
! 16-byte aligned
.align 16
.loop0:
lda [%i1]%asi,%l1 ! preload next argument
sub %l0,%o4,%g5
sub %o5,%l0,%o7
fabss %f0,%f2
lda [%i1]%asi,%f8
orcc %o7,%g5,%g0
mov %i3,%o0 ! py0 = y
bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e
! delay slot
lda [%i1+4]%asi,%f11
addcc %i0,-1,%i0
add %i3,%i4,%i3 ! y += stridey
ble,pn %icc,.last1
! delay slot
andn %l1,%i5,%l1
add %i1,%i2,%i1 ! x += stridex
faddd %f2,c3two44,%f4
st %f15,[%o1+4]
.loop1:
lda [%i1]%asi,%l2 ! preload next argument
sub %l1,%o4,%g5
sub %o5,%l1,%o7
fabss %f8,%f10
lda [%i1]%asi,%f16
orcc %o7,%g5,%g0
mov %i3,%o1 ! py1 = y
bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e
! delay slot
lda [%i1+4]%asi,%f19
addcc %i0,-1,%i0
add %i3,%i4,%i3 ! y += stridey
ble,pn %icc,.last2
! delay slot
andn %l2,%i5,%l2
add %i1,%i2,%i1 ! x += stridex
faddd %f10,c3two44,%f12
st %f23,[%o2+4]
.loop2:
lda [%i1]%asi,%l3 ! preload next argument
sub %l2,%o4,%g5
sub %o5,%l2,%o7
fabss %f16,%f18
lda [%i1]%asi,%f24
orcc %o7,%g5,%g0
mov %i3,%o2 ! py2 = y
bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e
! delay slot
lda [%i1+4]%asi,%f27
addcc %i0,-1,%i0
add %i3,%i4,%i3 ! y += stridey
ble,pn %icc,.last3
! delay slot
andn %l3,%i5,%l3
add %i1,%i2,%i1 ! x += stridex
faddd %f18,c3two44,%f20
st %f31,[%o3+4]
! fourth element of the group: range-check hx3 (no further preload here)
.loop3:
sub %l3,%o4,%g5 ! hx3 - 0x3e400000
sub %o5,%l3,%o7 ! low word: 0x4099251e - hx3
fabss %f24,%f26
st %f5,[%fp+nk0] ! low word of %f4 to nk0
orcc %o7,%g5,%g0 ! bit 31 set if either difference is negative
mov %i3,%o3 ! py3 = y
bl,pn %icc,.range3 ! hx < 0x3e400000 or hx > 0x4099251e
! delay slot
st %f13,[%fp+nk1] ! low word of %f12 to nk1
!!! DONE?
.cont:
srlx %o5,32,%o7
add %i3,%i4,%i3 ! y += stridey
fmovs %f3,%f1
st %f21,[%fp+nk2]
sub %o7,%l0,%l0
sub %o7,%l1,%l1
faddd %f26,c3two44,%f28
st %f29,[%fp+nk3]
sub %o7,%l2,%l2
sub %o7,%l3,%l3
fmovs %f11,%f9
or %l0,%l1,%l0
or %l2,%l3,%l2
fmovs %f19,%f17
fmovs %f27,%f25
fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range
fmuld %f8,invpio2,%f14
ld [%fp+nk0],%l4
fmuld %f16,invpio2,%f22
ld [%fp+nk1],%l5
orcc %l0,%l2,%g0
bl,pn %icc,.medium
! delay slot
fmuld %f24,invpio2,%f30
ld [%fp+nk2],%l6
ld [%fp+nk3],%l7
sll %l4,5,%l4 ! k
fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0
sll %l5,5,%l5
ldd [%l4+%g1],%f4
fcmpd %fcc1,%f8,pio2_3
sll %l6,5,%l6
ldd [%l5+%g1],%f12
fcmpd %fcc2,%f16,pio2_3
sll %l7,5,%l7
ldd [%l6+%g1],%f20
fcmpd %fcc3,%f24,pio2_3
ldd [%l7+%g1],%f28
fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
fsubd %f10,%f12,%f10
fsubd %f18,%f20,%f18
fsubd %f26,%f28,%f26
fmuld %f2,%f2,%f0 ! z = x * x
fmuld %f10,%f10,%f8
fmuld %f18,%f18,%f16
fmuld %f26,%f26,%f24
fmuld %f0,qq3,%f6
fmuld %f8,qq3,%f14
fmuld %f16,qq3,%f22
fmuld %f24,qq3,%f30
faddd %f6,qq2,%f6
fmuld %f0,pp2,%f4
faddd %f14,qq2,%f14
fmuld %f8,pp2,%f12
faddd %f22,qq2,%f22
fmuld %f16,pp2,%f20
faddd %f30,qq2,%f30
fmuld %f24,pp2,%f28
fmuld %f0,%f6,%f6
faddd %f4,pp1,%f4
fmuld %f8,%f14,%f14
faddd %f12,pp1,%f12
fmuld %f16,%f22,%f22
faddd %f20,pp1,%f20
fmuld %f24,%f30,%f30
faddd %f28,pp1,%f28
faddd %f6,qq1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4
faddd %f14,qq1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5
faddd %f22,qq1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6
faddd %f30,qq1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7
fmuld %f2,%f4,%f4
fmuld %f10,%f12,%f12
fmuld %f18,%f20,%f20
fmuld %f26,%f28,%f28
fmuld %f0,%f6,%f6
faddd %f4,%f2,%f4
ldd [%l4+16],%f32
fmuld %f8,%f14,%f14
faddd %f12,%f10,%f12
ldd [%l5+16],%f34
fmuld %f16,%f22,%f22
faddd %f20,%f18,%f20
ldd [%l6+16],%f36
fmuld %f24,%f30,%f30
faddd %f28,%f26,%f28
ldd [%l7+16],%f38
fmuld %f32,%f6,%f6
ldd [%l4+8],%f2
fmuld %f34,%f14,%f14
ldd [%l5+8],%f10
fmuld %f36,%f22,%f22
ldd [%l6+8],%f18
fmuld %f38,%f30,%f30
ldd [%l7+8],%f26
fmuld %f2,%f4,%f4
fmuld %f10,%f12,%f12
fmuld %f18,%f20,%f20
fmuld %f26,%f28,%f28
fsubd %f6,%f4,%f6
lda [%i1]%asi,%l0 ! preload next argument
fsubd %f14,%f12,%f14
lda [%i1]%asi,%f0
fsubd %f22,%f20,%f22
lda [%i1+4]%asi,%f3
fsubd %f30,%f28,%f30
andn %l0,%i5,%l0
add %i1,%i2,%i1
faddd %f6,%f32,%f6
st %f6,[%o0]
faddd %f14,%f34,%f14
st %f14,[%o1]
faddd %f22,%f36,%f22
st %f22,[%o2]
faddd %f30,%f38,%f30
st %f30,[%o3]
addcc %i0,-1,%i0
bg,pt %icc,.loop0
! delay slot
st %f7,[%o0+4]
ba,pt %icc,.end
! delay slot
nop
.align 16
.medium:
faddd %f6,c3two51,%f4
st %f5,[%fp+nk0]
faddd %f14,c3two51,%f12
st %f13,[%fp+nk1]
faddd %f22,c3two51,%f20
st %f21,[%fp+nk2]
faddd %f30,c3two51,%f28
st %f29,[%fp+nk3]
fsubd %f4,c3two51,%f6
fsubd %f12,c3two51,%f14
fsubd %f20,c3two51,%f22
fsubd %f28,c3two51,%f30
fmuld %f6,pio2_1,%f2
ld [%fp+nk0],%l0 ! n
fmuld %f14,pio2_1,%f10
ld [%fp+nk1],%l1
fmuld %f22,pio2_1,%f18
ld [%fp+nk2],%l2
fmuld %f30,pio2_1,%f26
ld [%fp+nk3],%l3
fsubd %f0,%f2,%f0
fmuld %f6,pio2_2,%f4
add %l0,1,%l0
fsubd %f8,%f10,%f8
fmuld %f14,pio2_2,%f12
add %l1,1,%l1
fsubd %f16,%f18,%f16
fmuld %f22,pio2_2,%f20
add %l2,1,%l2
fsubd %f24,%f26,%f24
fmuld %f30,pio2_2,%f28
add %l3,1,%l3
fsubd %f0,%f4,%f32
fsubd %f8,%f12,%f34
fsubd %f16,%f20,%f36
fsubd %f24,%f28,%f38
fsubd %f0,%f32,%f0
fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0
fsubd %f8,%f34,%f8
fcmple32 %f34,pio2_3,%l5
fsubd %f16,%f36,%f16
fcmple32 %f36,pio2_3,%l6
fsubd %f24,%f38,%f24
fcmple32 %f38,pio2_3,%l7
fsubd %f0,%f4,%f0
fmuld %f6,pio2_3,%f6
sll %l4,30,%l4 ! if (x < 0) n = -n ^ 2
fsubd %f8,%f12,%f8
fmuld %f14,pio2_3,%f14
sll %l5,30,%l5
fsubd %f16,%f20,%f16
fmuld %f22,pio2_3,%f22
sll %l6,30,%l6
fsubd %f24,%f28,%f24
fmuld %f30,pio2_3,%f30
sll %l7,30,%l7
fsubd %f6,%f0,%f6
sra %l4,31,%l4
fsubd %f14,%f8,%f14
sra %l5,31,%l5
fsubd %f22,%f16,%f22
sra %l6,31,%l6
fsubd %f30,%f24,%f30
sra %l7,31,%l7
fsubd %f32,%f6,%f0 ! reduced x
xor %l0,%l4,%l0
fsubd %f34,%f14,%f8
xor %l1,%l5,%l1
fsubd %f36,%f22,%f16
xor %l2,%l6,%l2
fsubd %f38,%f30,%f24
xor %l3,%l7,%l3
fabsd %f0,%f2
sub %l0,%l4,%l0
fabsd %f8,%f10
sub %l1,%l5,%l1
fabsd %f16,%f18
sub %l2,%l6,%l2
fabsd %f24,%f26
sub %l3,%l7,%l3
faddd %f2,c3two44,%f4
st %f5,[%fp+nk0]
and %l4,2,%l4
faddd %f10,c3two44,%f12
st %f13,[%fp+nk1]
and %l5,2,%l5
faddd %f18,c3two44,%f20
st %f21,[%fp+nk2]
and %l6,2,%l6
faddd %f26,c3two44,%f28
st %f29,[%fp+nk3]
and %l7,2,%l7
fsubd %f32,%f0,%f4
xor %l0,%l4,%l0
fsubd %f34,%f8,%f12
xor %l1,%l5,%l1
fsubd %f36,%f16,%f20
xor %l2,%l6,%l2
fsubd %f38,%f24,%f28
xor %l3,%l7,%l3
fzero %f38
ld [%fp+nk0],%l4
fsubd %f4,%f6,%f6 ! w
ld [%fp+nk1],%l5
fsubd %f12,%f14,%f14
ld [%fp+nk2],%l6
fnegd %f38,%f38
ld [%fp+nk3],%l7
sll %l4,5,%l4 ! k
fsubd %f20,%f22,%f22
sll %l5,5,%l5
fsubd %f28,%f30,%f30
sll %l6,5,%l6
fand %f0,%f38,%f32 ! sign bit of x
ldd [%l4+%g1],%f4
sll %l7,5,%l7
fand %f8,%f38,%f34
ldd [%l5+%g1],%f12
fand %f16,%f38,%f36
ldd [%l6+%g1],%f20
fand %f24,%f38,%f38
ldd [%l7+%g1],%f28
fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
fsubd %f10,%f12,%f10
fsubd %f18,%f20,%f18
nop
fsubd %f26,%f28,%f26
nop
! 16-byte aligned
fmuld %f2,%f2,%f0 ! z = x * x
andcc %l0,1,%g0
bz,pn %icc,.case8
! delay slot
fxor %f6,%f32,%f32
fmuld %f10,%f10,%f8
andcc %l1,1,%g0
bz,pn %icc,.case4
! delay slot
fxor %f14,%f34,%f34
fmuld %f18,%f18,%f16
andcc %l2,1,%g0
bz,pn %icc,.case2
! delay slot
fxor %f22,%f36,%f36
fmuld %f26,%f26,%f24
andcc %l3,1,%g0
bz,pn %icc,.case1
! delay slot
fxor %f30,%f38,%f38
!.case0:
fmuld %f0,qq3,%f6 ! cos(x0)
fmuld %f8,qq3,%f14 ! cos(x1)
fmuld %f16,qq3,%f22 ! cos(x2)
fmuld %f24,qq3,%f30 ! cos(x3)
faddd %f6,qq2,%f6
fmuld %f0,pp2,%f4
faddd %f14,qq2,%f14
fmuld %f8,pp2,%f12
faddd %f22,qq2,%f22
fmuld %f16,pp2,%f20
faddd %f30,qq2,%f30
fmuld %f24,pp2,%f28
fmuld %f0,%f6,%f6
faddd %f4,pp1,%f4
fmuld %f8,%f14,%f14
faddd %f12,pp1,%f12
fmuld %f16,%f22,%f22
faddd %f20,pp1,%f20
fmuld %f24,%f30,%f30
faddd %f28,pp1,%f28
faddd %f6,qq1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4
faddd %f14,qq1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5
faddd %f22,qq1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6
faddd %f30,qq1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7
fmuld %f2,%f4,%f4
fmuld %f10,%f12,%f12
fmuld %f18,%f20,%f20
fmuld %f26,%f28,%f28
fmuld %f0,%f6,%f6
faddd %f4,%f32,%f4
ldd [%l4+16],%f0
fmuld %f8,%f14,%f14
faddd %f12,%f34,%f12
ldd [%l5+16],%f8
fmuld %f16,%f22,%f22
faddd %f20,%f36,%f20
ldd [%l6+16],%f16
fmuld %f24,%f30,%f30
faddd %f28,%f38,%f28
ldd [%l7+16],%f24
fmuld %f0,%f6,%f6
faddd %f4,%f2,%f4
ldd [%l4+8],%f32
fmuld %f8,%f14,%f14
faddd %f12,%f10,%f12
ldd [%l5+8],%f34
fmuld %f16,%f22,%f22
faddd %f20,%f18,%f20
ldd [%l6+8],%f36
fmuld %f24,%f30,%f30
faddd %f28,%f26,%f28
ldd [%l7+8],%f38
fmuld %f32,%f4,%f4
fmuld %f34,%f12,%f12
fmuld %f36,%f20,%f20
fmuld %f38,%f28,%f28
fsubd %f6,%f4,%f6
fsubd %f14,%f12,%f14
fsubd %f22,%f20,%f22
fsubd %f30,%f28,%f30
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4
fnegd %f6,%f4
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0
add %i1,%i2,%i1
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0]
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1]
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2]
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3]
addcc %i0,-1,%i0
bg,pt %icc,.loop0
! delay slot
st %f7,[%o0+4]
ba,pt %icc,.end
! delay slot
nop
.align 16
.case1:
fmuld %f24,pp3,%f30 ! sin(x3)
fmuld %f0,qq3,%f6 ! cos(x0)
fmuld %f8,qq3,%f14 ! cos(x1)
fmuld %f16,qq3,%f22 ! cos(x2)
faddd %f30,pp2,%f30
fmuld %f24,qq2,%f28
faddd %f6,qq2,%f6
fmuld %f0,pp2,%f4
faddd %f14,qq2,%f14
fmuld %f8,pp2,%f12
faddd %f22,qq2,%f22
fmuld %f16,pp2,%f20
fmuld %f24,%f30,%f30
faddd %f28,qq1,%f28
fmuld %f0,%f6,%f6
faddd %f4,pp1,%f4
fmuld %f8,%f14,%f14
faddd %f12,pp1,%f12
fmuld %f16,%f22,%f22
faddd %f20,pp1,%f20
faddd %f30,pp1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7
faddd %f6,qq1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4
faddd %f14,qq1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5
faddd %f22,qq1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6
fmuld %f24,%f30,%f30
fmuld %f2,%f4,%f4
fmuld %f10,%f12,%f12
fmuld %f18,%f20,%f20
fmuld %f26,%f30,%f30
ldd [%l7+8],%f24
fmuld %f0,%f6,%f6
faddd %f4,%f32,%f4
ldd [%l4+16],%f0
fmuld %f8,%f14,%f14
faddd %f12,%f34,%f12
ldd [%l5+16],%f8
fmuld %f16,%f22,%f22
faddd %f20,%f36,%f20
ldd [%l6+16],%f16
fmuld %f24,%f28,%f28
faddd %f38,%f30,%f30
fmuld %f0,%f6,%f6
faddd %f4,%f2,%f4
ldd [%l4+8],%f32
fmuld %f8,%f14,%f14
faddd %f12,%f10,%f12
ldd [%l5+8],%f34
fmuld %f16,%f22,%f22
faddd %f20,%f18,%f20
ldd [%l6+8],%f36
faddd %f26,%f30,%f30
ldd [%l7+16],%f38
fmuld %f32,%f4,%f4
fmuld %f34,%f12,%f12
fmuld %f36,%f20,%f20
fmuld %f38,%f30,%f30
fsubd %f6,%f4,%f6
fsubd %f14,%f12,%f14
fsubd %f22,%f20,%f22
faddd %f30,%f28,%f30
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4
fnegd %f6,%f4
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0
add %i1,%i2,%i1
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0]
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1]
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2]
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3]
addcc %i0,-1,%i0
bg,pt %icc,.loop0
! delay slot
st %f7,[%o0+4]
ba,pt %icc,.end
! delay slot
nop
.align 16
.case2:
fmuld %f26,%f26,%f24
andcc %l3,1,%g0
bz,pn %icc,.case3
! delay slot
fxor %f30,%f38,%f38
fmuld %f16,pp3,%f22 ! sin(x2)
fmuld %f0,qq3,%f6 ! cos(x0)
fmuld %f8,qq3,%f14 ! cos(x1)
faddd %f22,pp2,%f22
fmuld %f16,qq2,%f20
fmuld %f24,qq3,%f30 ! cos(x3)
faddd %f6,qq2,%f6
fmuld %f0,pp2,%f4
faddd %f14,qq2,%f14
fmuld %f8,pp2,%f12
fmuld %f16,%f22,%f22
faddd %f20,qq1,%f20
faddd %f30,qq2,%f30
fmuld %f24,pp2,%f28
fmuld %f0,%f6,%f6
faddd %f4,pp1,%f4
fmuld %f8,%f14,%f14
faddd %f12,pp1,%f12
faddd %f22,pp1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6
fmuld %f24,%f30,%f30
faddd %f28,pp1,%f28
faddd %f6,qq1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4
faddd %f14,qq1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5
fmuld %f16,%f22,%f22
faddd %f30,qq1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7
fmuld %f2,%f4,%f4
fmuld %f10,%f12,%f12
fmuld %f18,%f22,%f22
ldd [%l6+8],%f16
fmuld %f26,%f28,%f28
fmuld %f0,%f6,%f6
faddd %f4,%f32,%f4
ldd [%l4+16],%f0
fmuld %f8,%f14,%f14
faddd %f12,%f34,%f12
ldd [%l5+16],%f8
fmuld %f16,%f20,%f20
faddd %f36,%f22,%f22
fmuld %f24,%f30,%f30
faddd %f28,%f38,%f28
ldd [%l7+16],%f24
fmuld %f0,%f6,%f6
faddd %f4,%f2,%f4
ldd [%l4+8],%f32
fmuld %f8,%f14,%f14
faddd %f12,%f10,%f12
ldd [%l5+8],%f34
faddd %f18,%f22,%f22
ldd [%l6+16],%f36
fmuld %f24,%f30,%f30
faddd %f28,%f26,%f28
ldd [%l7+8],%f38
fmuld %f32,%f4,%f4
fmuld %f34,%f12,%f12
fmuld %f36,%f22,%f22
fmuld %f38,%f28,%f28
fsubd %f6,%f4,%f6
fsubd %f14,%f12,%f14
faddd %f22,%f20,%f22
fsubd %f30,%f28,%f30
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4
fnegd %f6,%f4
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0
add %i1,%i2,%i1
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0]
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1]
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2]
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3]
addcc %i0,-1,%i0
bg,pt %icc,.loop0
! delay slot
st %f7,[%o0+4]
ba,pt %icc,.end
! delay slot
nop
.align 16
.case3:
fmuld %f16,pp3,%f22 ! sin(x2)
fmuld %f24,pp3,%f30 ! sin(x3)
fmuld %f0,qq3,%f6 ! cos(x0)
fmuld %f8,qq3,%f14 ! cos(x1)
faddd %f22,pp2,%f22
fmuld %f16,qq2,%f20
faddd %f30,pp2,%f30
fmuld %f24,qq2,%f28
faddd %f6,qq2,%f6
fmuld %f0,pp2,%f4
faddd %f14,qq2,%f14
fmuld %f8,pp2,%f12
fmuld %f16,%f22,%f22
faddd %f20,qq1,%f20
fmuld %f24,%f30,%f30
faddd %f28,qq1,%f28
fmuld %f0,%f6,%f6
faddd %f4,pp1,%f4
fmuld %f8,%f14,%f14
faddd %f12,pp1,%f12
faddd %f22,pp1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6
faddd %f30,pp1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7
faddd %f6,qq1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4
faddd %f14,qq1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5
fmuld %f16,%f22,%f22
fmuld %f24,%f30,%f30
fmuld %f2,%f4,%f4
fmuld %f10,%f12,%f12
fmuld %f18,%f22,%f22
ldd [%l6+8],%f16
fmuld %f26,%f30,%f30
ldd [%l7+8],%f24
fmuld %f0,%f6,%f6
faddd %f4,%f32,%f4
ldd [%l4+16],%f0
fmuld %f8,%f14,%f14
faddd %f12,%f34,%f12
ldd [%l5+16],%f8
fmuld %f16,%f20,%f20
faddd %f36,%f22,%f22
fmuld %f24,%f28,%f28
faddd %f38,%f30,%f30
fmuld %f0,%f6,%f6
faddd %f4,%f2,%f4
ldd [%l4+8],%f32
fmuld %f8,%f14,%f14
faddd %f12,%f10,%f12
ldd [%l5+8],%f34
faddd %f18,%f22,%f22
ldd [%l6+16],%f36
faddd %f26,%f30,%f30
ldd [%l7+16],%f38
fmuld %f32,%f4,%f4
fmuld %f34,%f12,%f12
fmuld %f36,%f22,%f22
fmuld %f38,%f30,%f30
fsubd %f6,%f4,%f6
fsubd %f14,%f12,%f14
faddd %f22,%f20,%f22
faddd %f30,%f28,%f30
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4
fnegd %f6,%f4
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0
add %i1,%i2,%i1
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0]
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1]
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2]
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3]
addcc %i0,-1,%i0
bg,pt %icc,.loop0
! delay slot
st %f7,[%o0+4]
ba,pt %icc,.end
! delay slot
nop
.align 16
.case4:
fmuld %f18,%f18,%f16
andcc %l2,1,%g0
bz,pn %icc,.case6
! delay slot
fxor %f22,%f36,%f36
fmuld %f26,%f26,%f24
andcc %l3,1,%g0
bz,pn %icc,.case5
! delay slot
fxor %f30,%f38,%f38
fmuld %f8,pp3,%f14 ! sin(x1)
fmuld %f0,qq3,%f6 ! cos(x0)
faddd %f14,pp2,%f14
fmuld %f8,qq2,%f12
fmuld %f16,qq3,%f22 ! cos(x2)
fmuld %f24,qq3,%f30 ! cos(x3)
faddd %f6,qq2,%f6
fmuld %f0,pp2,%f4
fmuld %f8,%f14,%f14
faddd %f12,qq1,%f12
faddd %f22,qq2,%f22
fmuld %f16,pp2,%f20
faddd %f30,qq2,%f30
fmuld %f24,pp2,%f28
fmuld %f0,%f6,%f6
faddd %f4,pp1,%f4
faddd %f14,pp1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5
fmuld %f16,%f22,%f22
faddd %f20,pp1,%f20
fmuld %f24,%f30,%f30
faddd %f28,pp1,%f28
faddd %f6,qq1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4
fmuld %f8,%f14,%f14
faddd %f22,qq1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6
faddd %f30,qq1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7
fmuld %f2,%f4,%f4
fmuld %f10,%f14,%f14
ldd [%l5+8],%f8
fmuld %f18,%f20,%f20
fmuld %f26,%f28,%f28
fmuld %f0,%f6,%f6
faddd %f4,%f32,%f4
ldd [%l4+16],%f0
fmuld %f8,%f12,%f12
faddd %f34,%f14,%f14
fmuld %f16,%f22,%f22
faddd %f20,%f36,%f20
ldd [%l6+16],%f16
fmuld %f24,%f30,%f30
faddd %f28,%f38,%f28
ldd [%l7+16],%f24
fmuld %f0,%f6,%f6
faddd %f4,%f2,%f4
ldd [%l4+8],%f32
faddd %f10,%f14,%f14
ldd [%l5+16],%f34
fmuld %f16,%f22,%f22
faddd %f20,%f18,%f20
ldd [%l6+8],%f36
fmuld %f24,%f30,%f30
faddd %f28,%f26,%f28
ldd [%l7+8],%f38
fmuld %f32,%f4,%f4
fmuld %f34,%f14,%f14
fmuld %f36,%f20,%f20
fmuld %f38,%f28,%f28
fsubd %f6,%f4,%f6
faddd %f14,%f12,%f14
fsubd %f22,%f20,%f22
fsubd %f30,%f28,%f30
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4
fnegd %f6,%f4
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0
add %i1,%i2,%i1
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0]
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1]
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2]
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3]
addcc %i0,-1,%i0
bg,pt %icc,.loop0
! delay slot
st %f7,[%o0+4]
ba,pt %icc,.end
! delay slot
nop
.align 16
.case5:
fmuld %f8,pp3,%f14 ! sin(x1)
fmuld %f24,pp3,%f30 ! sin(x3)
fmuld %f0,qq3,%f6 ! cos(x0)
faddd %f14,pp2,%f14
fmuld %f8,qq2,%f12
fmuld %f16,qq3,%f22 ! cos(x2)
faddd %f30,pp2,%f30
fmuld %f24,qq2,%f28
faddd %f6,qq2,%f6
fmuld %f0,pp2,%f4
fmuld %f8,%f14,%f14
faddd %f12,qq1,%f12
faddd %f22,qq2,%f22
fmuld %f16,pp2,%f20
fmuld %f24,%f30,%f30
faddd %f28,qq1,%f28
fmuld %f0,%f6,%f6
faddd %f4,pp1,%f4
faddd %f14,pp1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5
fmuld %f16,%f22,%f22
faddd %f20,pp1,%f20
faddd %f30,pp1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7
faddd %f6,qq1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4
fmuld %f8,%f14,%f14
faddd %f22,qq1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6
fmuld %f24,%f30,%f30
fmuld %f2,%f4,%f4
fmuld %f10,%f14,%f14
ldd [%l5+8],%f8
fmuld %f18,%f20,%f20
fmuld %f26,%f30,%f30
ldd [%l7+8],%f24
fmuld %f0,%f6,%f6
faddd %f4,%f32,%f4
ldd [%l4+16],%f0
fmuld %f8,%f12,%f12
faddd %f34,%f14,%f14
fmuld %f16,%f22,%f22
faddd %f20,%f36,%f20
ldd [%l6+16],%f16
fmuld %f24,%f28,%f28
faddd %f38,%f30,%f30
fmuld %f0,%f6,%f6
faddd %f4,%f2,%f4
ldd [%l4+8],%f32
faddd %f10,%f14,%f14
ldd [%l5+16],%f34
fmuld %f16,%f22,%f22
faddd %f20,%f18,%f20
ldd [%l6+8],%f36
faddd %f26,%f30,%f30
ldd [%l7+16],%f38
fmuld %f32,%f4,%f4
fmuld %f34,%f14,%f14
fmuld %f36,%f20,%f20
fmuld %f38,%f30,%f30
fsubd %f6,%f4,%f6
faddd %f14,%f12,%f14
fsubd %f22,%f20,%f22
faddd %f30,%f28,%f30
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4
fnegd %f6,%f4
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0
add %i1,%i2,%i1
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0]
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1]
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2]
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3]
addcc %i0,-1,%i0
bg,pt %icc,.loop0
! delay slot
st %f7,[%o0+4]
ba,pt %icc,.end
! delay slot
nop
.align 16
.case6:
fmuld %f26,%f26,%f24
andcc %l3,1,%g0
bz,pn %icc,.case7
! delay slot
fxor %f30,%f38,%f38
fmuld %f8,pp3,%f14 ! sin(x1)
fmuld %f16,pp3,%f22 ! sin(x2)
fmuld %f0,qq3,%f6 ! cos(x0)
faddd %f14,pp2,%f14
fmuld %f8,qq2,%f12
faddd %f22,pp2,%f22
fmuld %f16,qq2,%f20
fmuld %f24,qq3,%f30 ! cos(x3)
faddd %f6,qq2,%f6
fmuld %f0,pp2,%f4
fmuld %f8,%f14,%f14
faddd %f12,qq1,%f12
fmuld %f16,%f22,%f22
faddd %f20,qq1,%f20
faddd %f30,qq2,%f30
fmuld %f24,pp2,%f28
fmuld %f0,%f6,%f6
faddd %f4,pp1,%f4
faddd %f14,pp1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5
faddd %f22,pp1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6
fmuld %f24,%f30,%f30
faddd %f28,pp1,%f28
faddd %f6,qq1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4
fmuld %f8,%f14,%f14
fmuld %f16,%f22,%f22
faddd %f30,qq1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7
fmuld %f2,%f4,%f4
fmuld %f10,%f14,%f14
ldd [%l5+8],%f8
fmuld %f18,%f22,%f22
ldd [%l6+8],%f16
fmuld %f26,%f28,%f28
fmuld %f0,%f6,%f6
faddd %f4,%f32,%f4
ldd [%l4+16],%f0
fmuld %f8,%f12,%f12
faddd %f34,%f14,%f14
fmuld %f16,%f20,%f20
faddd %f36,%f22,%f22
fmuld %f24,%f30,%f30
faddd %f28,%f38,%f28
ldd [%l7+16],%f24
fmuld %f0,%f6,%f6
faddd %f4,%f2,%f4
ldd [%l4+8],%f32
faddd %f10,%f14,%f14
ldd [%l5+16],%f34
faddd %f18,%f22,%f22
ldd [%l6+16],%f36
fmuld %f24,%f30,%f30
faddd %f28,%f26,%f28
ldd [%l7+8],%f38
fmuld %f32,%f4,%f4
fmuld %f34,%f14,%f14
fmuld %f36,%f22,%f22
fmuld %f38,%f28,%f28
fsubd %f6,%f4,%f6
faddd %f14,%f12,%f14
faddd %f22,%f20,%f22
fsubd %f30,%f28,%f30
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4
fnegd %f6,%f4
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0
add %i1,%i2,%i1
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0]
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1]
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2]
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3]
addcc %i0,-1,%i0
bg,pt %icc,.loop0
! delay slot
st %f7,[%o0+4]
ba,pt %icc,.end
! delay slot
nop
.align 16
.case7:
fmuld %f8,pp3,%f14 ! sin(x1)
fmuld %f16,pp3,%f22 ! sin(x2)
fmuld %f24,pp3,%f30 ! sin(x3)
fmuld %f0,qq3,%f6 ! cos(x0)
faddd %f14,pp2,%f14
fmuld %f8,qq2,%f12
faddd %f22,pp2,%f22
fmuld %f16,qq2,%f20
faddd %f30,pp2,%f30
fmuld %f24,qq2,%f28
faddd %f6,qq2,%f6
fmuld %f0,pp2,%f4
fmuld %f8,%f14,%f14
faddd %f12,qq1,%f12
fmuld %f16,%f22,%f22
faddd %f20,qq1,%f20
fmuld %f24,%f30,%f30
faddd %f28,qq1,%f28
fmuld %f0,%f6,%f6
faddd %f4,pp1,%f4
faddd %f14,pp1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5
faddd %f22,pp1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6
faddd %f30,pp1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7
faddd %f6,qq1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4
fmuld %f8,%f14,%f14
fmuld %f16,%f22,%f22
fmuld %f24,%f30,%f30
fmuld %f2,%f4,%f4
fmuld %f10,%f14,%f14
ldd [%l5+8],%f8
fmuld %f18,%f22,%f22
ldd [%l6+8],%f16
fmuld %f26,%f30,%f30
ldd [%l7+8],%f24
fmuld %f0,%f6,%f6
faddd %f4,%f32,%f4
ldd [%l4+16],%f0
fmuld %f8,%f12,%f12
faddd %f34,%f14,%f14
fmuld %f16,%f20,%f20
faddd %f36,%f22,%f22
fmuld %f24,%f28,%f28
faddd %f38,%f30,%f30
fmuld %f0,%f6,%f6
faddd %f4,%f2,%f4
ldd [%l4+8],%f32
faddd %f10,%f14,%f14
ldd [%l5+16],%f34
faddd %f18,%f22,%f22
ldd [%l6+16],%f36
faddd %f26,%f30,%f30
ldd [%l7+16],%f38
fmuld %f32,%f4,%f4
fmuld %f34,%f14,%f14
fmuld %f36,%f22,%f22
fmuld %f38,%f30,%f30
fsubd %f6,%f4,%f6
faddd %f14,%f12,%f14
faddd %f22,%f20,%f22
faddd %f30,%f28,%f30
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4
fnegd %f6,%f4
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0
add %i1,%i2,%i1
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0]
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1]
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2]
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3]
addcc %i0,-1,%i0
bg,pt %icc,.loop0
! delay slot
st %f7,[%o0+4]
ba,pt %icc,.end
! delay slot
nop
.align 16
.case8:
fmuld %f10,%f10,%f8
andcc %l1,1,%g0
bz,pn %icc,.case12
! delay slot
fxor %f14,%f34,%f34
fmuld %f18,%f18,%f16
andcc %l2,1,%g0
bz,pn %icc,.case10
! delay slot
fxor %f22,%f36,%f36
fmuld %f26,%f26,%f24
andcc %l3,1,%g0
bz,pn %icc,.case9
! delay slot
fxor %f30,%f38,%f38
fmuld %f0,pp3,%f6 ! sin(x0)
faddd %f6,pp2,%f6
fmuld %f0,qq2,%f4
fmuld %f8,qq3,%f14 ! cos(x1)
fmuld %f16,qq3,%f22 ! cos(x2)
fmuld %f24,qq3,%f30 ! cos(x3)
fmuld %f0,%f6,%f6
faddd %f4,qq1,%f4
faddd %f14,qq2,%f14
fmuld %f8,pp2,%f12
faddd %f22,qq2,%f22
fmuld %f16,pp2,%f20
faddd %f30,qq2,%f30
fmuld %f24,pp2,%f28
faddd %f6,pp1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4
fmuld %f8,%f14,%f14
faddd %f12,pp1,%f12
fmuld %f16,%f22,%f22
faddd %f20,pp1,%f20
fmuld %f24,%f30,%f30
faddd %f28,pp1,%f28
fmuld %f0,%f6,%f6
faddd %f14,qq1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5
faddd %f22,qq1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6
faddd %f30,qq1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7
fmuld %f2,%f6,%f6
ldd [%l4+8],%f0
fmuld %f10,%f12,%f12
fmuld %f18,%f20,%f20
fmuld %f26,%f28,%f28
fmuld %f0,%f4,%f4
faddd %f32,%f6,%f6
fmuld %f8,%f14,%f14
faddd %f12,%f34,%f12
ldd [%l5+16],%f8
fmuld %f16,%f22,%f22
faddd %f20,%f36,%f20
ldd [%l6+16],%f16
fmuld %f24,%f30,%f30
faddd %f28,%f38,%f28
ldd [%l7+16],%f24
faddd %f2,%f6,%f6
ldd [%l4+16],%f32
fmuld %f8,%f14,%f14
faddd %f12,%f10,%f12
ldd [%l5+8],%f34
fmuld %f16,%f22,%f22
faddd %f20,%f18,%f20
ldd [%l6+8],%f36
fmuld %f24,%f30,%f30
faddd %f28,%f26,%f28
ldd [%l7+8],%f38
fmuld %f32,%f6,%f6
fmuld %f34,%f12,%f12
fmuld %f36,%f20,%f20
fmuld %f38,%f28,%f28
faddd %f6,%f4,%f6
fsubd %f14,%f12,%f14
fsubd %f22,%f20,%f22
fsubd %f30,%f28,%f30
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4
fnegd %f6,%f4
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0
add %i1,%i2,%i1
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0]
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1]
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2]
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3]
addcc %i0,-1,%i0
bg,pt %icc,.loop0
! delay slot
st %f7,[%o0+4]
ba,pt %icc,.end
! delay slot
nop
.align 16
.case9:
fmuld %f0,pp3,%f6 ! sin(x0)
fmuld %f24,pp3,%f30 ! sin(x3)
faddd %f6,pp2,%f6
fmuld %f0,qq2,%f4
fmuld %f8,qq3,%f14 ! cos(x1)
fmuld %f16,qq3,%f22 ! cos(x2)
faddd %f30,pp2,%f30
fmuld %f24,qq2,%f28
fmuld %f0,%f6,%f6
faddd %f4,qq1,%f4
faddd %f14,qq2,%f14
fmuld %f8,pp2,%f12
faddd %f22,qq2,%f22
fmuld %f16,pp2,%f20
fmuld %f24,%f30,%f30
faddd %f28,qq1,%f28
faddd %f6,pp1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4
fmuld %f8,%f14,%f14
faddd %f12,pp1,%f12
fmuld %f16,%f22,%f22
faddd %f20,pp1,%f20
faddd %f30,pp1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7
fmuld %f0,%f6,%f6
faddd %f14,qq1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5
faddd %f22,qq1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6
fmuld %f24,%f30,%f30
fmuld %f2,%f6,%f6
ldd [%l4+8],%f0
fmuld %f10,%f12,%f12
fmuld %f18,%f20,%f20
fmuld %f26,%f30,%f30
ldd [%l7+8],%f24
fmuld %f0,%f4,%f4
faddd %f32,%f6,%f6
fmuld %f8,%f14,%f14
faddd %f12,%f34,%f12
ldd [%l5+16],%f8
fmuld %f16,%f22,%f22
faddd %f20,%f36,%f20
ldd [%l6+16],%f16
fmuld %f24,%f28,%f28
faddd %f38,%f30,%f30
faddd %f2,%f6,%f6
ldd [%l4+16],%f32
fmuld %f8,%f14,%f14
faddd %f12,%f10,%f12
ldd [%l5+8],%f34
fmuld %f16,%f22,%f22
faddd %f20,%f18,%f20
ldd [%l6+8],%f36
faddd %f26,%f30,%f30
ldd [%l7+16],%f38
fmuld %f32,%f6,%f6
fmuld %f34,%f12,%f12
fmuld %f36,%f20,%f20
fmuld %f38,%f30,%f30
faddd %f6,%f4,%f6
fsubd %f14,%f12,%f14
fsubd %f22,%f20,%f22
faddd %f30,%f28,%f30
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4
fnegd %f6,%f4
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0
add %i1,%i2,%i1
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0]
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1]
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2]
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3]
addcc %i0,-1,%i0
bg,pt %icc,.loop0
! delay slot
st %f7,[%o0+4]
ba,pt %icc,.end
! delay slot
nop
.align 16
! .case10 -- lane mix sin(x0), cos(x1), sin(x2), cos(x3).
!
! Squares %f26 into %f24, then tests bit 0 of %l3: when that bit is clear
! control moves to .case11, which treats lane 3 as a sine lane instead.
! The fxor in the delay slot is not annulled, so it runs on both paths.
! The four lane polynomials are evaluated interleaved (pp3/pp2/pp1 chain
! plus a qq2/qq1 chain for sine lanes, qq3/qq2/qq1 chain plus a pp2/pp1
! chain for cosine lanes), then combined with values loaded at offsets 8
! and 16 from %l4..%l7 after %g1 has been added to each pointer.
! Both words of result 0 are stored here; for lanes 1-3 only the high
! word is stored in this block (%f15, %f23, %f31 are left for later).
.case10:
fmuld %f26,%f26,%f24 ! %f24 = square of the lane 3 argument
andcc %l3,1,%g0
bz,pn %icc,.case11
! delay slot
fxor %f30,%f38,%f38
! Interleaved polynomial evaluation for the four lanes.
fmuld %f0,pp3,%f6 ! sin(x0)
fmuld %f16,pp3,%f22 ! sin(x2)
faddd %f6,pp2,%f6
fmuld %f0,qq2,%f4
fmuld %f8,qq3,%f14 ! cos(x1)
faddd %f22,pp2,%f22
fmuld %f16,qq2,%f20
fmuld %f24,qq3,%f30 ! cos(x3)
fmuld %f0,%f6,%f6
faddd %f4,qq1,%f4
faddd %f14,qq2,%f14
fmuld %f8,pp2,%f12
fmuld %f16,%f22,%f22
faddd %f20,qq1,%f20
faddd %f30,qq2,%f30
fmuld %f24,pp2,%f28
faddd %f6,pp1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4 ! lane 0 table pointer += %g1
fmuld %f8,%f14,%f14
faddd %f12,pp1,%f12
faddd %f22,pp1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6 ! lane 2 table pointer += %g1
fmuld %f24,%f30,%f30
faddd %f28,pp1,%f28
fmuld %f0,%f6,%f6
faddd %f14,qq1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5 ! lane 1 table pointer += %g1
fmuld %f16,%f22,%f22
faddd %f30,qq1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7 ! lane 3 table pointer += %g1
fmuld %f2,%f6,%f6
ldd [%l4+8],%f0 ! table loads replace the squared arguments from here on
fmuld %f10,%f12,%f12
fmuld %f18,%f22,%f22
ldd [%l6+8],%f16
fmuld %f26,%f28,%f28
fmuld %f0,%f4,%f4
faddd %f32,%f6,%f6
fmuld %f8,%f14,%f14
faddd %f12,%f34,%f12
ldd [%l5+16],%f8
fmuld %f16,%f20,%f20
faddd %f36,%f22,%f22
fmuld %f24,%f30,%f30
faddd %f28,%f38,%f28
ldd [%l7+16],%f24
faddd %f2,%f6,%f6
ldd [%l4+16],%f32
fmuld %f8,%f14,%f14
faddd %f12,%f10,%f12
ldd [%l5+8],%f34
faddd %f18,%f22,%f22
ldd [%l6+16],%f36
fmuld %f24,%f30,%f30
faddd %f28,%f26,%f28
ldd [%l7+8],%f38
fmuld %f32,%f6,%f6
fmuld %f34,%f12,%f12
fmuld %f36,%f22,%f22
fmuld %f38,%f28,%f28
! Combine: sine lanes add the two products, cosine lanes subtract them.
faddd %f6,%f4,%f6
fsubd %f14,%f12,%f14
faddd %f22,%f20,%f22
fsubd %f30,%f28,%f30
! Add the table value loaded earlier for each lane.
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4 ! save %l0 before the preload below overwrites it
fnegd %f6,%f4 ! %f4, %f12, %f20, %f28 = negated copies of the results
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0 ! clear the bits set in %i5 from the preloaded word
add %i1,%i2,%i1 ! advance the argument pointer by %i2
! Per lane: if bit 1 of the lane integer is set, take the negated copy
! (presumably a quadrant-based sign fix -- confirm against the setup code).
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0] ! high word of result 0
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1] ! high word of result 1
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2] ! high word of result 2
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3] ! high word of result 3
addcc %i0,-1,%i0 ! decrement the remaining-element counter
bg,pt %icc,.loop0 ! more elements: restart the pipeline
! delay slot
st %f7,[%o0+4] ! low word of result 0; runs whether or not the branch is taken
ba,pt %icc,.end
! delay slot
nop
.align 16
! .case11 -- lane mix sin(x0), cos(x1), sin(x2), sin(x3).
!
! Reached from .case10 when bit 0 of %l3 is clear.  The four lane
! polynomials are evaluated interleaved (pp3/pp2/pp1 chain plus a qq2/qq1
! chain for sine lanes, qq3/qq2/qq1 chain plus a pp2/pp1 chain for the
! cosine lane), then combined with values loaded at offsets 8 and 16 from
! %l4..%l7 after %g1 has been added to each pointer.
! Both words of result 0 are stored here; for lanes 1-3 only the high
! word is stored in this block (%f15, %f23, %f31 are left for later).
.case11:
! Interleaved polynomial evaluation for the four lanes.
fmuld %f0,pp3,%f6 ! sin(x0)
fmuld %f16,pp3,%f22 ! sin(x2)
fmuld %f24,pp3,%f30 ! sin(x3)
faddd %f6,pp2,%f6
fmuld %f0,qq2,%f4
fmuld %f8,qq3,%f14 ! cos(x1)
faddd %f22,pp2,%f22
fmuld %f16,qq2,%f20
faddd %f30,pp2,%f30
fmuld %f24,qq2,%f28
fmuld %f0,%f6,%f6
faddd %f4,qq1,%f4
faddd %f14,qq2,%f14
fmuld %f8,pp2,%f12
fmuld %f16,%f22,%f22
faddd %f20,qq1,%f20
fmuld %f24,%f30,%f30
faddd %f28,qq1,%f28
faddd %f6,pp1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4 ! lane 0 table pointer += %g1
fmuld %f8,%f14,%f14
faddd %f12,pp1,%f12
faddd %f22,pp1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6 ! lane 2 table pointer += %g1
faddd %f30,pp1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7 ! lane 3 table pointer += %g1
fmuld %f0,%f6,%f6
faddd %f14,qq1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5 ! lane 1 table pointer += %g1
fmuld %f16,%f22,%f22
fmuld %f24,%f30,%f30
fmuld %f2,%f6,%f6
ldd [%l4+8],%f0 ! table loads replace the squared arguments from here on
fmuld %f10,%f12,%f12
fmuld %f18,%f22,%f22
ldd [%l6+8],%f16
fmuld %f26,%f30,%f30
ldd [%l7+8],%f24
fmuld %f0,%f4,%f4
faddd %f32,%f6,%f6
fmuld %f8,%f14,%f14
faddd %f12,%f34,%f12
ldd [%l5+16],%f8
fmuld %f16,%f20,%f20
faddd %f36,%f22,%f22
fmuld %f24,%f28,%f28
faddd %f38,%f30,%f30
faddd %f2,%f6,%f6
ldd [%l4+16],%f32
fmuld %f8,%f14,%f14
faddd %f12,%f10,%f12
ldd [%l5+8],%f34
faddd %f18,%f22,%f22
ldd [%l6+16],%f36
faddd %f26,%f30,%f30
ldd [%l7+16],%f38
fmuld %f32,%f6,%f6
fmuld %f34,%f12,%f12
fmuld %f36,%f22,%f22
fmuld %f38,%f30,%f30
! Combine: sine lanes add the two products, the cosine lane subtracts.
faddd %f6,%f4,%f6
fsubd %f14,%f12,%f14
faddd %f22,%f20,%f22
faddd %f30,%f28,%f30
! Add the table value loaded earlier for each lane.
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4 ! save %l0 before the preload below overwrites it
fnegd %f6,%f4 ! %f4, %f12, %f20, %f28 = negated copies of the results
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0 ! clear the bits set in %i5 from the preloaded word
add %i1,%i2,%i1 ! advance the argument pointer by %i2
! Per lane: if bit 1 of the lane integer is set, take the negated copy
! (presumably a quadrant-based sign fix -- confirm against the setup code).
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0] ! high word of result 0
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1] ! high word of result 1
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2] ! high word of result 2
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3] ! high word of result 3
addcc %i0,-1,%i0 ! decrement the remaining-element counter
bg,pt %icc,.loop0 ! more elements: restart the pipeline
! delay slot
st %f7,[%o0+4] ! low word of result 0; runs whether or not the branch is taken
ba,pt %icc,.end
! delay slot
nop
.align 16
! .case12 -- lane mix sin(x0), sin(x1), cos(x2), cos(x3).
!
! Squares %f18 into %f16 and tests bit 0 of %l2: when clear, control
! moves to .case14 (lane 2 handled as a sine lane).  Otherwise squares
! %f26 into %f24 and tests bit 0 of %l3: when clear, control moves to
! .case13 (lane 3 handled as a sine lane).  Neither delay-slot fxor is
! annulled, so each runs whether or not its branch is taken.
! The four lane polynomials are evaluated interleaved (pp3/pp2/pp1 chain
! plus a qq2/qq1 chain for sine lanes, qq3/qq2/qq1 chain plus a pp2/pp1
! chain for cosine lanes), then combined with values loaded at offsets 8
! and 16 from %l4..%l7 after %g1 has been added to each pointer.
! Both words of result 0 are stored here; for lanes 1-3 only the high
! word is stored in this block (%f15, %f23, %f31 are left for later).
.case12:
fmuld %f18,%f18,%f16 ! %f16 = square of the lane 2 argument
andcc %l2,1,%g0
bz,pn %icc,.case14
! delay slot
fxor %f22,%f36,%f36
fmuld %f26,%f26,%f24 ! %f24 = square of the lane 3 argument
andcc %l3,1,%g0
bz,pn %icc,.case13
! delay slot
fxor %f30,%f38,%f38
! Interleaved polynomial evaluation for the four lanes.
fmuld %f0,pp3,%f6 ! sin(x0)
fmuld %f8,pp3,%f14 ! sin(x1)
faddd %f6,pp2,%f6
fmuld %f0,qq2,%f4
faddd %f14,pp2,%f14
fmuld %f8,qq2,%f12
fmuld %f16,qq3,%f22 ! cos(x2)
fmuld %f24,qq3,%f30 ! cos(x3)
fmuld %f0,%f6,%f6
faddd %f4,qq1,%f4
fmuld %f8,%f14,%f14
faddd %f12,qq1,%f12
faddd %f22,qq2,%f22
fmuld %f16,pp2,%f20
faddd %f30,qq2,%f30
fmuld %f24,pp2,%f28
faddd %f6,pp1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4 ! lane 0 table pointer += %g1
faddd %f14,pp1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5 ! lane 1 table pointer += %g1
fmuld %f16,%f22,%f22
faddd %f20,pp1,%f20
fmuld %f24,%f30,%f30
faddd %f28,pp1,%f28
fmuld %f0,%f6,%f6
fmuld %f8,%f14,%f14
faddd %f22,qq1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6 ! lane 2 table pointer += %g1
faddd %f30,qq1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7 ! lane 3 table pointer += %g1
fmuld %f2,%f6,%f6
ldd [%l4+8],%f0 ! table loads replace the squared arguments from here on
fmuld %f10,%f14,%f14
ldd [%l5+8],%f8
fmuld %f18,%f20,%f20
fmuld %f26,%f28,%f28
fmuld %f0,%f4,%f4
faddd %f32,%f6,%f6
fmuld %f8,%f12,%f12
faddd %f34,%f14,%f14
fmuld %f16,%f22,%f22
faddd %f20,%f36,%f20
ldd [%l6+16],%f16
fmuld %f24,%f30,%f30
faddd %f28,%f38,%f28
ldd [%l7+16],%f24
faddd %f2,%f6,%f6
ldd [%l4+16],%f32
faddd %f10,%f14,%f14
ldd [%l5+16],%f34
fmuld %f16,%f22,%f22
faddd %f20,%f18,%f20
ldd [%l6+8],%f36
fmuld %f24,%f30,%f30
faddd %f28,%f26,%f28
ldd [%l7+8],%f38
fmuld %f32,%f6,%f6
fmuld %f34,%f14,%f14
fmuld %f36,%f20,%f20
fmuld %f38,%f28,%f28
! Combine: sine lanes add the two products, cosine lanes subtract them.
faddd %f6,%f4,%f6
faddd %f14,%f12,%f14
fsubd %f22,%f20,%f22
fsubd %f30,%f28,%f30
! Add the table value loaded earlier for each lane.
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4 ! save %l0 before the preload below overwrites it
fnegd %f6,%f4 ! %f4, %f12, %f20, %f28 = negated copies of the results
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0 ! clear the bits set in %i5 from the preloaded word
add %i1,%i2,%i1 ! advance the argument pointer by %i2
! Per lane: if bit 1 of the lane integer is set, take the negated copy
! (presumably a quadrant-based sign fix -- confirm against the setup code).
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0] ! high word of result 0
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1] ! high word of result 1
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2] ! high word of result 2
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3] ! high word of result 3
addcc %i0,-1,%i0 ! decrement the remaining-element counter
bg,pt %icc,.loop0 ! more elements: restart the pipeline
! delay slot
st %f7,[%o0+4] ! low word of result 0; runs whether or not the branch is taken
ba,pt %icc,.end
! delay slot
nop
.align 16
! .case13 -- lane mix sin(x0), sin(x1), cos(x2), sin(x3).
!
! Reached from .case12 when bit 0 of %l3 is clear.  The four lane
! polynomials are evaluated interleaved (pp3/pp2/pp1 chain plus a qq2/qq1
! chain for sine lanes, qq3/qq2/qq1 chain plus a pp2/pp1 chain for the
! cosine lane), then combined with values loaded at offsets 8 and 16 from
! %l4..%l7 after %g1 has been added to each pointer.
! Both words of result 0 are stored here; for lanes 1-3 only the high
! word is stored in this block (%f15, %f23, %f31 are left for later).
.case13:
! Interleaved polynomial evaluation for the four lanes.
fmuld %f0,pp3,%f6 ! sin(x0)
fmuld %f8,pp3,%f14 ! sin(x1)
fmuld %f24,pp3,%f30 ! sin(x3)
faddd %f6,pp2,%f6
fmuld %f0,qq2,%f4
faddd %f14,pp2,%f14
fmuld %f8,qq2,%f12
fmuld %f16,qq3,%f22 ! cos(x2)
faddd %f30,pp2,%f30
fmuld %f24,qq2,%f28
fmuld %f0,%f6,%f6
faddd %f4,qq1,%f4
fmuld %f8,%f14,%f14
faddd %f12,qq1,%f12
faddd %f22,qq2,%f22
fmuld %f16,pp2,%f20
fmuld %f24,%f30,%f30
faddd %f28,qq1,%f28
faddd %f6,pp1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4 ! lane 0 table pointer += %g1
faddd %f14,pp1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5 ! lane 1 table pointer += %g1
fmuld %f16,%f22,%f22
faddd %f20,pp1,%f20
faddd %f30,pp1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7 ! lane 3 table pointer += %g1
fmuld %f0,%f6,%f6
fmuld %f8,%f14,%f14
faddd %f22,qq1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6 ! lane 2 table pointer += %g1
fmuld %f24,%f30,%f30
fmuld %f2,%f6,%f6
ldd [%l4+8],%f0 ! table loads replace the squared arguments from here on
fmuld %f10,%f14,%f14
ldd [%l5+8],%f8
fmuld %f18,%f20,%f20
fmuld %f26,%f30,%f30
ldd [%l7+8],%f24
fmuld %f0,%f4,%f4
faddd %f32,%f6,%f6
fmuld %f8,%f12,%f12
faddd %f34,%f14,%f14
fmuld %f16,%f22,%f22
faddd %f20,%f36,%f20
ldd [%l6+16],%f16
fmuld %f24,%f28,%f28
faddd %f38,%f30,%f30
faddd %f2,%f6,%f6
ldd [%l4+16],%f32
faddd %f10,%f14,%f14
ldd [%l5+16],%f34
fmuld %f16,%f22,%f22
faddd %f20,%f18,%f20
ldd [%l6+8],%f36
faddd %f26,%f30,%f30
ldd [%l7+16],%f38
fmuld %f32,%f6,%f6
fmuld %f34,%f14,%f14
fmuld %f36,%f20,%f20
fmuld %f38,%f30,%f30
! Combine: sine lanes add the two products, the cosine lane subtracts.
faddd %f6,%f4,%f6
faddd %f14,%f12,%f14
fsubd %f22,%f20,%f22
faddd %f30,%f28,%f30
! Add the table value loaded earlier for each lane.
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4 ! save %l0 before the preload below overwrites it
fnegd %f6,%f4 ! %f4, %f12, %f20, %f28 = negated copies of the results
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0 ! clear the bits set in %i5 from the preloaded word
add %i1,%i2,%i1 ! advance the argument pointer by %i2
! Per lane: if bit 1 of the lane integer is set, take the negated copy
! (presumably a quadrant-based sign fix -- confirm against the setup code).
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0] ! high word of result 0
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1] ! high word of result 1
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2] ! high word of result 2
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3] ! high word of result 3
addcc %i0,-1,%i0 ! decrement the remaining-element counter
bg,pt %icc,.loop0 ! more elements: restart the pipeline
! delay slot
st %f7,[%o0+4] ! low word of result 0; runs whether or not the branch is taken
ba,pt %icc,.end
! delay slot
nop
.align 16
! .case14 -- lane mix sin(x0), sin(x1), sin(x2), cos(x3).
!
! Reached from .case12 when bit 0 of %l2 is clear.  Squares %f26 into
! %f24 and tests bit 0 of %l3: when clear, control moves to .case15,
! which treats lane 3 as a sine lane too.  The delay-slot fxor is not
! annulled, so it runs on both paths.
! The four lane polynomials are evaluated interleaved (pp3/pp2/pp1 chain
! plus a qq2/qq1 chain for sine lanes, qq3/qq2/qq1 chain plus a pp2/pp1
! chain for the cosine lane), then combined with values loaded at offsets
! 8 and 16 from %l4..%l7 after %g1 has been added to each pointer.
! Both words of result 0 are stored here; for lanes 1-3 only the high
! word is stored in this block (%f15, %f23, %f31 are left for later).
.case14:
fmuld %f26,%f26,%f24 ! %f24 = square of the lane 3 argument
andcc %l3,1,%g0
bz,pn %icc,.case15
! delay slot
fxor %f30,%f38,%f38
! Interleaved polynomial evaluation for the four lanes.
fmuld %f0,pp3,%f6 ! sin(x0)
fmuld %f8,pp3,%f14 ! sin(x1)
fmuld %f16,pp3,%f22 ! sin(x2)
faddd %f6,pp2,%f6
fmuld %f0,qq2,%f4
faddd %f14,pp2,%f14
fmuld %f8,qq2,%f12
faddd %f22,pp2,%f22
fmuld %f16,qq2,%f20
fmuld %f24,qq3,%f30 ! cos(x3)
fmuld %f0,%f6,%f6
faddd %f4,qq1,%f4
fmuld %f8,%f14,%f14
faddd %f12,qq1,%f12
fmuld %f16,%f22,%f22
faddd %f20,qq1,%f20
faddd %f30,qq2,%f30
fmuld %f24,pp2,%f28
faddd %f6,pp1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4 ! lane 0 table pointer += %g1
faddd %f14,pp1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5 ! lane 1 table pointer += %g1
faddd %f22,pp1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6 ! lane 2 table pointer += %g1
fmuld %f24,%f30,%f30
faddd %f28,pp1,%f28
fmuld %f0,%f6,%f6
fmuld %f8,%f14,%f14
fmuld %f16,%f22,%f22
faddd %f30,qq1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7 ! lane 3 table pointer += %g1
fmuld %f2,%f6,%f6
ldd [%l4+8],%f0 ! table loads replace the squared arguments from here on
fmuld %f10,%f14,%f14
ldd [%l5+8],%f8
fmuld %f18,%f22,%f22
ldd [%l6+8],%f16
fmuld %f26,%f28,%f28
fmuld %f0,%f4,%f4
faddd %f32,%f6,%f6
fmuld %f8,%f12,%f12
faddd %f34,%f14,%f14
fmuld %f16,%f20,%f20
faddd %f36,%f22,%f22
fmuld %f24,%f30,%f30
faddd %f28,%f38,%f28
ldd [%l7+16],%f24
faddd %f2,%f6,%f6
ldd [%l4+16],%f32
faddd %f10,%f14,%f14
ldd [%l5+16],%f34
faddd %f18,%f22,%f22
ldd [%l6+16],%f36
fmuld %f24,%f30,%f30
faddd %f28,%f26,%f28
ldd [%l7+8],%f38
fmuld %f32,%f6,%f6
fmuld %f34,%f14,%f14
fmuld %f36,%f22,%f22
fmuld %f38,%f28,%f28
! Combine: sine lanes add the two products, the cosine lane subtracts.
faddd %f6,%f4,%f6
faddd %f14,%f12,%f14
faddd %f22,%f20,%f22
fsubd %f30,%f28,%f30
! Add the table value loaded earlier for each lane.
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4 ! save %l0 before the preload below overwrites it
fnegd %f6,%f4 ! %f4, %f12, %f20, %f28 = negated copies of the results
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0 ! clear the bits set in %i5 from the preloaded word
add %i1,%i2,%i1 ! advance the argument pointer by %i2
! Per lane: if bit 1 of the lane integer is set, take the negated copy
! (presumably a quadrant-based sign fix -- confirm against the setup code).
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0] ! high word of result 0
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1] ! high word of result 1
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2] ! high word of result 2
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3] ! high word of result 3
addcc %i0,-1,%i0 ! decrement the remaining-element counter
bg,pt %icc,.loop0 ! more elements: restart the pipeline
! delay slot
st %f7,[%o0+4] ! low word of result 0; runs whether or not the branch is taken
ba,pt %icc,.end
! delay slot
nop
.align 16
! .case15 -- lane mix sin(x0), sin(x1), sin(x2), sin(x3).
!
! Reached from .case14 when bit 0 of %l3 is clear.  All four lanes use
! the sine form: a pp3/pp2/pp1 chain plus a qq2/qq1 chain per lane,
! evaluated interleaved, then combined with values loaded at offsets 8
! and 16 from %l4..%l7 after %g1 has been added to each pointer.
! Both words of result 0 are stored here; for lanes 1-3 only the high
! word is stored in this block (%f15, %f23, %f31 are left for later).
.case15:
! Interleaved polynomial evaluation for the four lanes.
fmuld %f0,pp3,%f6 ! sin(x0)
fmuld %f8,pp3,%f14 ! sin(x1)
fmuld %f16,pp3,%f22 ! sin(x2)
fmuld %f24,pp3,%f30 ! sin(x3)
faddd %f6,pp2,%f6
fmuld %f0,qq2,%f4
faddd %f14,pp2,%f14
fmuld %f8,qq2,%f12
faddd %f22,pp2,%f22
fmuld %f16,qq2,%f20
faddd %f30,pp2,%f30
fmuld %f24,qq2,%f28
fmuld %f0,%f6,%f6
faddd %f4,qq1,%f4
fmuld %f8,%f14,%f14
faddd %f12,qq1,%f12
fmuld %f16,%f22,%f22
faddd %f20,qq1,%f20
fmuld %f24,%f30,%f30
faddd %f28,qq1,%f28
faddd %f6,pp1,%f6
fmuld %f0,%f4,%f4
add %l4,%g1,%l4 ! lane 0 table pointer += %g1
faddd %f14,pp1,%f14
fmuld %f8,%f12,%f12
add %l5,%g1,%l5 ! lane 1 table pointer += %g1
faddd %f22,pp1,%f22
fmuld %f16,%f20,%f20
add %l6,%g1,%l6 ! lane 2 table pointer += %g1
faddd %f30,pp1,%f30
fmuld %f24,%f28,%f28
add %l7,%g1,%l7 ! lane 3 table pointer += %g1
fmuld %f0,%f6,%f6
fmuld %f8,%f14,%f14
fmuld %f16,%f22,%f22
fmuld %f24,%f30,%f30
fmuld %f2,%f6,%f6
ldd [%l4+8],%f0 ! table loads replace the squared arguments from here on
fmuld %f10,%f14,%f14
ldd [%l5+8],%f8
fmuld %f18,%f22,%f22
ldd [%l6+8],%f16
fmuld %f26,%f30,%f30
ldd [%l7+8],%f24
fmuld %f0,%f4,%f4
faddd %f32,%f6,%f6
fmuld %f8,%f12,%f12
faddd %f34,%f14,%f14
fmuld %f16,%f20,%f20
faddd %f36,%f22,%f22
fmuld %f24,%f28,%f28
faddd %f38,%f30,%f30
faddd %f2,%f6,%f6
ldd [%l4+16],%f32
faddd %f10,%f14,%f14
ldd [%l5+16],%f34
faddd %f18,%f22,%f22
ldd [%l6+16],%f36
faddd %f26,%f30,%f30
ldd [%l7+16],%f38
fmuld %f32,%f6,%f6
fmuld %f34,%f14,%f14
fmuld %f36,%f22,%f22
fmuld %f38,%f30,%f30
! Combine: every lane is a sine lane here, so all four add.
faddd %f6,%f4,%f6
faddd %f14,%f12,%f14
faddd %f22,%f20,%f22
faddd %f30,%f28,%f30
! Add the table value loaded earlier for each lane.
faddd %f6,%f0,%f6
faddd %f14,%f8,%f14
faddd %f22,%f16,%f22
faddd %f30,%f24,%f30
mov %l0,%l4 ! save %l0 before the preload below overwrites it
fnegd %f6,%f4 ! %f4, %f12, %f20, %f28 = negated copies of the results
lda [%i1]%asi,%l0 ! preload next argument
fnegd %f14,%f12
lda [%i1]%asi,%f0
fnegd %f22,%f20
lda [%i1+4]%asi,%f3
fnegd %f30,%f28
andn %l0,%i5,%l0 ! clear the bits set in %i5 from the preloaded word
add %i1,%i2,%i1 ! advance the argument pointer by %i2
! Per lane: if bit 1 of the lane integer is set, take the negated copy
! (presumably a quadrant-based sign fix -- confirm against the setup code).
andcc %l4,2,%g0
fmovdnz %icc,%f4,%f6
st %f6,[%o0] ! high word of result 0
andcc %l1,2,%g0
fmovdnz %icc,%f12,%f14
st %f14,[%o1] ! high word of result 1
andcc %l2,2,%g0
fmovdnz %icc,%f20,%f22
st %f22,[%o2] ! high word of result 2
andcc %l3,2,%g0
fmovdnz %icc,%f28,%f30
st %f30,[%o3] ! high word of result 3
addcc %i0,-1,%i0 ! decrement the remaining-element counter
bg,pt %icc,.loop0 ! more elements: restart the pipeline
! delay slot
st %f7,[%o0+4] ! low word of result 0; runs whether or not the branch is taken
ba,pt %icc,.end
! delay slot
nop
.align 16
! .end -- common exit path once the element counter has run out.
!
! Stores the low words of results 1-3 (%f15, %f23, %f31), then checks the
! biguns stack slot.  If it is zero the routine returns.  Otherwise the
! values saved in the xsave, ysave, nsave, sxsave and sysave slots are
! reloaded into %o1, %o3, %o0, %o2 and %o4 and __vlibm_vcos_big_ultra3 is
! called; control then falls through to .exit.
! The slot names suggest these are the original x, y, n, stridex and
! stridey arguments -- confirm against the function prologue.
.end:
st %f15,[%o1+4]
st %f23,[%o2+4]
st %f31,[%o3+4]
ld [%fp+biguns],%i5
tst %i5 ! check for huge arguments remaining
be,pt %icc,.exit
! delay slot
nop
! Pointer slots are reloaded at full width on V9, as 32-bit words otherwise.
#ifdef __sparcv9
ldx [%fp+xsave],%o1
ldx [%fp+ysave],%o3
#else
ld [%fp+xsave],%o1
ld [%fp+ysave],%o3
#endif
ld [%fp+nsave],%o0
ld [%fp+sxsave],%o2
ld [%fp+sysave],%o4
sra %o2,0,%o2 ! sign-extend for V9
sra %o4,0,%o4
call __vlibm_vcos_big_ultra3
! NOTE(review): %o5 is not loaded from any save slot in this block;
! confirm whether the callee reads a sixth argument.
sra %o5,0,%o5 ! delay slot
.exit:
ret
restore
.align 16
! .last1 / .last2 / .last3 -- pad the unused lanes when the element count
! runs out before all four lanes have been filled.
!
! Each entry falls through to the next, so entering at .lastK pads lanes
! K..3.  For every padded lane the integer word (%l1, %l2 or %l3) is set
! to 0, the lane argument registers are zeroed (fzeros clears the single
! register, fzero clears the double register pair) and the output pointer
! is aimed at the junk scratch slot in the frame, so the stores made later
! for that lane do not touch the result array.  Control then goes to
! .cont, which is outside this block.
! The .lastK_from_rangeK labels are the entry points used by the .rangeK
! handlers; they skip the faddd/st instructions at the head of .lastK.
.last1:
faddd %f2,c3two44,%f4
st %f15,[%o1+4] ! low word of result 1, stored before %o1 is redirected
.last1_from_range1:
mov 0,%l1
fzeros %f8
fzero %f10
add %fp,junk,%o1 ! lane 1 output now goes to the junk slot
.last2:
faddd %f10,c3two44,%f12
st %f23,[%o2+4] ! low word of result 2, stored before %o2 is redirected
.last2_from_range2:
mov 0,%l2
fzeros %f16
fzero %f18
add %fp,junk,%o2 ! lane 2 output now goes to the junk slot
.last3:
faddd %f18,c3two44,%f20
st %f31,[%o3+4] ! low word of result 3, stored before %o3 is redirected
st %f5,[%fp+nk0] ! low word of %f4 goes to the nk0 slot
st %f13,[%fp+nk1] ! low word of %f12 goes to the nk1 slot
.last3_from_range3:
mov 0,%l3
fzeros %f24
fzero %f26
ba,pt %icc,.cont
! delay slot
add %fp,junk,%o3 ! lane 3 output now goes to the junk slot
.align 16
! .range0 -- lane 0 argument is outside the range handled by the main
! pipeline.  %l0 holds the high word being classified and %o0 points at
! the result slot.
!
!   %l0 < %o4 (tiny)      : store 1.0 (0x3ff00000:0); fdtoi raises inexact
!                           for a nonzero argument.
!   %l0 < 0x7ff00000      : finite but too large; store a nonzero value in
!                           biguns and write no result here.  .end calls
!                           __vlibm_vcos_big_ultra3 when biguns is nonzero.
!   otherwise (Inf or NaN): store argument * 0.
!
! Afterwards the counter is decremented; when nothing is left control
! goes to .end, otherwise the already preloaded next argument
! (%l1, %f8, %f11) is moved into the lane 0 registers and .loop0 restarts.
.range0:
cmp %l0,%o4
bl,pt %icc,1f ! hx < 0x3e400000
! delay slot, harmless if branch taken
sethi %hi(0x7ff00000),%o7
cmp %l0,%o7
bl,a,pt %icc,2f ! branch if finite
! delay slot, squashed if branch not taken
st %o4,[%fp+biguns] ! set biguns
fzero %f0
fmuld %f2,%f0,%f2 ! Inf or NaN times zero
st %f2,[%o0]
ba,pt %icc,2f
! delay slot
st %f3,[%o0+4]
1:
fdtoi %f2,%f4 ! raise inexact if not zero
sethi %hi(0x3ff00000),%o7
st %o7,[%o0] ! high word of 1.0
st %g0,[%o0+4] ! low word of 1.0
2:
addcc %i0,-1,%i0
ble,pn %icc,.end
! delay slot, harmless if branch taken
add %i3,%i4,%i3 ! y += stridey
andn %l1,%i5,%l0 ! hx &= ~0x80000000
fmovs %f8,%f0
fmovs %f11,%f3
ba,pt %icc,.loop0
! delay slot
add %i1,%i2,%i1 ! x += stridex
.align 16
! .range1 -- lane 1 argument is outside the range handled by the main
! pipeline.  %l1 holds the high word being classified and %o1 points at
! the result slot.
!
!   %l1 < %o4 (tiny)      : store 1.0 (0x3ff00000:0); fdtoi raises inexact
!                           for a nonzero argument.
!   %l1 < 0x7ff00000      : finite but too large; store a nonzero value in
!                           biguns and write no result here (presumably
!                           left for the later big-argument pass).
!   otherwise (Inf or NaN): store argument * 0.
!
! Afterwards the counter is decremented; when nothing is left control
! goes to .last1_from_range1, otherwise the already preloaded next
! argument (%l2, %f16, %f19) is moved into the lane 1 registers and
! .loop1 restarts.
.range1:
cmp %l1,%o4
bl,pt %icc,1f ! hx < 0x3e400000
! delay slot, harmless if branch taken
sethi %hi(0x7ff00000),%o7
cmp %l1,%o7
bl,a,pt %icc,2f ! branch if finite
! delay slot, squashed if branch not taken
st %o4,[%fp+biguns] ! set biguns
fzero %f8
fmuld %f10,%f8,%f10 ! Inf or NaN times zero
st %f10,[%o1]
ba,pt %icc,2f
! delay slot
st %f11,[%o1+4]
1:
fdtoi %f10,%f12 ! raise inexact if not zero
sethi %hi(0x3ff00000),%o7
st %o7,[%o1] ! high word of 1.0
st %g0,[%o1+4] ! low word of 1.0
2:
addcc %i0,-1,%i0
ble,pn %icc,.last1_from_range1
! delay slot, harmless if branch taken
add %i3,%i4,%i3 ! y += stridey
andn %l2,%i5,%l1 ! hx &= ~0x80000000
fmovs %f16,%f8
fmovs %f19,%f11
ba,pt %icc,.loop1
! delay slot
add %i1,%i2,%i1 ! x += stridex
.align 16
! .range2 -- lane 2 argument is outside the range handled by the main
! pipeline.  %l2 holds the high word being classified and %o2 points at
! the result slot.
!
!   %l2 < %o4 (tiny)      : store 1.0 (0x3ff00000:0); fdtoi raises inexact
!                           for a nonzero argument.
!   %l2 < 0x7ff00000      : finite but too large; store a nonzero value in
!                           biguns and write no result here (presumably
!                           left for the later big-argument pass).
!   otherwise (Inf or NaN): store argument * 0.
!
! Afterwards the counter is decremented; when nothing is left control
! goes to .last2_from_range2, otherwise the already preloaded next
! argument (%l3, %f24, %f27) is moved into the lane 2 registers and
! .loop2 restarts.
.range2:
cmp %l2,%o4
bl,pt %icc,1f ! hx < 0x3e400000
! delay slot, harmless if branch taken
sethi %hi(0x7ff00000),%o7
cmp %l2,%o7
bl,a,pt %icc,2f ! branch if finite
! delay slot, squashed if branch not taken
st %o4,[%fp+biguns] ! set biguns
fzero %f16
fmuld %f18,%f16,%f18 ! Inf or NaN times zero
st %f18,[%o2]
ba,pt %icc,2f
! delay slot
st %f19,[%o2+4]
1:
fdtoi %f18,%f20 ! raise inexact if not zero
sethi %hi(0x3ff00000),%o7
st %o7,[%o2] ! high word of 1.0
st %g0,[%o2+4] ! low word of 1.0
2:
addcc %i0,-1,%i0
ble,pn %icc,.last2_from_range2
! delay slot, harmless if branch taken
add %i3,%i4,%i3 ! y += stridey
andn %l3,%i5,%l2 ! hx &= ~0x80000000
fmovs %f24,%f16
fmovs %f27,%f19
ba,pt %icc,.loop2
! delay slot
add %i1,%i2,%i1 ! x += stridex
.align 16
! .range3 -- lane 3 argument is outside the range handled by the main
! pipeline.  %l3 holds the high word being classified and %o3 points at
! the result slot.
!
!   %l3 < %o4 (tiny)      : store 1.0 (0x3ff00000:0); fdtoi raises inexact
!                           for a nonzero argument.
!   %l3 < 0x7ff00000      : finite but too large; store a nonzero value in
!                           biguns and write no result here (presumably
!                           left for the later big-argument pass).
!   otherwise (Inf or NaN): store argument * 0.
!
! Afterwards the counter is decremented; when nothing is left control
! goes to .last3_from_range3.  Unlike the other range handlers there is
! no preloaded successor to copy from, so the next argument is loaded
! from [%i1] directly before .loop3 restarts.
.range3:
cmp %l3,%o4
bl,pt %icc,1f ! hx < 0x3e400000
! delay slot, harmless if branch taken
sethi %hi(0x7ff00000),%o7
cmp %l3,%o7
bl,a,pt %icc,2f ! branch if finite
! delay slot, squashed if branch not taken
st %o4,[%fp+biguns] ! set biguns
fzero %f24
fmuld %f26,%f24,%f26 ! Inf or NaN times zero
st %f26,[%o3]
ba,pt %icc,2f
! delay slot
st %f27,[%o3+4]
1:
fdtoi %f26,%f28 ! raise inexact if not zero
sethi %hi(0x3ff00000),%o7
st %o7,[%o3] ! high word of 1.0
st %g0,[%o3+4] ! low word of 1.0
2:
addcc %i0,-1,%i0
ble,pn %icc,.last3_from_range3
! delay slot, harmless if branch taken
add %i3,%i4,%i3 ! y += stridey
ld [%i1],%l3 ! high word of the next argument, integer copy
ld [%i1],%f24 ! same high word into the FP register
ld [%i1+4],%f27 ! low word of the next argument
andn %l3,%i5,%l3 ! hx &= ~0x80000000
ba,pt %icc,.loop3
! delay slot
add %i1,%i2,%i1 ! x += stridex
SET_SIZE(__vcos_ultra3)