/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
.file "__vatanf.S"
#include "libm.h"
RO_DATA
.align 64
.CONST_TBL:
.word 0x3fefffff, 0xfffccbbc ! K0 = 9.99999999976686608841e-01
.word 0xbfd55554, 0x51c6b90f ! K1 = -3.33333091601972730504e-01
.word 0x3fc98d6d, 0x926596cc ! K2 = 1.99628540499523379702e-01
.word 0x00020000, 0x00000000 ! DC1
.word 0xfffc0000, 0x00000000 ! DC2
.word 0x7ff00000, 0x00000000 ! DC3
.word 0x3ff00000, 0x00000000 ! DONE = 1.0
.word 0x40000000, 0x00000000 ! DTWO = 2.0
! parr0[i] = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127]
.word 0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6
.word 0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91
.word 0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac
.word 0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26
.word 0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd
.word 0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b
.word 0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741
.word 0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24
.word 0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f
.word 0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427
.word 0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225
.word 0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca
.word 0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6
.word 0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f
.word 0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867
.word 0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397
.word 0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f
.word 0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805
.word 0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5
.word 0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60
.word 0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce
.word 0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8
.word 0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c
.word 0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d
.word 0x7fd23eb7, 0x7fd22a01, 0x7fd21579, 0x7fd20120
.word 0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c
.word 0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d
.word 0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30
.word 0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244
.word 0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab
.word 0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949
.word 0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804
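! A minimal generation sketch for the parr0 table above (added commentary,
! not part of the original source; portable C, using memcpy in place of the
! big-endian *(int*)& pun in the formula, so the high word is taken
! explicitly):
!
!   unsigned int parr0[128];
!   for (int i = 0; i < 128; i++) {
!       unsigned long long db = ((unsigned long long)i << 45) | 0x3ff0100000000000ULL;
!       unsigned long long rb;
!       double d, r;
!       memcpy(&d, &db, sizeof d);          /* d = 1 + (2*i + 1)/256 */
!       r = 1.0 / d;                        /* reciprocal of the interval midpoint */
!       memcpy(&rb, &r, sizeof rb);
!       parr0[i] = (unsigned int)(rb >> 32) + 0x3ff00000u;  /* high word + exponent bias */
!   }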
.word 0x3ff00000, 0x00000000 ! 1.0
.word 0xbff00000, 0x00000000 ! -1.0
! parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155]
.word 0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f
.word 0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf
.word 0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2
.word 0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3
.word 0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19
.word 0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30
.word 0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195
.word 0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302
.word 0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a
.word 0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1
.word 0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c
.word 0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c
.word 0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700
.word 0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712
.word 0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9
.word 0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444
.word 0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d
.word 0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4
.word 0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c
.word 0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2
.word 0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc
.word 0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd
.word 0x3fddac67, 0x0561bb4f, 0x3fe1e00b, 0xabdefeb4
.word 0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634
.word 0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e
.word 0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f
.word 0x3ff1b6e1, 0x92ebbe44, 0x3ff30b6d, 0x796a4da8
.word 0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5
.word 0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857
.word 0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd
.word 0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054
.word 0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0
.word 0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f
.word 0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc
.word 0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45
.word 0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f
.word 0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665
.word 0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0
.word 0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5
.word 0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27
.word 0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38
.word 0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2
.word 0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849
.word 0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff
.word 0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619
.word 0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa
.word 0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105
.word 0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7
.word 0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc
.word 0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb
.word 0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28
.word 0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1
.word 0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94
.word 0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6
.word 0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395
.word 0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7
.word 0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e
.word 0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5
.word 0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2
.word 0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886
.word 0x3ff921f7, 0x54442d18, 0x3ff921f8, 0x2110f9e5
.word 0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf
.word 0x3ff921f9, 0x54442d18, 0x3ff921f9, 0xbaaa937f
.word 0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4
.word 0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b
.word 0x3ff921fa, 0xa999826e, 0x3ff921fa, 0xc1fb0886
.word 0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2
.word 0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf
.word 0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5
.word 0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4
.word 0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f
.word 0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886
.word 0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b
.word 0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf
.word 0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2
.word 0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4
.word 0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5
.word 0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886
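! A matching generation sketch for the parr1 table above (added commentary,
! not part of the original source; C, assuming atan() from <math.h>):
!
!   double parr1[156];
!   for (int i = 0; i < 156; i++) {
!       int fb = (i + 460) << 21;   /* breakpoint bits: exponent + top 2 mantissa bits */
!       float f;
!       memcpy(&f, &fb, sizeof f);
!       parr1[i] = atan((double)f);
!   }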
#define DC2 %f2
#define DTWO %f6
#define DONE %f52
#define K0 %f54
#define K1 %f56
#define K2 %f58
#define DC1 %f60
#define DC3 %f62
#define stridex %o2
#define stridey %o3
#define MASK_0x7fffffff %i1
#define MASK_0x100000 %i5
#define tmp_px STACK_BIAS-32
#define tmp_counter STACK_BIAS-24
#define tmp0 STACK_BIAS-16
#define tmp1 STACK_BIAS-8
#define counter %l1
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps 0x20
!--------------------------------------------------------------------
! !!!!! vatanf algorithm !!!!!
! ux = ((int*)px)[0];
! ax = ux & 0x7fffffff;
!
! if ( ax < 0x39b89c55 )
! {
! *(int*)py = ux;
! goto next;
! }
!
! if ( ax > 0x4c700518 )
! {
! if ( ax > 0x7f800000 )
! {
! float fpx = fabsf(*px);
! fpx *= fpx;
! *py = fpx;
! goto next;
! }
!
! sign = ux & 0x80000000;
! sign |= pi_2;
! *(int*)py = sign;
! goto next;
! }
!
! ftmp0 = *px;
! x = (double)ftmp0;
! px += stridex;
! y = vis_fpadd32(x,DC1);
! y = vis_fand(y,DC2);
! div = x * y;
! xx = x - y;
! div += DONE;
! i = ((unsigned long long*)&div)[0];
! y0 = vis_fand(div,DC3);
! i >>= 43;
! i &= 508;
! *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
! y0 = vis_fpsub32(dtmp0, y0);
! dtmp0 = div * y0;
! dtmp0 = DTWO - dtmp0;
! y0 *= dtmp0;
! dtmp1 = div * y0;
! dtmp1 = DTWO - dtmp1;
! y0 *= dtmp1;
! ax = ux & 0x7fffffff;
! ax += 0x00100000;
! ax >>= 18;
! ax &= -8;
! res = *(double*)((char*)parr1 + ax);
! ux >>= 28;
! ux &= -8;
! dtmp0 = *(double*)((char*)sign_arr + ux);
! res *= dtmp0;
! xx *= y0;
! x2 = xx * xx;
! dtmp0 = K2 * x2;
! dtmp0 += K1;
! dtmp0 *= x2;
! dtmp0 += K0;
! dtmp0 *= xx;
! res += dtmp0;
! ftmp0 = (float)res;
! py[0] = ftmp0;
! py += stridey;
!--------------------------------------------------------------------
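! Added commentary (not from the original source): the scheme above is the
! classic argument-reduction identity
!
!   atan(x) = atan(b) + atan((x - b)/(1 + x*b))
!
! with b = x rounded to its leading couple of mantissa bits (the
! fpadd32/fand pair). atan(b) is read from parr1 and signed via the
! 1.0/-1.0 pair; the residual t = xx/div is then small, so the odd
! polynomial t*(K0 + K1*t^2 + K2*t^4) reaches single precision. The
! division itself is avoided: y0 starts as a table estimate of 1/div from
! parr0 (with the exponent handled by the fpsub32) and is refined by two
! Newton-Raphson steps, each of the form
!
!   y0 = y0 * (DTWO - div * y0);   /* error roughly squares per step */
!
! which is why the dtmp0/dtmp1 pairs appear twice per element.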
ENTRY(__vatanf)
save %sp,-SA(MINFRAME)-tmps,%sp
PIC_SETUP(l7)
PIC_SET(l7,.CONST_TBL,l2)
st %i0,[%fp+tmp_counter]
sllx %i2,2,stridex
sllx %i4,2,stridey
or %g0,%i3,%o1
stx %i1,[%fp+tmp_px]
ldd [%l2],K0
ldd [%l2+8],K1
ldd [%l2+16],K2
ldd [%l2+24],DC1
ldd [%l2+32],DC2
ldd [%l2+40],DC3
ldd [%l2+48],DONE
ldd [%l2+56],DTWO
add %l2,64,%i4
add %l2,64+512,%l0
add %l2,64+512+16-0x1cc*8,%l7
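! Presumably (added commentary): %i4 -> parr0 (past the eight constant
! doubles), %l0 -> the 1.0/-1.0 sign pair, and %l7 -> parr1 biased by
! -0x1cc*8, so that the index ((ax + 0x00100000) >> 18) & -8, which equals
! 8*(i + 460) for breakpoint index i, lands on entry i directly.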
sethi %hi(0x100000),MASK_0x100000
sethi %hi(0x7ffffc00),MASK_0x7fffffff
add MASK_0x7fffffff,1023,MASK_0x7fffffff
sethi %hi(0x39b89c00),%o4
add %o4,0x55,%o4
sethi %hi(0x4c700400),%o5
add %o5,0x118,%o5
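! Added commentary (not from the original source): the cutoffs appear
! chosen so the main path is skipped exactly where it is unnecessary.
! Below 0x39b89c55 (|x| ~< 3.5e-4), atanf(x) rounds to x in single
! precision since |atan(x) - x| < |x|^3/3; above 0x4c700518 (|x| ~> 6.3e7),
! it rounds to +-pi/2 since |pi/2 - atan(|x|)| < 1/|x| is below half an
! ulp there.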
.begin:
ld [%fp+tmp_counter],counter
ldx [%fp+tmp_px],%i3
st %g0,[%fp+tmp_counter]
.begin1:
cmp counter,0
ble,pn %icc,.exit
nop
lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0];
and %l6,MASK_0x7fffffff,%l5 ! (0_0) ax = ux & 0x7fffffff;
lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px;
cmp %l5,%o4 ! (0_0) ax ? 0x39b89c55
bl,pn %icc,.spec0 ! (0_0) if ( ax < 0x39b89c55 )
nop
cmp %l5,%o5 ! (0_0) ax ? 0x4c700518
bg,pn %icc,.spec1 ! (0_0) if ( ax > 0x4c700518 )
nop
add %i3,stridex,%l5 ! px += stridex;
fstod %f0,%f22 ! (0_0) x = (double)ftmp0;
mov %l6,%i3
lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0];
and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff;
lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px;
add %l5,stridex,%l4 ! px += stridex;
fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1);
cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55
bl,pn %icc,.update0 ! (1_0) if ( ax < 0x39b89c55 )
nop
.cont0:
cmp %o7,%o5 ! (1_0) ax ? 0x4c700518
bg,pn %icc,.update1 ! (1_0) if ( ax > 0x4c700518 )
nop
.cont1:
fstod %f0,%f20 ! (1_0) x = (double)ftmp0;
mov %l6,%l5
fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2);
fmuld %f22,%f26,%f32 ! (0_0) div = x * y;
lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0];
fsubd %f22,%f26,%f22 ! (0_0) xx = x - y;
and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff;
lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px;
add %l4,stridex,%l3 ! px += stridex;
fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1);
cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55
bl,pn %icc,.update2 ! (2_0) if ( ax < 0x39b89c55 )
faddd DONE,%f32,%f32 ! (0_0) div += done;
.cont2:
cmp %o7,%o5 ! (2_0) ax ? 0x4c700518
bg,pn %icc,.update3 ! (2_0) if ( ax > 0x4c700518 )
nop
.cont3:
std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0];
mov %l6,%l4
fstod %f0,%f18 ! (2_0) x = (double)ftmp0;
fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2);
fmuld %f20,%f26,%f30 ! (1_0) div = x * y;
lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0];
fsubd %f20,%f26,%f20 ! (1_0) xx = x - y;
and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff;
lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px;
add %l3,stridex,%i0 ! px += stridex;
fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1);
cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55
bl,pn %icc,.update4 ! (3_0) if ( ax < 0x39b89c55 )
faddd DONE,%f30,%f30 ! (1_0) div += done;
.cont4:
cmp %o7,%o5 ! (3_0) ax ? 0x4c700518
bg,pn %icc,.update5 ! (3_0) if ( ax > 0x4c700518 )
nop
.cont5:
std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0];
mov %l6,%l3
fstod %f0,%f16 ! (3_0) x = (double)ftmp0;
ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2);
fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3);
srlx %o0,43,%o0 ! (0_0) i >>= 43;
and %o0,508,%l6 ! (0_0) i &= 508;
ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fmuld %f18,%f26,%f28 ! (2_0) div = x * y;
lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0];
fsubd %f18,%f26,%f18 ! (2_0) xx = x - y;
fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0);
and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff;
lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px;
add %i0,stridex,%i2 ! px += stridex;
fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1);
cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55
bl,pn %icc,.update6 ! (4_0) if ( ax < 0x39b89c55 )
faddd DONE,%f28,%f28 ! (2_0) div += done;
.cont6:
fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0;
cmp %o7,%o5 ! (4_0) ax ? 0x4c700518
bg,pn %icc,.update7 ! (4_0) if ( ax > 0x4c700518 )
nop
.cont7:
std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0];
mov %l6,%i0
fstod %f0,%f14 ! (4_0) x = (double)ftmp0;
ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2);
fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3);
fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0;
srlx %g1,43,%g1 ! (1_0) i >>= 43;
and %g1,508,%l6 ! (1_0) i &= 508;
ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fmuld %f16,%f26,%f34 ! (3_0) div = x * y;
lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0];
fsubd %f16,%f26,%f16 ! (3_0) xx = x - y;
fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0);
add %i2,stridex,%l2 ! px += stridex;
fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0;
and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff;
lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px;
fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1);
cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55
bl,pn %icc,.update8 ! (5_0) if ( ax < 0x39b89c55 )
faddd DONE,%f34,%f34 ! (3_0) div += done;
.cont8:
fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0;
cmp %o7,%o5 ! (5_0) ax ? 0x4c700518
bg,pn %icc,.update9 ! (5_0) if ( ax > 0x4c700518 )
nop
.cont9:
std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0];
mov %l6,%i2
fstod %f0,%f36 ! (5_0) x = (double)ftmp0;
fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0;
ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2);
fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3);
fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0;
srlx %o0,43,%o0 ! (2_0) i >>= 43;
and %o0,508,%l6 ! (2_0) i &= 508;
fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1;
ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fmuld %f14,%f26,%f32 ! (4_0) div = x * y;
lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0];
fsubd %f14,%f26,%f14 ! (4_0) xx = x - y;
fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1;
add %l2,stridex,%g5 ! px += stridex;
fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0;
and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff;
lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px;
fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1);
cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55
bl,pn %icc,.update10 ! (6_0) if ( ax < 0x39b89c55 )
faddd DONE,%f32,%f32 ! (4_0) div += done;
.cont10:
fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0;
cmp %o7,%o5 ! (6_0) ax ? 0x4c700518
bg,pn %icc,.update11 ! (6_0) if ( ax > 0x4c700518 )
nop
.cont11:
fmuld %f22,%f26,%f22 ! (0_0) xx *= y0;
mov %l6,%l2
std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0];
fstod %f0,%f10 ! (6_0) x = (double)ftmp0;
fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0;
ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2);
fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3);
fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx;
srlx %g1,43,%g1 ! (3_0) i >>= 43;
fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0;
and %g1,508,%l6 ! (3_0) i &= 508;
mov %i3,%o7
fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1;
ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fmuld %f36,%f26,%f30 ! (5_0) div = x * y;
srl %o7,28,%g1 ! (0_0) ux >>= 28;
add %g5,stridex,%i3 ! px += stridex;
fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%o0 ! (0_0) ax = ux & 0x7fffffff;
lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0];
fsubd %f36,%f26,%f36 ! (5_0) xx = x - y;
fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1;
add %o0,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000;
and %g1,-8,%g1 ! (0_0) ux &= -8;
fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0;
and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff;
lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px;
fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1);
cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55
bl,pn %icc,.update12 ! (7_0) if ( ax < 0x39b89c55 )
faddd DONE,%f30,%f30 ! (5_0) div += done;
.cont12:
fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0;
cmp %o7,%o5 ! (7_0) ax ? 0x4c700518
bg,pn %icc,.update13 ! (7_0) if ( ax > 0x4c700518 )
faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1;
.cont13:
fmuld %f20,%f26,%f20 ! (1_0) xx *= y0;
srl %o0,18,%o7 ! (0_0) ax >>= 18;
std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0];
fstod %f0,%f8 ! (7_0) x = (double)ftmp0;
fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0;
and %o7,-8,%o7 ! (0_0) ax &= -8;
ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2);
add %o7,%l7,%o7 ! (0_0) (char*)parr1 + ax;
mov %l6,%g5
ldd [%l0+%g1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);
fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2;
srlx %o0,43,%o0 ! (4_0) i >>= 43;
ldd [%o7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax);
fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3);
fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx;
and %o0,508,%l6 ! (4_0) i &= 508;
mov %l5,%o7
fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0;
fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0;
srl %o7,28,%l5 ! (1_0) ux >>= 28;
ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fmuld %f10,%f26,%f28 ! (6_0) div = x * y;
faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0;
subcc counter,8,counter
bneg,pn %icc,.tail
or %g0,%o1,%o0
add %fp,tmp0,%g1
lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0];
ba .main_loop
add %i3,stridex,%l5 ! px += stridex;
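! Added commentary (not from the original source): the (k_j) tags below
! appear to mark software-pipeline stages -- k in [0,7] is the element slot
! of the 8-way unrolled loop, and j distinguishes the current iteration (0)
! from the previous one (1); e.g. "(3_1)" touches element 3 from the prior
! pass.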
.align 16
.main_loop:
fsubd %f10,%f26,%f10 ! (6_1) xx = x - y;
and %o7,MASK_0x7fffffff,%o1 ! (1_1) ax = ux & 0x7fffffff;
st %f12,[%g1] ! (7_1) py[0] = ftmp0;
fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2;
fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1;
srl %o7,28,%o7 ! (1_1) ux >>= 28;
add %o1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000;
fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0;
and %l6,MASK_0x7fffffff,%o1 ! (0_0) ax = ux & 0x7fffffff;
lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px;
fpadd32 %f8,DC1,%f24 ! (7_1) y = vis_fpadd32(x,dconst1);
fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx;
cmp %o1,%o4 ! (0_0) ax ? 0x39b89c55
bl,pn %icc,.update14 ! (0_0) if ( ax < 0x39b89c55 )
faddd DONE,%f28,%f28 ! (6_1) div += done;
.cont14:
fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0;
cmp %o1,%o5 ! (0_0) ax ? 0x4c700518
bg,pn %icc,.update15 ! (0_0) if ( ax > 0x4c700518 )
faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1;
.cont15:
fmuld %f18,%f26,%f18 ! (2_1) xx *= y0;
srl %g1,18,%o1 ! (1_1) ax >>= 18;
std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0];
fstod %f0,%f22 ! (0_0) x = (double)ftmp0;
fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0;
and %o1,-8,%o1 ! (1_1) ax &= -8;
ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (7_1) y = vis_fand(y,dconst2);
ldd [%o1+%l7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax);
and %o7,-8,%o7 ! (1_1) ux &= -8;
mov %l6,%i3
faddd %f48,%f44,%f12 ! (0_1) res += dtmp0;
fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2;
nop
ldd [%l0+%o7],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);
fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3);
fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx;
srlx %g1,43,%g1 ! (5_1) i >>= 43;
mov %l4,%o7
fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0;
and %g1,508,%l6 ! (5_1) i &= 508;
nop
bn,pn %icc,.exit
fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0;
add %o0,stridey,%g1 ! py += stridey;
ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res;
fmuld %f8,%f26,%f34 ! (7_1) div = x * y;
srl %o7,28,%o1 ! (2_1) ux >>= 28;
lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0];
faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0;
fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%o7 ! (2_1) ax = ux & 0x7fffffff;
st %f12,[%o0] ! (0_1) py[0] = ftmp0;
fsubd %f8,%f26,%f8 ! (7_1) xx = x - y;
fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1;
add %l5,stridex,%l4 ! px += stridex;
add %o7,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000;
fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0;
and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff;
lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px;
fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1);
fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx;
cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55
bl,pn %icc,.update16 ! (1_0) if ( ax < 0x39b89c55 )
faddd DONE,%f34,%f34 ! (7_1) div += done;
.cont16:
fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0;
cmp %o7,%o5 ! (1_0) ax ? 0x4c700518
bg,pn %icc,.update17 ! (1_0) if ( ax > 0x4c700518 )
faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1;
.cont17:
fmuld %f16,%f26,%f16 ! (3_1) xx *= y0;
srl %o0,18,%o7 ! (2_1) ax >>= 18;
std %f34,[%fp+tmp1] ! (7_1) i = ((unsigned long long*)&div)[0];
fstod %f0,%f20 ! (1_0) x = (double)ftmp0;
fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0;
ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0];
and %o1,-8,%o1 ! (2_1) ux &= -8;
fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2);
faddd %f48,%f44,%f12 ! (1_1) res += dtmp0;
and %o7,-8,%o7 ! (2_1) ax &= -8;
ldd [%l0+%o1],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);
bn,pn %icc,.exit
ldd [%o7+%l7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax);
mov %l6,%l5
fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2;
fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3);
fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx;
srlx %o0,43,%o0 ! (6_1) i >>= 43;
mov %l3,%o7
fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0;
and %o0,508,%l6 ! (6_1) i &= 508;
add %l4,stridex,%l3 ! px += stridex;
bn,pn %icc,.exit
fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0;
add %g1,stridey,%o0 ! py += stridey;
ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res;
fmuld %f22,%f26,%f32 ! (0_0) div = x * y;
srl %o7,28,%o1 ! (3_1) ux >>= 28;
lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0];
faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0;
fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%o7 ! (3_1) ax = ux & 0x7fffffff;
st %f12,[%g1] ! (1_1) py[0] = ftmp0;
fsubd %f22,%f26,%f22 ! (0_0) xx = x - y;
fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1;
add %o7,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000;
and %o1,-8,%o1 ! (3_1) ux &= -8;
fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0;
and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff;
lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px;
fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1);
fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx;
cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55
bl,pn %icc,.update18 ! (2_0) if ( ax < 0x39b89c55 )
faddd DONE,%f32,%f32 ! (0_0) div += done;
.cont18:
fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0;
cmp %o7,%o5 ! (2_0) ax ? 0x4c700518
bg,pn %icc,.update19 ! (2_0) if ( ax > 0x4c700518 )
faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1;
.cont19:
fmuld %f14,%f26,%f14 ! (4_1) xx *= y0;
srl %g1,18,%o7 ! (3_1) ax >>= 18;
std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0];
fstod %f0,%f18 ! (2_0) x = (double)ftmp0;
fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0;
and %o7,-8,%o7 ! (3_1) ax &= -8;
ldx [%fp+tmp1],%g1 ! (7_1) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2);
faddd %f48,%f44,%f12 ! (2_1) res += dtmp0;
mov %l6,%l4
ldd [%l0+%o1],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);
bn,pn %icc,.exit
fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2;
ldd [%o7+%l7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax);
nop
fand %f34,DC3,%f24 ! (7_1) y0 = vis_fand(div,dconst3);
fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx;
srlx %g1,43,%g1 ! (7_1) i >>= 43;
mov %i0,%o7
fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0;
and %g1,508,%l6 ! (7_1) i &= 508;
add %l3,stridex,%i0 ! px += stridex;
bn,pn %icc,.exit
fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0;
add %o0,stridey,%g1 ! py += stridey;
ld [%i4+%l6],%f0 ! (7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res;
fmuld %f20,%f26,%f30 ! (1_0) div = x * y;
srl %o7,28,%o1 ! (4_1) ux >>= 28;
lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0];
faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0;
fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%o7 ! (4_1) ax = ux & 0x7fffffff;
st %f12,[%o0] ! (2_1) py[0] = ftmp0;
fsubd %f20,%f26,%f20 ! (1_0) xx = x - y;
fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1;
add %o7,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000;
and %o1,-8,%o1 ! (4_1) ux &= -8;
fpsub32 %f0,%f24,%f38 ! (7_1) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0;
and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff;
lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px;
fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1);
fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx;
cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55
bl,pn %icc,.update20 ! (3_0) if ( ax < 0x39b89c55 )
faddd DONE,%f30,%f30 ! (1_0) div += done;
.cont20:
fmuld %f34,%f38,%f42 ! (7_1) dtmp0 = div0 * y0;
cmp %o7,%o5 ! (3_0) ax ? 0x4c700518
bg,pn %icc,.update21 ! (3_0) if ( ax > 0x4c700518 )
faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1;
.cont21:
fmuld %f36,%f26,%f36 ! (5_1) xx *= y0;
srl %o0,18,%o7 ! (4_1) ax >>= 18;
std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0];
fstod %f0,%f16 ! (3_0) x = (double)ftmp0;
fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0;
and %o7,-8,%o7 ! (4_1) ax &= -8;
ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2);
faddd %f48,%f44,%f12 ! (3_1) res += dtmp0;
nop
ldd [%l0+%o1],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);
bn,pn %icc,.exit
ldd [%o7+%l7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax);
mov %l6,%l3
fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2;
fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3);
fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx;
srlx %o0,43,%o0 ! (0_0) i >>= 43;
mov %i2,%o7
fsubd DTWO,%f42,%f44 ! (7_1) dtmp0 = dtwo - dtmp0;
and %o0,508,%l6 ! (0_0) i &= 508;
add %i0,stridex,%i2 ! px += stridex;
bn,pn %icc,.exit
fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0;
add %g1,stridey,%o0 ! py += stridey;
ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res;
fmuld %f18,%f26,%f28 ! (2_0) div = x * y;
srl %o7,28,%o1 ! (5_1) ux >>= 28;
lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0];
faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0;
fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%o7 ! (5_1) ax = ux & 0x7fffffff;
st %f12,[%g1] ! (3_1) py[0] = ftmp0;
fsubd %f18,%f26,%f18 ! (2_0) xx = x - y;
fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1;
add %o7,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000;
and %o1,-8,%o1 ! (5_1) ux &= -8;
fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f38,%f44,%f38 ! (7_1) y0 *= dtmp0;
and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff;
lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px;
fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1);
fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx;
cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55
bl,pn %icc,.update22 ! (4_0) if ( ax < 0x39b89c55 )
faddd DONE,%f28,%f28 ! (2_0) div += done;
.cont22:
fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0;
cmp %o7,%o5 ! (4_0) ax ? 0x4c700518
bg,pn %icc,.update23 ! (4_0) if ( ax > 0x4c700518 )
faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1;
.cont23:
fmuld %f10,%f26,%f10 ! (6_1) xx *= y0;
srl %g1,18,%o7 ! (5_1) ax >>= 18;
std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0];
fstod %f0,%f14 ! (4_0) x = (double)ftmp0;
fmuld %f34,%f38,%f34 ! (7_1) dtmp1 = div0 * y0;
and %o7,-8,%o7 ! (5_1) ax &= -8;
ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2);
faddd %f48,%f44,%f12 ! (4_1) res += dtmp0;
mov %l6,%i0
ldd [%l0+%o1],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);
bn,pn %icc,.exit
ldd [%o7+%l7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax);
nop
fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2;
fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3);
fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx;
srlx %g1,43,%g1 ! (1_0) i >>= 43;
mov %l2,%o7
fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0;
and %g1,508,%l6 ! (1_0) i &= 508;
add %i2,stridex,%l2 ! px += stridex;
bn,pn %icc,.exit
fsubd DTWO,%f34,%f46 ! (7_1) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0;
add %o0,stridey,%g1 ! py += stridey;
ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res;
fmuld %f16,%f26,%f34 ! (3_0) div = x * y;
srl %o7,28,%o1 ! (6_1) ux >>= 28;
lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0];
faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0;
fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%o7 ! (6_1) ax = ux & 0x7fffffff;
st %f12,[%o0] ! (4_1) py[0] = ftmp0;
fsubd %f16,%f26,%f16 ! (3_0) xx = x - y;
fmuld %f38,%f46,%f26 ! (7_1) y0 *= dtmp1;
add %o7,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000;
and %o1,-8,%o1 ! (6_1) ux &= -8;
fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0;
and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff;
lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px;
fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1);
fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx;
cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55
bl,pn %icc,.update24 ! (5_0) if ( ax < 0x39b89c55 )
faddd DONE,%f34,%f34 ! (3_0) div += done;
.cont24:
fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0;
cmp %o7,%o5 ! (5_0) ax ? 0x4c700518
bg,pn %icc,.update25 ! (5_0) if ( ax > 0x4c700518 )
faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1;
.cont25:
fmuld %f8,%f26,%f8 ! (7_1) xx *= y0;
srl %o0,18,%o7 ! (6_1) ax >>= 18;
std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0];
fstod %f0,%f36 ! (5_0) x = (double)ftmp0;
fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0;
and %o7,-8,%o7 ! (6_1) ax &= -8;
ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2);
faddd %f48,%f44,%f12 ! (5_1) res += dtmp0;
mov %l6,%i2
ldd [%l0+%o1],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);
bn,pn %icc,.exit
ldd [%o7+%l7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax);
nop
fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2;
fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3);
fmuld %f8,%f8,%f50 ! (7_1) x2 = xx * xx;
srlx %o0,43,%o0 ! (2_0) i >>= 43;
mov %g5,%o7
fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0;
and %o0,508,%l6 ! (2_0) i &= 508;
add %l2,stridex,%g5 ! px += stridex;
bn,pn %icc,.exit
fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0;
add %g1,stridey,%o0 ! py += stridey;
ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res;
fmuld %f14,%f26,%f32 ! (4_0) div = x * y;
srl %o7,28,%o1 ! (7_1) ux >>= 28;
lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0];
faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0;
fmuld K2,%f50,%f4 ! (7_1) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%o7 ! (7_1) ax = ux & 0x7fffffff;
st %f12,[%g1] ! (5_1) py[0] = ftmp0;
fsubd %f14,%f26,%f14 ! (4_0) xx = x - y;
fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1;
add %o7,MASK_0x100000,%g1 ! (7_1) ax += 0x00100000;
and %o1,-8,%o1 ! (7_1) ux &= -8;
fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0;
and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff;
lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px;
fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1);
fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx;
cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55
bl,pn %icc,.update26 ! (6_0) if ( ax < 0x39b89c55 )
faddd DONE,%f32,%f32 ! (4_0) div += done;
.cont26:
fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0;
cmp %o7,%o5 ! (6_0) ax ? 0x4c700518
bg,pn %icc,.update27 ! (6_0) if ( ax > 0x4c700518 )
faddd %f4,K1,%f4 ! (7_1) dtmp0 += K1;
.cont27:
fmuld %f22,%f26,%f22 ! (0_0) xx *= y0;
srl %g1,18,%o7 ! (7_1) ax >>= 18;
std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0];
fstod %f0,%f10 ! (6_0) x = (double)ftmp0;
fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0;
and %o7,-8,%o7 ! (7_1) ax &= -8;
ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2);
faddd %f48,%f44,%f12 ! (6_1) res += dtmp0;
mov %l6,%l2
ldd [%l0+%o1],%f48 ! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux);
bn,pn %icc,.exit
ldd [%o7+%l7],%f0 ! (7_1) res = *(double*)((char*)parr1 + ax);
nop
fmuld %f4,%f50,%f4 ! (7_1) dtmp0 *= x2;
fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3);
fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx;
srlx %g1,43,%g1 ! (3_0) i >>= 43;
mov %i3,%o7
fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0;
and %g1,508,%l6 ! (3_0) i &= 508;
add %g5,stridex,%i3 ! px += stridex;
bn,pn %icc,.exit
fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (7_1) res *= dtmp0;
add %o0,stridey,%g1 ! py += stridey;
ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res;
fmuld %f36,%f26,%f30 ! (5_0) div = x * y;
srl %o7,28,%o1 ! (0_0) ux >>= 28;
lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0];
faddd %f4,K0,%f42 ! (7_1) dtmp0 += K0;
fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%o7 ! (0_0) ax = ux & 0x7fffffff;
st %f12,[%o0] ! (6_1) py[0] = ftmp0;
fsubd %f36,%f26,%f36 ! (5_0) xx = x - y;
fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1;
add %o7,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000;
and %o1,-8,%o1 ! (0_0) ux &= -8;
fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0;
and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff;
lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px;
fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1);
fmuld %f42,%f8,%f44 ! (7_1) dtmp0 *= xx;
cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55
bl,pn %icc,.update28 ! (7_0) if ( ax < 0x39b89c55 )
faddd DONE,%f30,%f30 ! (5_0) div += done;
.cont28:
fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0;
cmp %o7,%o5 ! (7_0) ax ? 0x4c700518
bg,pn %icc,.update29 ! (7_0) if ( ax > 0x4c700518 )
faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1;
.cont29:
fmuld %f20,%f26,%f20 ! (1_0) xx *= y0;
srl %o0,18,%o7 ! (0_0) ax >>= 18;
std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0];
fstod %f0,%f8 ! (7_0) x = (double)ftmp0;
fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0;
and %o7,-8,%o7 ! (0_0) ax &= -8;
ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0];
fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2);
faddd %f48,%f44,%f12 ! (7_1) res += dtmp0;
subcc counter,8,counter
ldd [%l0+%o1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);
bn,pn %icc,.exit
fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2;
mov %l6,%g5
ldd [%o7+%l7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax);
fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3);
fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx;
srlx %o0,43,%l6 ! (4_0) i >>= 43;
mov %l5,%o7
fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0;
add %g1,stridey,%o0 ! py += stridey;
and %l6,508,%l6 ! (4_0) i &= 508;
bn,pn %icc,.exit
fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0;
ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
add %i3,stridex,%l5 ! px += stridex;
fdtos %f12,%f12 ! (7_1) ftmp0 = (float)res;
lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0];
fmuld %f10,%f26,%f28 ! (6_0) div = x * y;
bpos,pt %icc,.main_loop
faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0;
srl %o7,28,%l5 ! (1_0) ux >>= 28;
st %f12,[%g1] ! (7_1) py[0] = ftmp0;
.tail:
addcc counter,7,counter
bneg,pn %icc,.begin
or %g0,%o0,%o1
fsubd %f10,%f26,%f10 ! (6_1) xx = x - y;
and %o7,MASK_0x7fffffff,%g1 ! (1_1) ax = ux & 0x7fffffff;
fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2;
fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1;
add %g1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000;
and %l5,-8,%l5 ! (1_1) ux &= -8;
fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0;
fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx;
faddd DONE,%f28,%f28 ! (6_1) div += done;
fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0;
faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1;
fmuld %f18,%f26,%f18 ! (2_1) xx *= y0;
srl %g1,18,%o7 ! (1_1) ax >>= 18;
std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0];
fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0;
and %o7,-8,%o7 ! (1_1) ax &= -8;
ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0];
faddd %f48,%f44,%f12 ! (0_1) res += dtmp0;
add %o7,%l7,%o7 ! (1_1) (char*)parr1 + ax;
ldd [%l0+%l5],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);
fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2;
fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3);
ldd [%o7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax);
fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx;
fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0;
srlx %g1,43,%g1 ! (5_1) i >>= 43;
and %g1,508,%l6 ! (5_1) i &= 508;
mov %l4,%o7
fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0;
add %o0,stridey,%g1 ! py += stridey;
ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res;
srl %o7,28,%l4 ! (2_1) ux >>= 28;
st %f12,[%o0] ! (0_1) py[0] = ftmp0;
faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0;
subcc counter,1,counter
bneg,pn %icc,.begin
or %g0,%g1,%o1
fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%o0 ! (2_1) ax = ux & 0x7fffffff;
fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1;
add %o0,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000;
and %l4,-8,%l4 ! (2_1) ux &= -8;
fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0;
fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx;
fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0;
faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1;
fmuld %f16,%f26,%f16 ! (3_1) xx *= y0;
srl %o0,18,%o7 ! (2_1) ax >>= 18;
fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0;
and %o7,-8,%o7 ! (2_1) ax &= -8;
ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0];
faddd %f48,%f44,%f12 ! (1_1) res += dtmp0;
add %o7,%l7,%o7 ! (2_1) (char*)parr1 + ax;
ldd [%l0+%l4],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);
fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2;
fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3);
ldd [%o7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax);
fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx;
fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0;
srlx %o0,43,%o0 ! (6_1) i >>= 43;
and %o0,508,%l6 ! (6_1) i &= 508;
mov %l3,%o7
fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0;
add %g1,stridey,%o0 ! py += stridey;
ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res;
srl %o7,28,%l3 ! (3_1) ux >>= 28;
st %f12,[%g1] ! (1_1) py[0] = ftmp0;
faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0;
subcc counter,1,counter
bneg,pn %icc,.begin
or %g0,%o0,%o1
fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%g1 ! (3_1) ax = ux & 0x7fffffff;
fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1;
add %g1,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000;
and %l3,-8,%l3 ! (3_1) ux &= -8;
fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0);
fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0;
fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx;
fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0;
faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1;
fmuld %f14,%f26,%f14 ! (4_1) xx *= y0;
srl %g1,18,%o7 ! (3_1) ax >>= 18;
fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0;
and %o7,-8,%o7 ! (3_1) ax &= -8;
faddd %f48,%f44,%f12 ! (2_1) res += dtmp0;
add %o7,%l7,%o7 ! (3_1) (char*)parr1 + ax;
ldd [%l0+%l3],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);
fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2;
ldd [%o7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax);
fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx;
fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0;
mov %i0,%o7
fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0;
add %o0,stridey,%g1 ! py += stridey;
fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res;
srl %o7,28,%i0 ! (4_1) ux >>= 28;
st %f12,[%o0] ! (2_1) py[0] = ftmp0;
faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0;
subcc counter,1,counter
bneg,pn %icc,.begin
or %g0,%g1,%o1
fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%o0 ! (4_1) ax = ux & 0x7fffffff;
fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1;
add %o0,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000;
and %i0,-8,%i0 ! (4_1) ux &= -8;
fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0;
fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx;
faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1;
fmuld %f36,%f26,%f36 ! (5_1) xx *= y0;
srl %o0,18,%o7 ! (4_1) ax >>= 18;
fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0;
and %o7,-8,%o7 ! (4_1) ax &= -8;
faddd %f48,%f44,%f12 ! (3_1) res += dtmp0;
add %o7,%l7,%o7 ! (4_1) (char*)parr1 + ax;
ldd [%l0+%i0],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);
fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2;
ldd [%o7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax);
fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx;
mov %i2,%o7
fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1;
fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0;
add %g1,stridey,%o0 ! py += stridey;
fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res;
srl %o7,28,%i2 ! (5_1) ux >>= 28;
st %f12,[%g1] ! (3_1) py[0] = ftmp0;
faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0;
subcc counter,1,counter
bneg,pn %icc,.begin
or %g0,%o0,%o1
fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%g1 ! (5_1) ax = ux & 0x7fffffff;
fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1;
add %g1,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000;
and %i2,-8,%i2 ! (5_1) ux &= -8;
fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx;
faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1;
fmuld %f10,%f26,%f10 ! (6_1) xx *= y0;
srl %g1,18,%o7 ! (5_1) ax >>= 18;
and %o7,-8,%o7 ! (5_1) ax &= -8;
faddd %f48,%f44,%f12 ! (4_1) res += dtmp0;
add %o7,%l7,%o7 ! (5_1) (char*)parr1 + ax;
ldd [%l0+%i2],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);
fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2;
ldd [%o7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax);
fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx;
mov %l2,%o7
fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0;
add %o0,stridey,%g1 ! py += stridey;
fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res;
srl %o7,28,%l2 ! (6_1) ux >>= 28;
st %f12,[%o0] ! (4_1) py[0] = ftmp0;
faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0;
subcc counter,1,counter
bneg,pn %icc,.begin
or %g0,%g1,%o1
fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2;
and %o7,MASK_0x7fffffff,%o0 ! (6_1) ax = ux & 0x7fffffff;
add %o0,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000;
and %l2,-8,%l2 ! (6_1) ux &= -8;
fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx;
faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1;
srl %o0,18,%o7 ! (6_1) ax >>= 18;
and %o7,-8,%o7 ! (6_1) ax &= -8;
faddd %f48,%f44,%f12 ! (5_1) res += dtmp0;
add %o7,%l7,%o7 ! (6_1) (char*)parr1 + ax;
ldd [%l0+%l2],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);
fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2;
ldd [%o7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax);
fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0;
add %g1,stridey,%o0 ! py += stridey;
fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res;
st %f12,[%g1] ! (5_1) py[0] = ftmp0;
faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0;
subcc counter,1,counter
bneg,pn %icc,.begin
or %g0,%o0,%o1
fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx;
faddd %f48,%f44,%f12 ! (6_1) res += dtmp0;
add %o0,stridey,%g1 ! py += stridey;
fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res;
st %f12,[%o0] ! (6_1) py[0] = ftmp0;
ba .begin
or %g0,%g1,%o1 ! py += stridey;
.exit:
ret
restore %g0,%g0,%g0
.align 16
.spec0:
add %i3,stridex,%i3 ! px += stridex;
sub counter,1,counter
st %l6,[%o1] ! *(int*)py = ux;
ba .begin1
add %o1,stridey,%o1 ! py += stridey;
.align 16
.spec1:
sethi %hi(0x7f800000),%l3
sethi %hi(0x3fc90c00),%l4 ! pi_2
sethi %hi(0x80000000),%o0
add %l4,0x3db,%l4 ! pi_2
cmp %l5,%l3 ! if ( ax > 0x7f800000 )
bg,a,pn %icc,1f
fabss %f0,%f0 ! fpx = fabsf(*px);
and %l6,%o0,%l6 ! sign = ux & 0x80000000;
or %l6,%l4,%l6 ! sign |= pi_2;
add %i3,stridex,%i3 ! px += stridex;
sub counter,1,counter
st %l6,[%o1] ! *(int*)py = sign;
ba .begin1
add %o1,stridey,%o1 ! py += stridey;
1:
fmuls %f0,%f0,%f0 ! fpx *= fpx;
add %i3,stridex,%i3 ! px += stridex;
sub counter,1,counter
st %f0,[%o1] ! *py = fpx;
ba .begin1
add %o1,stridey,%o1 ! py += stridey;
.align 16
.update0:
cmp counter,1
fzeros %f0
ble,a .cont0
sethi %hi(0x3fffffff),%l6
sub counter,1,counter
st counter,[%fp+tmp_counter]
stx %l5,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont0
or %g0,1,counter
.align 16
.update1:
cmp counter,1
fzeros %f0
ble,a .cont1
sethi %hi(0x3fffffff),%l6
sub counter,1,counter
st counter,[%fp+tmp_counter]
stx %l5,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont1
or %g0,1,counter
.align 16
.update2:
cmp counter,2
fzeros %f0
ble,a .cont2
sethi %hi(0x3fffffff),%l6
sub counter,2,counter
st counter,[%fp+tmp_counter]
stx %l4,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont2
or %g0,2,counter
.align 16
.update3:
cmp counter,2
fzeros %f0
ble,a .cont3
sethi %hi(0x3fffffff),%l6
sub counter,2,counter
st counter,[%fp+tmp_counter]
stx %l4,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont3
or %g0,2,counter
.align 16
.update4:
cmp counter,3
fzeros %f0
ble,a .cont4
sethi %hi(0x3fffffff),%l6
sub counter,3,counter
st counter,[%fp+tmp_counter]
stx %l3,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont4
or %g0,3,counter
.align 16
.update5:
cmp counter,3
fzeros %f0
ble,a .cont5
sethi %hi(0x3fffffff),%l6
sub counter,3,counter
st counter,[%fp+tmp_counter]
stx %l3,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont5
or %g0,3,counter
.align 16
.update6:
cmp counter,4
fzeros %f0
ble,a .cont6
sethi %hi(0x3fffffff),%l6
sub counter,4,counter
st counter,[%fp+tmp_counter]
stx %i0,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont6
or %g0,4,counter
.align 16
.update7:
cmp counter,4
fzeros %f0
ble,a .cont7
sethi %hi(0x3fffffff),%l6
sub counter,4,counter
st counter,[%fp+tmp_counter]
stx %i0,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont7
or %g0,4,counter
.align 16
.update8:
cmp counter,5
fzeros %f0
ble,a .cont8
sethi %hi(0x3fffffff),%l6
sub counter,5,counter
st counter,[%fp+tmp_counter]
stx %i2,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont8
or %g0,5,counter
.align 16
.update9:
cmp counter,5
fzeros %f0
ble,a .cont9
sethi %hi(0x3fffffff),%l6
sub counter,5,counter
st counter,[%fp+tmp_counter]
stx %i2,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont9
or %g0,5,counter
.align 16
.update10:
cmp counter,6
fzeros %f0
ble,a .cont10
sethi %hi(0x3fffffff),%l6
sub counter,6,counter
st counter,[%fp+tmp_counter]
stx %l2,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont10
or %g0,6,counter
.align 16
.update11:
cmp counter,6
fzeros %f0
ble,a .cont11
sethi %hi(0x3fffffff),%l6
sub counter,6,counter
st counter,[%fp+tmp_counter]
stx %l2,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont11
or %g0,6,counter
.align 16
.update12:
cmp counter,7
fzeros %f0
ble,a .cont12
sethi %hi(0x3fffffff),%l6
sub counter,7,counter
st counter,[%fp+tmp_counter]
stx %g5,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont12
or %g0,7,counter
.align 16
.update13:
cmp counter,7
fzeros %f0
ble,a .cont13
sethi %hi(0x3fffffff),%l6
sub counter,7,counter
st counter,[%fp+tmp_counter]
stx %g5,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont13
or %g0,7,counter
.align 16
.update14:
cmp counter,0
fzeros %f0
ble,a .cont14
sethi %hi(0x3fffffff),%l6
sub counter,0,counter
st counter,[%fp+tmp_counter]
stx %i3,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont14
or %g0,0,counter
.align 16
.update15:
cmp counter,0
fzeros %f0
ble,a .cont15
sethi %hi(0x3fffffff),%l6
sub counter,0,counter
st counter,[%fp+tmp_counter]
stx %i3,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont15
or %g0,0,counter
.align 16
.update16:
cmp counter,1
fzeros %f0
ble,a .cont16
sethi %hi(0x3fffffff),%l6
sub counter,1,counter
st counter,[%fp+tmp_counter]
stx %l5,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont16
or %g0,1,counter
.align 16
.update17:
cmp counter,1
fzeros %f0
ble,a .cont17
sethi %hi(0x3fffffff),%l6
sub counter,1,counter
st counter,[%fp+tmp_counter]
stx %l5,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont17
or %g0,1,counter
.align 16
.update18:
cmp counter,2
fzeros %f0
ble,a .cont18
sethi %hi(0x3fffffff),%l6
sub counter,2,counter
st counter,[%fp+tmp_counter]
stx %l4,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont18
or %g0,2,counter
.align 16
.update19:
cmp counter,2
fzeros %f0
ble,a .cont19
sethi %hi(0x3fffffff),%l6
sub counter,2,counter
st counter,[%fp+tmp_counter]
stx %l4,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont19
or %g0,2,counter
.align 16
.update20:
cmp counter,3
fzeros %f0
ble,a .cont20
sethi %hi(0x3fffffff),%l6
sub counter,3,counter
st counter,[%fp+tmp_counter]
stx %l3,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont20
or %g0,3,counter
.align 16
.update21:
cmp counter,3
fzeros %f0
ble,a .cont21
sethi %hi(0x3fffffff),%l6
sub counter,3,counter
st counter,[%fp+tmp_counter]
stx %l3,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont21
or %g0,3,counter
.align 16
.update22:
cmp counter,4
fzeros %f0
ble,a .cont22
sethi %hi(0x3fffffff),%l6
sub counter,4,counter
st counter,[%fp+tmp_counter]
stx %i0,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont22
or %g0,4,counter
.align 16
.update23:
cmp counter,4
fzeros %f0
ble,a .cont23
sethi %hi(0x3fffffff),%l6
sub counter,4,counter
st counter,[%fp+tmp_counter]
stx %i0,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont23
or %g0,4,counter
.align 16
.update24:
cmp counter,5
fzeros %f0
ble,a .cont24
sethi %hi(0x3fffffff),%l6
sub counter,5,counter
st counter,[%fp+tmp_counter]
stx %i2,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont24
or %g0,5,counter
.align 16
.update25:
cmp counter,5
fzeros %f0
ble,a .cont25
sethi %hi(0x3fffffff),%l6
sub counter,5,counter
st counter,[%fp+tmp_counter]
stx %i2,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont25
or %g0,5,counter
.align 16
.update26:
cmp counter,6
fzeros %f0
ble,a .cont26
sethi %hi(0x3fffffff),%l6
sub counter,6,counter
st counter,[%fp+tmp_counter]
stx %l2,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont26
or %g0,6,counter
.align 16
.update27:
cmp counter,6
fzeros %f0
ble,a .cont27
sethi %hi(0x3fffffff),%l6
sub counter,6,counter
st counter,[%fp+tmp_counter]
stx %l2,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont27
or %g0,6,counter
.align 16
.update28:
cmp counter,7
fzeros %f0
ble,a .cont28
sethi %hi(0x3fffffff),%l6
sub counter,7,counter
st counter,[%fp+tmp_counter]
stx %g5,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont28
or %g0,7,counter
.align 16
.update29:
cmp counter,7
fzeros %f0
ble,a .cont29
sethi %hi(0x3fffffff),%l6
sub counter,7,counter
st counter,[%fp+tmp_counter]
stx %g5,[%fp+tmp_px]
sethi %hi(0x3fffffff),%l6
ba .cont29
or %g0,7,counter
SET_SIZE(__vatanf)