__vrsqrt.S revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
.file "__vrsqrt.S"
#include "libm.h"
/*
 * Read-only constant table for __vrsqrt.
 *
 * Each entry is one 8-byte double written as two 32-bit words, most
 * significant word first.  The routine fetches entries with ldd using
 * fixed byte offsets from .CONST_TBL (0x00 for K1 through 0x60 for
 * DONE), so entries must not be reordered, inserted or removed without
 * updating those offsets.
 *
 *   0x00-0x28  K1..K6  coefficients of the polynomial in xx that the
 *                      algorithm evaluates with a Horner scheme:
 *                      ((((((K6*xx + K5)*xx + K4)*xx + K3)*xx + K2)*xx
 *                      + K1)*xx).
 *   0x30       DC0     AND mask applied to the argument; keeps the 52
 *                      fraction bits and the lowest exponent bit.
 *   0x38       DC1     bit pattern ORed in after the DC0 mask; forces
 *                      the exponent field to 0x3fe or 0x3ff.
 *   0x40       DC2     value added to the high word with vis_fpadd32
 *                      before the DC3 mask; 0x2000 is half of the lowest
 *                      high-word bit that DC3 keeps, so the DC2/DC3 pair
 *                      rounds res to the coarser value res_c.
 *   0x48       DC3     AND mask keeping bits 14..30 of the high word and
 *                      clearing the whole low word.
 *   0x50       DC4     AND mask keeping the low 51 bits; the algorithm
 *                      applies it when 0x00080000 <= hx < 0x00100000.
 *   0x58       D2ON51  2**51, added after the DC4 mask on that same path.
 *   0x60       DONE    1.0, the numerator of DONE / res in the special
 *                      cases of the algorithm.
 */
RO_DATA
.align 64
.CONST_TBL:
.word 0xbfe00000, 0x0000002f ! K1 =-5.00000000000005209867e-01;
.word 0x3fd80000, 0x00000058 ! K2 = 3.75000000000004884257e-01;
.word 0xbfd3ffff, 0xff444bc8 ! K3 =-3.12499999317136886551e-01;
.word 0x3fd17fff, 0xff5006fe ! K4 = 2.73437499359815081532e-01;
.word 0xbfcf80bb, 0xb33ef574 ! K5 =-2.46116125605037803130e-01;
.word 0x3fcce0af, 0xf8156949 ! K6 = 2.25606914648617522896e-01;
.word 0x001fffff, 0xffffffff ! DC0
.word 0x3fe00000, 0x00000000 ! DC1
.word 0x00002000, 0x00000000 ! DC2
.word 0x7fffc000, 0x00000000 ! DC3
.word 0x0007ffff, 0xffffffff ! DC4
.word 0x43200000, 0x00000000 ! D2ON51 = pow(2,51)
.word 0x3ff00000, 0x00000000 ! DONE = 1.0
/*
 * Register and stack-slot aliases used by __vrsqrt.
 *
 * Integer registers:
 *   stridex, stridey   x and y strides converted to bytes at entry
 *                      (incoming stride shifted left by 3).
 *   counter            loop counter; .begin1 branches to .exit when it
 *                      is <= 0.
 *   TBL                address of __vlibm_TBL_rsqrt, set up with PIC_SET
 *                      at entry and used as the base of the table lookup.
 *   _0x7ff00000,
 *   _0x00100000        hold the two hx thresholds that select the
 *                      special-case paths; loaded with sethi at entry.
 */
#define stridex %l5
#define stridey %l7
#define counter %l0
#define TBL %l3
#define _0x7ff00000 %o0
#define _0x00100000 %o1
/*
 * Floating-point registers holding entries of .CONST_TBL.  K1..K6 and
 * DC0..DC3 are loaded once at entry.  DONE is loaded at .begin1; note
 * that %f4 is also written as a temporary inside .main_loop.
 */
#define DC0 %f56
#define DC1 %f54
#define DC2 %f48
#define DC3 %f46
#define K6 %f42
#define K5 %f20
#define K4 %f52
#define K3 %f50
#define K2 %f14
#define K1 %f12
#define DONE %f4
/*
 * tmp_counter and tmp_px are initialised from the first two incoming
 * arguments (%i0 and %i1) and copied into counter and %i1 at .begin,
 * where tmp_counter is then cleared.
 * NOTE(review): presumably the out-of-line special-case paths refill
 * them to restart the loop; that code is not visible here - confirm.
 */
#define tmp_counter %g5
#define tmp_px %o5
/*
 * Eight 8-byte scratch slots addressed relative to %fp.  The visible
 * code stores the integer bit pattern of dlexp (iexp << 52) into
 * tmp0..tmp6 with stx and reads it back as a double with ldd.
 */
#define tmp0 STACK_BIAS-0x40
#define tmp1 STACK_BIAS-0x38
#define tmp2 STACK_BIAS-0x30
#define tmp3 STACK_BIAS-0x28
#define tmp4 STACK_BIAS-0x20
#define tmp5 STACK_BIAS-0x18
#define tmp6 STACK_BIAS-0x10
#define tmp7 STACK_BIAS-0x08
! sizeof temp storage - must be a multiple of 16 for V9
/* 0x40 = 8 slots * 8 bytes; subtracted from %sp by the save at entry. */
#define tmps 0x40
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! !!!!! algorithm !!!!!
! ((float*)&res)[0] = ((float*)px)[0];
! ((float*)&res)[1] = ((float*)px)[1];
! hx = *(int*)px;
! if ( hx >= 0x7ff00000 )
! {
! res = DONE / res;
! ((float*)py)[0] = ((float*)&res)[0];
! ((float*)py)[1] = ((float*)&res)[1];
! px += stridex;
! py += stridey;
! continue;
! }
! if ( hx < 0x00100000 )
! {
! ax = hx & 0x7fffffff;
! lx = ((int*)px)[1];
!
! if ( (ax | lx) == 0 )
! {
! res = DONE / res;
! ((float*)py)[0] = ((float*)&res)[0];
! ((float*)py)[1] = ((float*)&res)[1];
! px += stridex;
! py += stridey;
! continue;
! }
! else if ( hx >= 0 )
! {
! if ( hx < 0x00080000 )
! {
! res = *(long long*)&res;
! hx = *(int*)&res - (537 << 21);
! }
! else
! {
! res = vis_fand(res,DC4);
! res = *(long long*)&res;
! res += D2ON51;
! hx = *(int*)&res - (537 << 21);
! }
! }
! else
! {
! res = sqrt(res);
! ((float*)py)[0] = ((float*)&res)[0];
! ((float*)py)[1] = ((float*)&res)[1];
! px += stridex;
! py += stridey;
! continue;
! }
! }
!
! iexp = hx >> 21;
! iexp = -iexp;
! iexp += 0x5fe;
! lexp = iexp << 52;
! dlexp = *(double*)&lexp;
! hx >>= 10;
! hx &= 0x7f8;
! hx += 8;
! hx &= -16;
!
! res = vis_fand(res,DC0);
! res = vis_for(res,DC1);
! res_c = vis_fpadd32(res,DC2);
! res_c = vis_fand(res_c,DC3);
!
! addr = (char*)arr + hx;
! dexp_hi = ((double*)addr)[0];
! dexp_lo = ((double*)addr)[1];
! dtmp0 = dexp_hi * dexp_hi;
! xx = res - res_c;
! xx *= dtmp0;
! res = K6 * xx;
! res += K5;
! res *= xx;
! res += K4;
! res *= xx;
! res += K3;
! res *= xx;
! res += K2;
! res *= xx;
! res += K1;
! res *= xx;
! res = dexp_hi * res;
! res += dexp_lo;
! res += dexp_hi;
!
! res *= dlexp;
!
! ((float*)py)[0] = ((float*)&res)[0];
! ((float*)py)[1] = ((float*)&res)[1];
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
ENTRY(__vrsqrt)
save %sp,-SA(MINFRAME)-tmps,%sp
PIC_SETUP(l7)
PIC_SET(l7,.CONST_TBL,o3)
PIC_SET(l7,__vlibm_TBL_rsqrt,l3)
wr %g0,0x82,%asi
ldd [%o3],K1
sethi %hi(0x7ff00000),%o0
mov %i3,%o4
ldd [%o3+0x08],K2
sethi %hi(0x00100000),%o1
mov %i1,tmp_px
ldd [%o3+0x10],K3
sll %i2,3,stridex
mov %i0,tmp_counter
ldd [%o3+0x18],K4
sll %i4,3,stridey
ldd [%o3+0x20],K5
ldd [%o3+0x28],K6
ldd [%o3+0x30],DC0
ldd [%o3+0x38],DC1
ldd [%o3+0x40],DC2
ldd [%o3+0x48],DC3
.begin:
mov tmp_counter,counter
mov tmp_px,%i1
clr tmp_counter
.begin1:
cmp counter,0
ble,pn %icc,.exit
ldd [%o3+0x60],DONE
lda [%i1]%asi,%f0 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
sethi %hi(0x7ffffc00),%i0
lda [%i1+4]%asi,%f1 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
add %i0,1023,%i0
fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px;
sethi %hi(0x00080000),%i4
lda [%i1+4]%asi,%l4
add %i1,stridex,%l6 ! px += stridex
sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
lda [%l6]%asi,%f8 ! (0_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
lda [%l6+4]%asi,%f9 ! (0_0) ((float*)res)[1] = ((float*)px)[1];
sra %g1,10,%o2 ! (6_1) hx >>= 10;
and %g1,%i0,%i2
cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000
bge,pn %icc,.spec0 ! (6_1) if ( hx >= 0x7ff00000 )
and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000
bl,pn %icc,.spec1 ! (6_1) if ( hx < 0x00100000 )
sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
.cont_spec:
fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
add %o2,8,%l4 ! (6_1) hx += 8;
add %o7,1534,%o7 ! (6_1) iexp += 0x5fe;
lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px;
sllx %o7,52,%o7 ! (6_1) iexp << 52;
and %l4,-16,%l4 ! (6_1) hx = -16;
add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx;
stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp;
add %l6,stridex,%l6 ! px += stridex
ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0];
sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
lda [%l6]%asi,%f0 ! (1_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
sra %g1,10,%o2 ! (0_0) hx >>= 10;
sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
lda [%l6+4]%asi,%f1 ! (1_0) ((float*)res)[1] = ((float*)px)[1];
cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 )
fand %f18,DC3,%f6 ! (6_1) res_c = vis_fand(res_c,DC3);
.cont0:
and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
fmuld %f30,%f30,%f10 ! (6_1) dtmp0 = dexp_hi * dexp_hi;
cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000
bl,pn %icc,.update1 ! (0_0) if ( hx < 0x00100000 )
add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
.cont1:
fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
add %o2,8,%l2 ! (0_0) hx += 8;
fsubd %f44,%f6,%f6 ! (6_1) xx = res - res_c;
lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px;
sllx %o7,52,%o7 ! (0_0) iexp << 52;
and %l2,-16,%l2 ! (0_0) hx = -16;
add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx;
add %l6,stridex,%l6 ! px += stridex
stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp;
fmuld %f6,%f10,%f26 ! (6_1) xx *= dtmp0;
ldd [%l2],%f10 ! (0_0) dtmp0 = ((double*)addr)[0];
sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
lda [%l6]%asi,%f6 ! (2_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
sra %g1,10,%o2 ! (1_0) hx >>= 10;
cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 )
lda [%l6+4]%asi,%f7 ! (2_0) ((float*)res)[1] = ((float*)px)[1];
.cont2:
fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
fmuld %f10,%f10,%f10 ! (0_0) dtmp0 = dexp_hi * dexp_hi;
cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000
bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 )
and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
.cont3:
sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
add %o7,1534,%o7 ! (1_0) iexp += 0x5fe;
fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2);
fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx;
add %o2,8,%i2 ! (1_0) hx += 8;
fsubd %f28,%f8,%f32 ! (0_0) xx = res - res_c;
lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px;
sllx %o7,52,%o7 ! (1_0) iexp << 52;
and %i2,-16,%i2 ! (1_0) hx = -16;
add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx;
stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp;
fmuld %f32,%f10,%f32 ! (0_0) xx *= dtmp0;
add %l6,stridex,%l6 ! px += stridex
ldd [%i2],%f10 ! (1_0) dtmp0 = ((double*)addr)[0];
faddd %f62,K5,%f62 ! (6_1) res += K5;
sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
lda [%l6]%asi,%f0 ! (3_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
sra %g1,10,%o2 ! (2_0) hx >>= 10;
cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 )
lda [%l6+4]%asi,%f1 ! (3_0) ((float*)res)[1] = ((float*)px)[1];
.cont4:
fmuld %f62,%f26,%f40 ! (6_1) res *= xx;
fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
fmuld %f10,%f10,%f10 ! (1_0) dtmp0 = dexp_hi * dexp_hi;
cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000
bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 )
and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
.cont5:
sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
add %o7,1534,%o7 ! (2_0) iexp += 0x5fe;
fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2);
fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx;
add %o2,8,%i4 ! (2_0) hx += 8;
fsubd %f44,%f8,%f6 ! (1_0) xx = res - res_c;
faddd %f40,K4,%f40 ! (6_1) res += K4;
lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px;
sllx %o7,52,%o7 ! (2_0) iexp << 52;
and %i4,-16,%i4 ! (2_0) hx = -16;
add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx;
stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp;
fmuld %f6,%f10,%f38 ! (1_0) xx *= dtmp0;
ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0];
faddd %f62,K5,%f62 ! (0_0) res += K5;
fmuld %f40,%f26,%f34 ! (6_1) res *= xx;
add %l6,stridex,%l6 ! px += stridex
sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
lda [%l6]%asi,%f8 ! (4_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
sra %g1,10,%o2 ! (3_0) hx >>= 10;
cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 )
lda [%l6+4]%asi,%f9 ! (4_0) ((float*)res)[1] = ((float*)px)[1];
.cont6:
fmuld %f62,%f32,%f60 ! (0_0) res *= xx;
cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000
fand %f18,DC3,%f22 ! (2_0) res_c = vis_fand(res_c,DC3);
fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi;
bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 )
and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
faddd %f34,K3,%f6 ! (6_1) res += K3;
.cont7:
sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
fand %f8,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
add %o7,1534,%o7 ! (3_0) iexp += 0x5fe;
fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2);
fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx;
add %o2,8,%i5 ! (3_0) hx += 8;
fsubd %f28,%f22,%f28 ! (2_0) xx = res - res_c;
fmuld %f6,%f26,%f22 ! (6_1) res *= xx;
faddd %f60,K4,%f60 ! (0_0) res += K4;
lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px;
sllx %o7,52,%o7 ! (3_0) iexp << 52;
and %i5,-16,%i5 ! (3_0) hx = -16;
add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx;
stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp;
fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0;
add %l6,stridex,%i0 ! px += stridex
ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0];
faddd %f62,K5,%f62 ! (1_0) res += K5;
faddd %f22,K2,%f10 ! (6_1) res += K2;
fmuld %f60,%f32,%f34 ! (0_0) res *= xx;
sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
lda [%i0]%asi,%f0 ! (5_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
sra %g1,10,%o2 ! (4_0) hx >>= 10;
cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 )
lda [%i0+4]%asi,%f1 ! (5_0) ((float*)res)[1] = ((float*)px)[1];
.cont8:
fand %f18,DC3,%f40 ! (3_0) res_c = vis_fand(res_c,DC3);
fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
fmuld %f10,%f26,%f58 ! (6_1) res *= xx;
cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000
and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
faddd %f34,K3,%f60 ! (0_0) res += K3;
fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi;
bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 )
sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
.cont9:
add %o7,1534,%o7 ! (4_0) iexp += 0x5fe;
fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2);
fmuld K6,%f36,%f10 ! (2_0) res = K6 * xx;
add %o2,8,%l1 ! (4_0) hx += 8;
fsubd %f44,%f40,%f44 ! (3_0) xx = res - res_c;
fmuld %f60,%f32,%f60 ! (0_0) res *= xx;
faddd %f62,K4,%f6 ! (1_0) res += K4;
lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px;
sllx %o7,52,%o7 ! (4_0) iexp << 52;
and %l1,-16,%l1 ! (4_0) hx = -16;
faddd %f58,K1,%f58 ! (6_1) res += K1;
add %i0,stridex,%i1 ! px += stridex
add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx;
stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp;
fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0;
ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0];
faddd %f10,K5,%f62 ! (2_0) res += K5;
fmuld %f6,%f38,%f34 ! (1_0) res *= xx;
sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
nop
faddd %f60,K2,%f60 ! (0_0) res += K2;
for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
fmuld %f58,%f26,%f26 ! (6_1) res *= xx;
sra %g1,10,%o2 ! (5_0) hx >>= 10;
cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000
bge,pn %icc,.update10 ! (5_0) if ( hx >= 0x7ff00000 )
lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
.cont10:
fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3);
fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
fmuld %f60,%f32,%f58 ! (0_0) res *= xx;
cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000
and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
faddd %f34,K3,%f34 ! (1_0) res += K3;
fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res;
bl,pn %icc,.update11 ! (5_0) if ( hx < 0x00100000 )
nop
fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
.cont11:
ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1];
fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi;
fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2);
fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx;
add %o2,8,%i3 ! (5_0) hx += 8;
fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c;
fmuld %f34,%f38,%f24 ! (1_0) res *= xx;
or %g0,%o4,%i0
cmp counter,7
bl,pn %icc,.tail
faddd %f62,K4,%f34 ! (2_0) res += K4;
ba .main_loop
sub counter,7,counter ! counter
.align 16
.main_loop:
add %o7,1534,%o7 ! (5_0) iexp += 0x5fe;
and %i3,-16,%i3 ! (5_1) hx = -16;
lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px;
faddd %f58,K1,%f58 ! (0_1) res += K1;
add %i3,TBL,%i3 ! (5_1) addr = (char*)arr + hx;
sllx %o7,52,%o7 ! (5_1) iexp << 52;
stx %o7,[%fp+tmp0] ! (5_1) dlexp = *(double*)lexp;
faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo;
faddd %f22,K5,%f62 ! (3_1) res += K5;
add %i1,stridex,%l6 ! px += stridex
ldd [%i3],%f22 ! (5_1) dtmp0 = ((double*)addr)[0];
fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0;
faddd %f24,K2,%f26 ! (1_1) res += K2;
add %i0,stridey,%i1 ! px += stridey
ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0];
fmuld %f34,%f36,%f34 ! (2_1) res *= xx;
fmuld %f58,%f32,%f58 ! (0_1) res *= xx;
sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
lda [%l6]%asi,%f0 ! (0_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
lda [%l6+4]%asi,%f1 ! (0_0) ((float*)res)[1] = ((float*)px)[1];
sra %g1,10,%o2 ! (6_1) hx >>= 10;
fmuld %f22,%f22,%f10 ! (5_1) dtmp0 = dexp_hi * dexp_hi;
faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi;
fmuld %f62,%f40,%f32 ! (3_1) res *= xx;
cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000
ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp;
fand %f18,DC3,%f8 ! (5_1) res_c = vis_fand(res_c,DC3);
fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
bge,pn %icc,.update12 ! (6_1) if ( hx >= 0x7ff00000 )
and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
faddd %f34,K3,%f34 ! (2_1) res += K3;
.cont12:
fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res;
cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000
sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
fand %f0,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp;
bl,pn %icc,.update13 ! (6_1) if ( hx < 0x00100000 )
ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1];
fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
.cont13:
fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx;
add %o2,8,%l4 ! (6_1) hx += 8;
st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0];
fsubd %f28,%f8,%f6 ! (5_1) xx = res - res_c;
fmuld %f34,%f36,%f28 ! (2_1) res *= xx;
add %o7,1534,%o7 ! (6_1) iexp += 0x5fe;
st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1];
faddd %f32,K4,%f32 ! (3_1) res += K4;
lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px;
sllx %o7,52,%o7 ! (6_1) iexp << 52;
and %l4,-16,%l4 ! (6_1) hx = -16;
faddd %f26,K1,%f26 ! (1_1) res += K1;
add %i1,stridey,%i0 ! px += stridey
add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx;
stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp;
faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo;
fmuld %f6,%f10,%f58 ! (5_1) xx *= dtmp0;
add %l6,stridex,%l6 ! px += stridex
ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0];
faddd %f62,K5,%f62 ! (4_1) res += K5;
fmuld %f32,%f40,%f34 ! (3_1) res *= xx;
sra %g1,10,%o2 ! (0_0) hx >>= 10;
ldd [%i2],%f4 ! (1_1) dexp_hi = ((double*)addr)[0];
faddd %f28,K2,%f32 ! (2_1) res += K2;
fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
lda [%l6]%asi,%f6 ! (1_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
fmuld %f30,%f30,%f30 ! (6_1) dtmp0 = dexp_hi * dexp_hi;
sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
lda [%l6+4]%asi,%f7 ! (1_0) ((float*)res)[1] = ((float*)px)[1];
faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi;
fmuld %f62,%f60,%f38 ! (4_1) res *= xx;
cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp;
fand %f18,DC3,%f8 ! (6_1) res_c = vis_fand(res_c,DC3);
fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
bge,pn %icc,.update14 ! (0_0) if ( hx >= 0x7ff00000 )
and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
faddd %f34,K3,%f34 ! (3_1) res += K3;
.cont14:
fmuld %f4,%f26,%f26 ! (1_1) res = dexp_hi * res;
cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000
add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
fand %f6,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp;
bl,pn %icc,.update15 ! (0_0) if ( hx < 0x00100000 )
ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1];
fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
.cont15:
fmuld K6,%f58,%f62 ! (5_1) res = K6 * xx;
add %o2,8,%l2 ! (0_0) hx += 8;
st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0];
fsubd %f44,%f8,%f10 ! (6_1) xx = res - res_c;
fmuld %f34,%f40,%f44 ! (3_1) res *= xx;
nop
st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1];
faddd %f38,K4,%f38 ! (4_1) res += K4;
lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px;
sllx %o7,52,%o7 ! (0_0) iexp << 52;
and %l2,-16,%l2 ! (0_0) hx = -16;
faddd %f32,K1,%f32 ! (2_1) res += K1;
add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx;
add %l6,stridex,%l6 ! px += stridex
stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp;
faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo;
fmuld %f10,%f30,%f26 ! (6_1) xx *= dtmp0;
add %i0,stridey,%i1 ! px += stridey
ldd [%l2],%f30 ! (0_0) dtmp0 = ((double*)addr)[0];
faddd %f62,K5,%f62 ! (5_1) res += K5;
fmuld %f38,%f60,%f34 ! (4_1) res *= xx;
sra %g1,10,%o2 ! (1_0) hx >>= 10;
ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0];
faddd %f44,K2,%f38 ! (3_1) res += K2;
fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
lda [%l6]%asi,%f0 ! (2_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
fmuld %f30,%f30,%f30 ! (0_0) dtmp0 = dexp_hi * dexp_hi;
cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
lda [%l6+4]%asi,%f1 ! (2_0) ((float*)res)[1] = ((float*)px)[1];
faddd %f8,%f4,%f4 ! (1_1) res += dexp_hi;
fmuld %f62,%f58,%f36 ! (5_1) res *= xx;
bge,pn %icc,.update16 ! (1_0) if ( hx >= 0x7ff00000 )
ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp;
fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
.cont16:
fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000
and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
faddd %f34,K3,%f34 ! (4_1) res += K3;
fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res;
bl,pn %icc,.update17 ! (1_0) if ( hx < 0x00100000 )
sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
.cont17:
fmuld %f4,%f62,%f2 ! (1_1) res *= dlexp;
add %o7,1534,%o7 ! (1_0) iexp += 0x5fe;
ldd [%i4+8],%f4 ! (2_1) dexp_lo = ((double*)addr)[1];
fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2);
fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx;
add %o2,8,%i2 ! (1_0) hx += 8;
st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0];
fsubd %f28,%f8,%f6 ! (0_0) xx = res - res_c;
fmuld %f34,%f60,%f28 ! (4_1) res *= xx;
nop
st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1];
faddd %f36,K4,%f36 ! (5_1) res += K4;
lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px;
sllx %o7,52,%o7 ! (1_0) iexp << 52;
and %i2,-16,%i2 ! (1_0) hx = -16;
faddd %f38,K1,%f38 ! (3_1) res += K1;
add %i1,stridey,%i0 ! px += stridey
add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx;
stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp;
faddd %f32,%f4,%f8 ! (2_1) res += dexp_lo;
fmuld %f6,%f30,%f32 ! (0_0) xx *= dtmp0;
add %l6,stridex,%l6 ! px += stridex
ldd [%i2],%f30 ! (1_0) dtmp0 = ((double*)addr)[0];
faddd %f62,K5,%f62 ! (6_1) res += K5;
fmuld %f36,%f58,%f34 ! (5_1) res *= xx;
sra %g1,10,%o2 ! (2_0) hx >>= 10;
ldd [%i5],%f4 ! (3_1) dexp_hi = ((double*)addr)[0];
faddd %f28,K2,%f36 ! (4_1) res += K2;
fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
lda [%l6]%asi,%f6 ! (3_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
fmuld %f30,%f30,%f30 ! (1_0) dtmp0 = dexp_hi * dexp_hi;
cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
lda [%l6+4]%asi,%f7 ! (3_0) ((float*)res)[1] = ((float*)px)[1];
faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi;
fmuld %f62,%f26,%f40 ! (6_1) res *= xx;
bge,pn %icc,.update18 ! (2_0) if ( hx >= 0x7ff00000 )
ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp;
fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
.cont18:
fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000
and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
faddd %f34,K3,%f34 ! (5_1) res += K3;
fmuld %f4,%f38,%f38 ! (3_1) res = dexp_hi * res;
bl,pn %icc,.update19 ! (2_0) if ( hx < 0x00100000 )
sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
.cont19:
fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp;
add %o7,1534,%o7 ! (2_0) iexp += 0x5fe;
ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1];
fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2);
fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx;
add %o2,8,%i4 ! (2_0) hx += 8;
st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0];
fsubd %f44,%f8,%f10 ! (1_0) xx = res - res_c;
fmuld %f34,%f58,%f44 ! (5_1) res *= xx;
nop
st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1];
faddd %f40,K4,%f40 ! (6_1) res += K4;
lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px;
sllx %o7,52,%o7 ! (2_0) iexp << 52;
and %i4,-16,%i4 ! (2_0) hx = -16;
faddd %f36,K1,%f36 ! (4_1) res += K1;
add %l6,stridex,%l6 ! px += stridex
add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx;
stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp;
faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo;
fmuld %f10,%f30,%f38 ! (1_0) xx *= dtmp0;
add %i0,stridey,%i1 ! px += stridey
ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0];
faddd %f62,K5,%f62 ! (0_0) res += K5;
fmuld %f40,%f26,%f34 ! (6_1) res *= xx;
sra %g1,10,%o2 ! (3_0) hx >>= 10;
ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0];
faddd %f44,K2,%f40 ! (5_1) res += K2;
fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
lda [%l6]%asi,%f0 ! (4_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi;
cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
lda [%l6+4]%asi,%f1 ! (4_0) ((float*)res)[1] = ((float*)px)[1];
faddd %f8,%f4,%f8 ! (3_1) res += dexp_hi;
fmuld %f62,%f32,%f60 ! (0_0) res *= xx;
bge,pn %icc,.update20 ! (3_0) if ( hx >= 0x7ff00000 )
ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp;
fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3);
.cont20:
fmuld %f40,%f58,%f40 ! (5_1) res *= xx;
cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000
and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
faddd %f34,K3,%f10 ! (6_1) res += K3;
fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res;
bl,pn %icc,.update21 ! (3_0) if ( hx < 0x00100000 )
sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
.cont21:
fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp;
add %o7,1534,%o7 ! (3_0) iexp += 0x5fe;
ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1];
fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2);
fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx;
add %o2,8,%i5 ! (3_0) hx += 8;
st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0];
fsubd %f28,%f4,%f28 ! (2_0) xx = res - res_c;
fmuld %f10,%f26,%f4 ! (6_1) res *= xx;
nop
st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1];
faddd %f60,K4,%f60 ! (0_0) res += K4;
lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px;
sllx %o7,52,%o7 ! (3_0) iexp << 52;
and %i5,-16,%i5 ! (3_0) hx = -16;
faddd %f40,K1,%f40 ! (5_1) res += K1;
add %l6,stridex,%i0 ! px += stridex
add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx;
stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp;
faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo;
fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0;
add %i1,stridey,%l6 ! px += stridey
ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0];
faddd %f62,K5,%f62 ! (1_0) res += K5;
faddd %f4,K2,%f10 ! (6_1) res += K2;
sra %g1,10,%o2 ! (4_0) hx >>= 10;
nop
fmuld %f60,%f32,%f34 ! (0_0) res *= xx;
fmuld %f40,%f58,%f40 ! (5_1) res *= xx;
sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
lda [%i0]%asi,%f6 ! (5_0) ((float*)res)[0] = ((float*)px)[0];
for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi;
cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
lda [%i0+4]%asi,%f7 ! (5_0) ((float*)res)[1] = ((float*)px)[1];
faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi;
fand %f18,DC3,%f8 ! (3_0) res_c = vis_fand(res_c,DC3);
bge,pn %icc,.update22 ! (4_0) if ( hx >= 0x7ff00000 )
ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp;
fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
.cont22:
fmuld %f10,%f26,%f58 ! (6_1) res *= xx;
cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000
and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
faddd %f34,K3,%f60 ! (0_0) res += K3;
fmuld %f22,%f40,%f40 ! (5_1) res = dexp_hi * res;
bl,pn %icc,.update23 ! (4_0) if ( hx < 0x00100000 )
sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
.cont23:
fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp;
add %o7,1534,%o7 ! (4_0) iexp += 0x5fe;
ldd [%i3+8],%f34 ! (5_1) dexp_lo = ((double*)addr)[1];
fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2);
fmuld K6,%f36,%f30 ! (2_0) res = K6 * xx;
add %o2,8,%l1 ! (4_0) hx += 8;
st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0];
fsubd %f44,%f8,%f44 ! (3_0) xx = res - res_c;
fmuld %f60,%f32,%f60 ! (0_0) res *= xx;
sllx %o7,52,%o7 ! (4_0) iexp << 52;
st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1];
faddd %f62,K4,%f6 ! (1_0) res += K4;
lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px;
add %i0,stridex,%i1 ! px += stridex
and %l1,-16,%l1 ! (4_0) hx = -16;
faddd %f58,K1,%f58 ! (6_1) res += K1;
add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx;
add %l6,stridey,%i0 ! px += stridey
stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp;
faddd %f40,%f34,%f8 ! (5_1) res += dexp_lo;
fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0;
nop
ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0];
faddd %f30,K5,%f62 ! (2_0) res += K5;
fmuld %f6,%f38,%f34 ! (1_0) res *= xx;
sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
ldd [%l4],%f30 ! (6_1) dexp_hi = ((double*)addr)[0];
faddd %f60,K2,%f60 ! (0_0) res += K2;
for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
fmuld %f58,%f26,%f26 ! (6_1) res *= xx;
fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi;
cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000
lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
faddd %f8,%f22,%f22 ! (5_1) res += dexp_hi;
fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3);
bge,pn %icc,.update24 ! (5_0) if ( hx >= 0x7ff00000 )
ldd [%fp+tmp0],%f18 ! (5_1) dlexp = *(double*)lexp;
fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
.cont24:
fmuld %f60,%f32,%f58 ! (0_0) res *= xx;
sra %g1,10,%o2 ! (5_0) hx >>= 10;
cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000
faddd %f34,K3,%f34 ! (1_0) res += K3;
fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res;
bl,pn %icc,.update25 ! (5_0) if ( hx < 0x00100000 )
and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
.cont25:
fmuld %f22,%f18,%f2 ! (5_1) res *= dlexp;
subcc counter,7,counter ! counter -= 7;
ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1];
fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2);
fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx;
add %o2,8,%i3 ! (5_0) hx += 8;
st %f2,[%l6] ! (5_1) ((float*)py)[0] = ((float*)res)[0];
fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c;
fmuld %f34,%f38,%f24 ! (1_0) res *= xx;
st %f3,[%l6+4] ! (5_1) ((float*)py)[1] = ((float*)res)[1];
bpos,pt %icc,.main_loop
faddd %f62,K4,%f34 ! (2_0) res += K4;
add counter,7,counter
.tail:
add %o7,1534,%o7 ! (5_0) iexp += 0x5fe;
subcc counter,1,counter
bneg,a .begin
mov %i0,%o4
faddd %f58,K1,%f58 ! (0_1) res += K1;
faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo;
faddd %f22,K5,%f62 ! (3_1) res += K5;
fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0;
faddd %f24,K2,%f26 ! (1_1) res += K2;
add %i1,stridex,%l6 ! px += stridex
ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0];
fmuld %f34,%f36,%f34 ! (2_1) res *= xx;
fmuld %f58,%f32,%f58 ! (0_1) res *= xx;
add %i0,stridey,%i1 ! px += stridey
faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi;
fmuld %f62,%f40,%f32 ! (3_1) res *= xx;
ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp;
fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
faddd %f34,K3,%f34 ! (2_1) res += K3;
fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res;
fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp;
ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1];
fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx;
st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0];
fmuld %f34,%f36,%f28 ! (2_1) res *= xx;
st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1];
faddd %f32,K4,%f32 ! (3_1) res += K4;
subcc counter,1,counter
bneg,a .begin
mov %i1,%o4
faddd %f26,K1,%f26 ! (1_1) res += K1;
faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo;
add %l6,stridex,%l6 ! px += stridex
faddd %f62,K5,%f62 ! (4_1) res += K5;
fmuld %f32,%f40,%f34 ! (3_1) res *= xx;
add %i1,stridey,%i0 ! px += stridey
ldd [%i2],%f22 ! (1_1) dexp_hi = ((double*)addr)[0];
faddd %f28,K2,%f32 ! (2_1) res += K2;
fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi;
fmuld %f62,%f60,%f38 ! (4_1) res *= xx;
ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp;
fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
faddd %f34,K3,%f34 ! (3_1) res += K3;
fmuld %f22,%f26,%f26 ! (1_1) res = dexp_hi * res;
fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp;
ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1];
st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0];
fmuld %f34,%f40,%f44 ! (3_1) res *= xx;
st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1];
faddd %f38,K4,%f38 ! (4_1) res += K4;
subcc counter,1,counter
bneg,a .begin
mov %i0,%o4
faddd %f32,K1,%f32 ! (2_1) res += K1;
add %l6,stridex,%l6 ! px += stridex
faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo;
add %i0,stridey,%i1 ! px += stridey
fmuld %f38,%f60,%f34 ! (4_1) res *= xx;
ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0];
faddd %f44,K2,%f38 ! (3_1) res += K2;
fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
faddd %f8,%f22,%f22 ! (1_1) res += dexp_hi;
ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp;
fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
faddd %f34,K3,%f34 ! (4_1) res += K3;
fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res;
fmuld %f22,%f62,%f2 ! (1_1) res *= dlexp;
ldd [%i4+8],%f22 ! (2_1) dexp_lo = ((double*)addr)[1];
st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0];
fmuld %f34,%f60,%f28 ! (4_1) res *= xx;
st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1];
subcc counter,1,counter
bneg,a .begin
mov %i1,%o4
faddd %f38,K1,%f38 ! (3_1) res += K1;
faddd %f32,%f22,%f8 ! (2_1) res += dexp_lo;
add %l6,stridex,%l6 ! px += stridex
add %i1,stridey,%i0 ! px += stridey
ldd [%i5],%f22 ! (3_1) dexp_hi = ((double*)addr)[0];
faddd %f28,K2,%f36 ! (4_1) res += K2;
fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi;
ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp;
fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
fmuld %f22,%f38,%f38 ! (3_1) res = dexp_hi * res;
fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp;
ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1];
st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0];
st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1];
subcc counter,1,counter
bneg,a .begin
mov %i0,%o4
faddd %f36,K1,%f36 ! (4_1) res += K1;
faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo;
add %i0,stridey,%i1 ! px += stridey
add %l6,stridex,%l6 ! px += stridex
ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0];
fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
faddd %f8,%f22,%f8 ! (3_1) res += dexp_hi;
ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp;
fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res;
fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp;
ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1];
st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0];
st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1];
subcc counter,1,counter
bneg,a .begin
mov %i1,%o4
faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo;
add %l6,stridex,%i0 ! px += stridex
add %i1,stridey,%l6 ! px += stridey
faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi;
ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp;
fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp;
st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0];
st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1];
ba .begin
add %i1,stridey,%o4
.align 16
! .spec0: special-case path that bypasses the polynomial pipeline and
! stores res = DONE / res (DONE is 1.0 per the .CONST_TBL comment).
! The 64-bit result in %f0:%f1 is written with two 32-bit stores to
! [%o4] and [%o4+4]; %i1 is advanced by stridex and %o4 by stridey, then
! control goes to .begin1 with counter decremented in the delay slot.
! NOTE(review): presumably reached for Inf/NaN inputs; the dispatching
! test is outside this chunk -- confirm.
.spec0:
fdivd DONE,%f0,%f0 ! res = DONE / res;
add %i1,stridex,%i1 ! px += stridex
st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0];
st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1];
add %o4,stridey,%o4 ! py += stridey
ba .begin1
sub counter,1,counter ! delay slot: one element consumed
.align 16
! .spec1: second special-case path.  The tests are made in this order:
!   (%i2 | %l4) == 0 -> res = DONE / res, stored at label 2
!   %g1 < 0          -> res = sqrt(res),  stored at label 2
!   %g1 >= %i4       -> label 1: mask res with the constant at
!                       [%o3+0x50], fxtod, add the constant at [%o3+0x58]
!   otherwise        -> fxtod applied to res directly
! Both conversion paths spill the upper word of the converted value to
! [%fp+tmp0], reload it into %g1, derive iexp (%o7) and the table index
! bits (%o2), rebuild the masked value in %f44 and rejoin at .cont_spec.
! The three ,a branches annul their delay slot, so fdivd / fsqrtd / ldd
! only execute when the corresponding branch is taken.
! NOTE(review): this looks like zero / negative / subnormal handling with
! %g1 = hx and %i2,%l4 = the two words of x; those registers are set
! outside this chunk -- confirm.  Offsets 0x50 and 0x58 match DC4 and
! D2ON51 in .CONST_TBL if %o3 holds the table address -- confirm.
.spec1:
orcc %i2,%l4,%g0 ! Z is set when (%i2 | %l4) == 0
bz,a 2f ! both zero: store DONE / res
fdivd DONE,%f0,%f0 ! res = DONE / res;
cmp %g1,0
bl,a 2f ! %g1 negative: store sqrt(res)
fsqrtd %f0,%f0 ! res = sqrt(res);
cmp %g1,%i4
bge,a 1f ! %g1 >= %i4: take the mask-and-add conversion
ldd [%o3+0x50],%f18
fxtod %f0,%f0 ! res = *(long long*)&res;
st %f0,[%fp+tmp0] ! spill upper word of the converted value
fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
ld [%fp+tmp0],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
sra %g1,10,%o2 ! (6_1) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
ba .cont_spec
sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
! Label 1: conversion used when %g1 >= %i4.
1:
fand %f0,%f18,%f0 ! res = vis_fand(res,DC4);
ldd [%o3+0x58],%f28
fxtod %f0,%f0 ! res = *(long long*)&res;
faddd %f0,%f28,%f0 ! res += D2ON51;
st %f0,[%fp+tmp0] ! spill upper word of the converted value
fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
ld [%fp+tmp0],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
sra %g1,10,%o2 ! (6_1) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
ba .cont_spec
sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
! Label 2: store the directly computed result and move to the next element.
2:
add %i1,stridex,%i1 ! px += stridex
st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0];
st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1];
add %o4,stridey,%o4 ! py += stridey
ba .begin1
sub counter,1,counter ! delay slot: one element consumed
.align 16
! .update0: truncation hook for the pipelined loop.
! If counter <= 1 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %l6 - stridex, tmp_counter = counter - 1) and
! counter is clamped to 1.  Both paths resume at .cont0.
! NOTE(review): presumably entered when a freshly loaded element needs
! the special-case code, so the pipeline is drained and restarted from
! tmp_px; the entry test and restart logic are outside this chunk.
.update0:
cmp counter,1
ble .cont0 ! at most 1 left: no truncation needed
nop
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,1,tmp_counter ! elements left after the clamp
ba .cont0
mov 1,counter ! delay slot: clamp counter
.align 16
! .update1: in-place fix-up hook for the pipelined loop.
! If counter <= 1 (signed) the loop resumes at .cont1 unchanged; note that
! the delay-slot sub still overwrites %i1 on that path.  Otherwise the
! low word of the element at %l6 - stridex is loaded into %i2 and:
!   %g1 < 0, or (%g1 | %i2) == 0 -> label 1: record tmp_px / tmp_counter
!                                   and clamp counter to 1
!   %g1 >= 0x00080000            -> label 2: mask res (%f8) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, iexp (%o7) becomes 0x5fe - ((hx >> 21) - 537),
! %o2 becomes (hx >> 10) & 0x7f8 and %f28 = (res & DC0) | DC1.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address -- confirm.
.update1:
cmp counter,1
ble .cont1 ! at most 1 left: nothing to do
sub %l6,stridex,%i1 ! delay slot (always runs): address of the element
ld [%i1+4],%i2 ! low word of the element
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%i2,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%i3 ! delay slot: threshold for the split below
cmp %g1,%i3
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f18
fxtod %f8,%f8 ! res = *(long long*)&res;
st %f8,[%fp+tmp7] ! spill upper word of the converted value
fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (0_0) hx >>= 10;
for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
sub %o7,537,%o7 ! iexp -= 537
sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
ba .cont1
add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f8,%f18,%f8 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f8,%f8 ! res = *(long long*)&res;
ldd [%o3+0x58],%f18
faddd %f8,%f18,%f8 ! add the constant loaded from [%o3+0x58]
st %f8,[%fp+tmp7] ! spill upper word of the converted value
fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (0_0) hx >>= 10;
for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
sub %o7,537,%o7 ! iexp -= 537
sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
ba .cont1
add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,1,tmp_counter ! elements left after the clamp
ba .cont1
mov 1,counter ! delay slot: clamp counter
.align 16
! .update2: truncation hook for the pipelined loop.
! If counter <= 2 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %l6 - stridex, tmp_counter = counter - 2) and
! counter is clamped to 2.  Both paths resume at .cont2.
! NOTE(review): presumably entered when a freshly loaded element needs
! the special-case code; the entry test is outside this chunk.
.update2:
cmp counter,2
ble .cont2 ! at most 2 left: no truncation needed
nop
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,2,tmp_counter ! elements left after the clamp
ba .cont2
mov 2,counter ! delay slot: clamp counter
.align 16
! .update3: in-place fix-up hook for the pipelined loop.
! If counter <= 2 (signed) the loop resumes at .cont3 unchanged; note that
! the delay-slot sub still overwrites %i1 on that path.  Otherwise the
! low word of the element at %l6 - stridex is loaded into %i2 and:
!   %g1 < 0, or (%g1 | %i2) == 0 -> label 1: record tmp_px / tmp_counter
!                                   and clamp counter to 2
!   %g1 >= 0x00080000            -> label 2: mask res (%f0) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, %o7 becomes (hx >> 21) - 537 (not negated here),
! %o2 becomes (hx >> 10) & 0x7f8 and %f44 = (res & DC0) | DC1.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address.  The negation of iexp is presumably done
! after .cont3 -- confirm.
.update3:
cmp counter,2
ble .cont3 ! at most 2 left: nothing to do
sub %l6,stridex,%i1 ! delay slot (always runs): address of the element
ld [%i1+4],%i2 ! low word of the element
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%i2,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%i3 ! delay slot: threshold for the split below
cmp %g1,%i3
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f18
fxtod %f0,%f0 ! res = *(long long*)&res;
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
sra %g1,10,%o2 ! (1_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
ba .cont3
and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f0,%f18,%f0 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f0,%f0 ! res = *(long long*)&res;
ldd [%o3+0x58],%f18
faddd %f0,%f18,%f0 ! add the constant loaded from [%o3+0x58]
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
sra %g1,10,%o2 ! (1_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
ba .cont3
and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,2,tmp_counter ! elements left after the clamp
ba .cont3
mov 2,counter ! delay slot: clamp counter
.align 16
! .update4: truncation hook for the pipelined loop.
! If counter <= 3 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %l6 - stridex, tmp_counter = counter - 3) and
! counter is clamped to 3.  Both paths resume at .cont4.
! NOTE(review): presumably entered when a freshly loaded element needs
! the special-case code; the entry test is outside this chunk.
.update4:
cmp counter,3
ble .cont4 ! at most 3 left: no truncation needed
nop
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,3,tmp_counter ! elements left after the clamp
ba .cont4
mov 3,counter ! delay slot: clamp counter
.align 16
! .update5: in-place fix-up hook for the pipelined loop.
! If counter <= 3 (signed) the loop resumes at .cont5 unchanged; note that
! the delay-slot sub still overwrites %i1 on that path.  Otherwise the
! low word of the element at %l6 - stridex is loaded into %i3 and:
!   %g1 < 0, or (%g1 | %i3) == 0 -> label 1: record tmp_px / tmp_counter
!                                   and clamp counter to 3
!   %g1 >= 0x00080000            -> label 2: mask res (%f6) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, %o7 becomes (hx >> 21) - 537 (not negated here),
! %o2 becomes (hx >> 10) & 0x7f8 and %f28 = (res & DC0) | DC1.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address.  The negation of iexp is presumably done
! after .cont5 -- confirm.
.update5:
cmp counter,3
ble .cont5 ! at most 3 left: nothing to do
sub %l6,stridex,%i1 ! delay slot (always runs): address of the element
ld [%i1+4],%i3 ! low word of the element
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%i3,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%i4 ! delay slot: threshold for the split below
cmp %g1,%i4
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f18
fxtod %f6,%f6 ! res = *(long long*)&res;
st %f6,[%fp+tmp7] ! spill upper word of the converted value
fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (2_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
ba .cont5
for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f6,%f18,%f6 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f6,%f6 ! res = *(long long*)&res;
ldd [%o3+0x58],%f18
faddd %f6,%f18,%f6 ! add the constant loaded from [%o3+0x58]
st %f6,[%fp+tmp7] ! spill upper word of the converted value
fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (2_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
ba .cont5
for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,3,tmp_counter ! elements left after the clamp
ba .cont5
mov 3,counter ! delay slot: clamp counter
.align 16
! .update6: truncation hook for the pipelined loop.
! If counter <= 4 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %l6 - stridex, tmp_counter = counter - 4) and
! counter is clamped to 4.  Both paths resume at .cont6.
! NOTE(review): presumably entered when a freshly loaded element needs
! the special-case code; the entry test is outside this chunk.
.update6:
cmp counter,4
ble .cont6 ! at most 4 left: no truncation needed
nop
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,4,tmp_counter ! elements left after the clamp
ba .cont6
mov 4,counter ! delay slot: clamp counter
.align 16
! .update7: in-place fix-up hook for the pipelined loop.
! %i1 is first set to %l6 - stridex.  If counter <= 4 (signed) the loop
! resumes at .cont7; the faddd in the delay slot runs on both paths.
! Otherwise the low word of the element at %i1 is loaded into %i3 and:
!   %g1 < 0, or (%g1 | %i3) == 0 -> label 1: record tmp_px / tmp_counter
!                                   and clamp counter to 4
!   %g1 >= 0x00080000            -> label 2: mask res (%f0) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, %o7 becomes (hx >> 21) - 537 (not negated here),
! %o2 becomes (hx >> 10) & 0x7f8 and %f44 = (res & DC0) | DC1.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address.  The negation of iexp is presumably done
! after .cont7 -- confirm.
.update7:
sub %l6,stridex,%i1 ! address of the element
cmp counter,4
ble .cont7 ! at most 4 left: nothing to do
faddd %f34,K3,%f6 ! (6_1) res += K3;
ld [%i1+4],%i3 ! low word of the element
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%i3,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%i5 ! delay slot: threshold for the split below
cmp %g1,%i5
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f18
fxtod %f0,%f0 ! res = *(long long*)&res;
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (3_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
ba .cont7
for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f0,%f18,%f0 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f0,%f0 ! res = *(long long*)&res;
ldd [%o3+0x58],%f18
faddd %f0,%f18,%f0 ! add the constant loaded from [%o3+0x58]
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (3_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
ba .cont7
for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,4,tmp_counter ! elements left after the clamp
ba .cont7
mov 4,counter ! delay slot: clamp counter
.align 16
! .update8: truncation hook for the pipelined loop.
! If counter <= 5 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %l6, without subtracting stridex, and
! tmp_counter = counter - 5) and counter is clamped to 5.  Both paths
! resume at .cont8.
! NOTE(review): presumably entered when a freshly loaded element needs
! the special-case code, at a point where %l6 has not yet been advanced
! past it; the entry test is outside this chunk -- confirm.
.update8:
cmp counter,5
ble .cont8 ! at most 5 left: no truncation needed
nop
mov %l6,tmp_px ! remember where to restart
sub counter,5,tmp_counter ! elements left after the clamp
ba .cont8
mov 5,counter ! delay slot: clamp counter
.align 16
! .update9: in-place fix-up hook for the pipelined loop.
! The low word of the element at %l6 is loaded into %i3 first.  If
! counter <= 5 (signed) the loop resumes at .cont9; the fand in the
! delay slot runs on both paths.  Otherwise:
!   %g1 < 0, or (%g1 | %i3) == 0 -> label 1: record tmp_px (= %l6) and
!                                   tmp_counter, clamp counter to 5
!   %g1 >= 0x00080000            -> label 2: mask res (%f8) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, %o7 becomes -((hx >> 21) - 537), %o2 becomes
! (hx >> 10) & 0x7f8 and %f24 = (res & DC0) | DC1.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address -- confirm.
.update9:
ld [%l6+4],%i3 ! low word of the element
cmp counter,5
ble .cont9 ! at most 5 left: nothing to do
fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%i3,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%i1 ! delay slot: threshold for the split below
cmp %g1,%i1
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f18
fxtod %f8,%f8 ! res = *(long long*)&res;
st %f8,[%fp+tmp7] ! spill upper word of the converted value
fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (4_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
ba .cont9
for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f8,%f18,%f8 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f8,%f8 ! res = *(long long*)&res;
ldd [%o3+0x58],%f18
faddd %f8,%f18,%f8 ! add the constant loaded from [%o3+0x58]
st %f8,[%fp+tmp7] ! spill upper word of the converted value
fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (4_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
ba .cont9
for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
mov %l6,tmp_px ! remember where to restart
sub counter,5,tmp_counter ! elements left after the clamp
ba .cont9
mov 5,counter ! delay slot: clamp counter
.align 16
! .update10: truncation hook for the pipelined loop.
! If counter <= 6 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %i0, tmp_counter = counter - 6) and counter is
! clamped to 6.  Both paths resume at .cont10.
! NOTE(review): presumably %i0 is the address of the element that needs
! the special-case code at this point; the entry test is outside this
! chunk -- confirm.
.update10:
cmp counter,6
ble .cont10 ! at most 6 left: no truncation needed
nop
mov %i0,tmp_px ! remember where to restart
sub counter,6,tmp_counter ! elements left after the clamp
ba .cont10
mov 6,counter ! delay slot: clamp counter
.align 16
! .update11: in-place fix-up hook for the pipelined loop.
! The low word of the element at %i0 is loaded into %i3 first.  If
! counter <= 6 (signed) the loop resumes at .cont11; the fand in the
! delay slot runs on both paths.  Otherwise:
!   %g1 < 0, or (%g1 | %i3) == 0 -> label 1: record tmp_px (= %i0) and
!                                   tmp_counter, clamp counter to 6
!   %g1 >= 0x00080000            -> label 2: mask res (%f0) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! %i3 is reused for the 0x00080000 threshold once the zero test is done.
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, %o7 becomes -((hx >> 21) - 537), %o2 becomes
! (hx >> 10) & 0x7f8 and %f28 = (res & DC0) | DC1.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address -- confirm.
.update11:
ld [%i0+4],%i3 ! low word of the element
cmp counter,6
ble .cont11 ! at most 6 left: nothing to do
fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%i3,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%i3 ! delay slot: threshold for the split below
cmp %g1,%i3
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f18
fxtod %f0,%f0 ! res = *(long long*)&res;
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (5_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
ba .cont11
for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f0,%f18,%f0 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f0,%f0 ! res = *(long long*)&res;
ldd [%o3+0x58],%f18
faddd %f0,%f18,%f0 ! add the constant loaded from [%o3+0x58]
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (5_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
ba .cont11
for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
mov %i0,tmp_px ! remember where to restart
sub counter,6,tmp_counter ! elements left after the clamp
ba .cont11
mov 6,counter ! delay slot: clamp counter
.align 16
! .update12: truncation hook for the pipelined loop.
! If counter <= 0 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %l6 - stridex, tmp_counter = counter) and
! counter is set to 0.  Both paths resume at .cont12; the faddd in the
! delay slot of ble is not annulled, so it runs on both paths.
! NOTE(review): presumably entered when a freshly loaded element needs
! the special-case code, and the faddd replays pipeline work displaced
! by the branch that came here; the entry code is outside this chunk.
.update12:
cmp counter,0
ble .cont12 ! nothing left: no truncation needed
faddd %f34,K3,%f34 ! (2_1) res += K3;
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,0,tmp_counter ! all remaining elements are deferred
ba .cont12
mov 0,counter ! delay slot: clamp counter
.align 16
! .update13: in-place fix-up hook for the pipelined loop.
! %l4 is first set to %l6 - stridex.  If counter <= 0 (signed) the loop
! resumes at .cont13; the fpadd32 in the delay slot runs on both paths.
! Otherwise the low word of the element at %l4 is loaded (into %l4) and:
!   %g1 < 0, or (%g1 | %l4) == 0 -> label 1: record tmp_px / tmp_counter
!                                   and set counter to 0
!   %g1 >= 0x00080000            -> label 2: mask res (%f6) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! %l4 is reused for the 0x00080000 threshold and %f62 as the scratch
! register for both constants.
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, %o7 becomes -((hx >> 21) - 537), %o2 becomes
! (hx >> 10) & 0x7f8, %f44 = (res & DC0) | DC1, and the fpadd32 into
! %f18 is redone from the new %f44.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address -- confirm.
.update13:
sub %l6,stridex,%l4 ! address of the element
cmp counter,0
ble .cont13 ! nothing left: nothing to do
fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
ld [%l4+4],%l4 ! low word of the element
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%l4,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%l4 ! delay slot: threshold for the split below
cmp %g1,%l4
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f62
fxtod %f6,%f6 ! res = *(long long*)&res;
st %f6,[%fp+tmp7] ! spill upper word of the converted value
fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
sra %g1,10,%o2 ! (6_1) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
ba .cont13
fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f6,%f62,%f6 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f6,%f6 ! res = *(long long*)&res;
ldd [%o3+0x58],%f62
faddd %f6,%f62,%f6 ! add the constant loaded from [%o3+0x58]
st %f6,[%fp+tmp7] ! spill upper word of the converted value
fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
sra %g1,10,%o2 ! (6_1) hx >>= 10;
for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
ba .cont13
fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,0,tmp_counter ! all remaining elements are deferred
ba .cont13
mov 0,counter ! delay slot: clamp counter
.align 16
! .update14: truncation hook for the pipelined loop.
! If counter <= 1 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %l6 - stridex, tmp_counter = counter - 1) and
! counter is clamped to 1.  Both paths resume at .cont14; the faddd in
! the delay slot of ble is not annulled, so it runs on both paths.
! NOTE(review): presumably entered when a freshly loaded element needs
! the special-case code, and the faddd replays pipeline work displaced
! by the branch that came here; the entry code is outside this chunk.
.update14:
cmp counter,1
ble .cont14 ! at most 1 left: no truncation needed
faddd %f34,K3,%f34 ! (3_1) res += K3;
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,1,tmp_counter ! elements left after the clamp
ba .cont14
mov 1,counter ! delay slot: clamp counter
.align 16
! .update15: in-place fix-up hook for the pipelined loop.
! %l2 is first set to %l6 - stridex.  If counter <= 1 (signed) the loop
! resumes at .cont15; the fpadd32 in the delay slot runs on both paths.
! Otherwise the low word of the element at %l2 is loaded (into %l2) and:
!   %g1 < 0, or (%g1 | %l2) == 0 -> label 1: record tmp_px / tmp_counter
!                                   and clamp counter to 1
!   %g1 >= 0x00080000            -> label 2: mask res (%f0) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! %l2 is reused for the 0x00080000 threshold and %f62 as the scratch
! register for both constants.
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, iexp (%o7) becomes 0x5fe - ((hx >> 21) - 537),
! %o2 becomes (hx >> 10) & 0x7f8, %f28 = (res & DC0) | DC1, and the
! fpadd32 into %f18 is redone from the new %f28.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address -- confirm.
.update15:
sub %l6,stridex,%l2 ! address of the element
cmp counter,1
ble .cont15 ! at most 1 left: nothing to do
fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
ld [%l2+4],%l2 ! low word of the element
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%l2,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%l2 ! delay slot: threshold for the split below
cmp %g1,%l2
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f62
fxtod %f0,%f0 ! res = *(long long*)&res;
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (0_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
ba .cont15
fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f0,%f62,%f0 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f0,%f0 ! res = *(long long*)&res;
ldd [%o3+0x58],%f62
faddd %f0,%f62,%f0 ! add the constant loaded from [%o3+0x58]
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (0_0) hx >>= 10;
for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
sub %o7,537,%o7 ! iexp -= 537
sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
ba .cont15
fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,1,tmp_counter ! elements left after the clamp
ba .cont15
mov 1,counter ! delay slot: clamp counter
.align 16
! .update16: truncation hook for the pipelined loop.
! If counter <= 2 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %l6 - stridex, tmp_counter = counter - 2) and
! counter is clamped to 2.  Both paths resume at .cont16; the fand in
! the delay slot of ble is not annulled, so it runs on both paths.
! NOTE(review): presumably entered when a freshly loaded element needs
! the special-case code, and the fand replays pipeline work displaced
! by the branch that came here; the entry code is outside this chunk.
.update16:
cmp counter,2
ble .cont16 ! at most 2 left: no truncation needed
fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,2,tmp_counter ! elements left after the clamp
ba .cont16
mov 2,counter ! delay slot: clamp counter
.align 16
! .update17: in-place fix-up hook for the pipelined loop.
! %i2 is first set to %l6 - stridex.  If counter <= 2 (signed) the loop
! resumes at .cont17; the fand in the delay slot runs on both paths.
! Otherwise the low word of the element at %i2 is loaded (into %i2) and:
!   %g1 < 0, or (%g1 | %i2) == 0 -> label 1: record tmp_px / tmp_counter
!                                   and clamp counter to 2
!   %g1 >= 0x00080000            -> label 2: mask res (%f6) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! %i2 is reused for the 0x00080000 threshold and %f2 as the scratch
! register for both constants.
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, %o7 becomes -((hx >> 21) - 537), %o2 becomes
! (hx >> 10) & 0x7f8 and %f44 = (res & DC0) | DC1.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address -- confirm.
.update17:
sub %l6,stridex,%i2 ! address of the element
cmp counter,2
ble .cont17 ! at most 2 left: nothing to do
fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
ld [%i2+4],%i2 ! low word of the element
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%i2,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%i2 ! delay slot: threshold for the split below
cmp %g1,%i2
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f2
fxtod %f6,%f6 ! res = *(long long*)&res;
st %f6,[%fp+tmp7] ! spill upper word of the converted value
fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (1_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
ba .cont17
for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f6,%f2,%f6 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f6,%f6 ! res = *(long long*)&res;
ldd [%o3+0x58],%f2
faddd %f6,%f2,%f6 ! add the constant loaded from [%o3+0x58]
st %f6,[%fp+tmp7] ! spill upper word of the converted value
fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (1_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
ba .cont17
for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,2,tmp_counter ! elements left after the clamp
ba .cont17
mov 2,counter ! delay slot: clamp counter
.align 16
! .update18: truncation hook for the pipelined loop.
! If counter <= 3 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %l6 - stridex, tmp_counter = counter - 3) and
! counter is clamped to 3.  Both paths resume at .cont18; the fand in
! the delay slot of ble is not annulled, so it runs on both paths.
! NOTE(review): presumably entered when a freshly loaded element needs
! the special-case code, and the fand replays pipeline work displaced
! by the branch that came here; the entry code is outside this chunk.
.update18:
cmp counter,3
ble .cont18 ! at most 3 left: no truncation needed
fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,3,tmp_counter ! elements left after the clamp
ba .cont18
mov 3,counter ! delay slot: clamp counter
.align 16
! .update19: in-place fix-up hook for the pipelined loop.
! %i4 is first set to %l6 - stridex.  If counter <= 3 (signed) the loop
! resumes at .cont19; the fand in the delay slot runs on both paths.
! Otherwise the low word of the element at %i4 is loaded (into %i4) and:
!   %g1 < 0, or (%g1 | %i4) == 0 -> label 1: record tmp_px / tmp_counter
!                                   and clamp counter to 3
!   %g1 >= 0x00080000            -> label 2: mask res (%f0) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! %i4 is reused for the 0x00080000 threshold and %f2 as the scratch
! register for both constants.
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, %o7 becomes -((hx >> 21) - 537), %o2 becomes
! (hx >> 10) & 0x7f8 and %f28 = (res & DC0) | DC1.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address -- confirm.
.update19:
sub %l6,stridex,%i4 ! address of the element
cmp counter,3
ble .cont19 ! at most 3 left: nothing to do
fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
ld [%i4+4],%i4 ! low word of the element
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%i4,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%i4 ! delay slot: threshold for the split below
cmp %g1,%i4
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f2
fxtod %f0,%f0 ! res = *(long long*)&res;
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (2_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
ba .cont19
for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f0,%f2,%f0 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f0,%f0 ! res = *(long long*)&res;
ldd [%o3+0x58],%f2
faddd %f0,%f2,%f0 ! add the constant loaded from [%o3+0x58]
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (2_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
ba .cont19
for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,3,tmp_counter ! elements left after the clamp
ba .cont19
mov 3,counter ! delay slot: clamp counter
.align 16
! .update20: truncation hook for the pipelined loop.
! If counter <= 4 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %l6 - stridex, tmp_counter = counter - 4) and
! counter is clamped to 4.  Both paths resume at .cont20; the fand in
! the delay slot of ble is not annulled, so it runs on both paths.
! NOTE(review): presumably entered when a freshly loaded element needs
! the special-case code, and the fand replays pipeline work displaced
! by the branch that came here; the entry code is outside this chunk.
.update20:
cmp counter,4
ble .cont20 ! at most 4 left: no truncation needed
fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3);
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,4,tmp_counter ! elements left after the clamp
ba .cont20
mov 4,counter ! delay slot: clamp counter
.align 16
! .update21: in-place fix-up hook for the pipelined loop.
! %i5 is first set to %l6 - stridex.  If counter <= 4 (signed) the loop
! resumes at .cont21; the fand in the delay slot runs on both paths.
! Otherwise the low word of the element at %i5 is loaded (into %i5) and:
!   %g1 < 0, or (%g1 | %i5) == 0 -> label 1: record tmp_px / tmp_counter
!                                   and clamp counter to 4
!   %g1 >= 0x00080000            -> label 2: mask res (%f6) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! %i5 is reused for the 0x00080000 threshold and %f34 as the scratch
! register for both constants.
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, %o7 becomes -((hx >> 21) - 537), %o2 becomes
! (hx >> 10) & 0x7f8 and %f44 = (res & DC0) | DC1.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address -- confirm.
.update21:
sub %l6,stridex,%i5 ! address of the element
cmp counter,4
ble .cont21 ! at most 4 left: nothing to do
fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
ld [%i5+4],%i5 ! low word of the element
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%i5,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%i5 ! delay slot: threshold for the split below
cmp %g1,%i5
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f34
fxtod %f6,%f6 ! res = *(long long*)&res;
st %f6,[%fp+tmp7] ! spill upper word of the converted value
fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (3_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
ba .cont21
for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f6,%f34,%f6 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f6,%f6 ! res = *(long long*)&res;
ldd [%o3+0x58],%f34
faddd %f6,%f34,%f6 ! add the constant loaded from [%o3+0x58]
st %f6,[%fp+tmp7] ! spill upper word of the converted value
fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (3_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
ba .cont21
for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
sub %l6,stridex,tmp_px ! remember where to restart
sub counter,4,tmp_counter ! elements left after the clamp
ba .cont21
mov 4,counter ! delay slot: clamp counter
.align 16
! .update22: truncation hook for the pipelined loop.
! If counter <= 5 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %i0 - stridex, tmp_counter = counter - 5) and
! counter is clamped to 5.  Both paths resume at .cont22; the fmuld in
! the delay slot of ble is not annulled, so it runs on both paths.
! NOTE(review): presumably %i0 is the running input pointer at this
! stage and the fmuld replays pipeline work displaced by the branch
! that came here; the entry code is outside this chunk -- confirm.
.update22:
cmp counter,5
ble .cont22 ! at most 5 left: no truncation needed
fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
sub %i0,stridex,tmp_px ! remember where to restart
sub counter,5,tmp_counter ! elements left after the clamp
ba .cont22
mov 5,counter ! delay slot: clamp counter
.align 16
! .update23: in-place fix-up hook for the pipelined loop.
! %l1 is first set to %i0 - stridex.  If counter <= 5 (signed) the loop
! resumes at .cont23; the fand in the delay slot runs on both paths.
! Otherwise the low word of the element at %l1 is loaded (into %l1) and:
!   %g1 < 0, or (%g1 | %l1) == 0 -> label 1: record tmp_px (= %i0 -
!                                   stridex) and tmp_counter, clamp
!                                   counter to 5
!   %g1 >= 0x00080000            -> label 2: mask res (%f0) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! %l1 is reused for the 0x00080000 threshold and %f34 as the scratch
! register for both constants.
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, %o7 becomes -((hx >> 21) - 537), %o2 becomes
! (hx >> 10) & 0x7f8 and %f24 = (res & DC0) | DC1.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address -- confirm.
.update23:
sub %i0,stridex,%l1 ! address of the element
cmp counter,5
ble .cont23 ! at most 5 left: nothing to do
fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
ld [%l1+4],%l1 ! low word of the element
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%l1,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
sethi %hi(0x00080000),%l1 ! delay slot: threshold for the split below
cmp %g1,%l1
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f34
fxtod %f0,%f0 ! res = *(long long*)&res;
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (4_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
ba .cont23
for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f0,%f34,%f0 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f0,%f0 ! res = *(long long*)&res;
ldd [%o3+0x58],%f34
faddd %f0,%f34,%f0 ! add the constant loaded from [%o3+0x58]
st %f0,[%fp+tmp7] ! spill upper word of the converted value
fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (4_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
ba .cont23
for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
sub %i0,stridex,tmp_px ! remember where to restart
sub counter,5,tmp_counter ! elements left after the clamp
ba .cont23
mov 5,counter ! delay slot: clamp counter
.align 16
! .update24: truncation hook for the pipelined loop.
! If counter <= 6 (signed) nothing changes.  Otherwise the restart state
! is recorded (tmp_px = %i1 - stridex, tmp_counter = counter - 6) and
! counter is clamped to 6.  Both paths resume at .cont24; the fmuld in
! the delay slot of ble is not annulled, so it runs on both paths.
! NOTE(review): presumably %i1 is the running input pointer at this
! stage and the fmuld replays pipeline work displaced by the branch
! that came here; the entry code is outside this chunk -- confirm.
.update24:
cmp counter,6
ble .cont24 ! at most 6 left: no truncation needed
fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
sub %i1,stridex,tmp_px ! remember where to restart
sub counter,6,tmp_counter ! elements left after the clamp
ba .cont24
mov 6,counter ! delay slot: clamp counter
.align 16
! .update25: in-place fix-up hook for the pipelined loop.
! %i3 is first set to %i1 - stridex.  If counter <= 6 (signed) the loop
! resumes at .cont25; the fand in the delay slot runs on both paths.
! Otherwise the low word of the element at %i3 is loaded (into %i3) and:
!   %g1 < 0, or (%g1 | %i3) == 0 -> label 1: record tmp_px (= %i1 -
!                                   stridex) and tmp_counter, clamp
!                                   counter to 6
! For the remaining cases the element address is recomputed (the load
! above overwrote %i3) and both words of the element are reloaded from
! memory into %f10:%f11, which serves as res in this handler.  Then:
!   %g1 >= 0x00080000            -> label 2: mask res (%f10) with the
!                                   constant at [%o3+0x50], fxtod, then
!                                   add the constant at [%o3+0x58]
!   otherwise                    -> fxtod applied to res directly
! %i3 is reused for the 0x00080000 threshold and %f60 as the scratch
! register for both constants.
! After either conversion the upper word of res goes through
! [%fp+tmp7] into %g1, %o7 becomes -((hx >> 21) - 537), %o2 becomes
! (hx >> 10) & 0x7f8 and %f28 = (res & DC0) | DC1.
! NOTE(review): presumably %g1 = hx on entry and this is the subnormal
! input fix-up; offsets 0x50 / 0x58 match DC4 / D2ON51 in .CONST_TBL if
! %o3 holds the table address.  Presumably the reload is needed because
! no FP register still holds this element at this pipeline stage --
! confirm.
.update25:
sub %i1,stridex,%i3 ! address of the element
cmp counter,6
ble .cont25 ! at most 6 left: nothing to do
fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
ld [%i3+4],%i3 ! low word of the element
cmp %g1,0
bl 1f ! %g1 negative: truncate the pipeline
orcc %g1,%i3,%g0 ! delay slot: sets Z for the bz below
bz 1f ! both words zero: truncate the pipeline
nop
sub %i1,stridex,%i3 ! recompute the element address
ld [%i3],%f10 ! reload upper word of the element
ld [%i3+4],%f11 ! reload lower word of the element
sethi %hi(0x00080000),%i3 ! threshold for the split below
cmp %g1,%i3
bge,a 2f ! %g1 >= 0x00080000: mask-and-add conversion
ldd [%o3+0x50],%f60
fxtod %f10,%f10 ! res = *(long long*)&res;
st %f10,[%fp+tmp7] ! spill upper word of the converted value
fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (5_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
ba .cont25
for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
! Label 2: conversion used when %g1 >= 0x00080000.
2:
fand %f10,%f60,%f10 ! mask res with the constant loaded from [%o3+0x50]
fxtod %f10,%f10 ! res = *(long long*)&res;
ldd [%o3+0x58],%f60
faddd %f10,%f60,%f10 ! add the constant loaded from [%o3+0x58]
st %f10,[%fp+tmp7] ! spill upper word of the converted value
fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
ld [%fp+tmp7],%g1 ! reload it as the new hx
sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
sra %g1,10,%o2 ! (5_0) hx >>= 10;
sub %o7,537,%o7 ! iexp -= 537
and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
ba .cont25
for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
! Label 1: cannot fix in place; record restart state and clamp counter.
1:
sub %i1,stridex,tmp_px ! remember where to restart
sub counter,6,tmp_counter ! elements left after the clamp
ba .cont25
mov 6,counter ! delay slot: clamp counter
! .exit: function epilogue.  ret returns to the caller and the restore
! in its delay slot releases the register window of this routine.
! The SET_SIZE line closes the definition of __vrsqrt.
! NOTE(review): SET_SIZE is a macro defined outside this chunk
! (presumably via libm.h) that records the symbol size -- confirm.
.exit:
ret
restore ! delay slot of ret
SET_SIZE(__vrsqrt)