/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
.file "__vhypotf.S"
#include "libm.h"
RO_DATA
.align 64
! Constant pool for __vhypotf.  Entries are fetched by fixed byte offset
! from .CONST_TBL, so neither their order nor their size may change.
.CONST_TBL:
! +0: K1 = 5.00000715259318464227e-01
.word 0x3fe00001
.word 0x80007e00
! +8: K2 = -1.25000447037521686593e-01
.word 0xbfc00003
.word 0xc0017a01
! +16: DC0 = 0x000fffffffffffff
.word 0x000fffff
.word 0xffffffff
! +24: DC1 = 0x3ff0000000000000
.word 0x3ff00000
.word 0x00000000
! +32: DC2 = 0x7ffff00000000000
.word 0x7ffff000
.word 0x00000000
! +40: DA0 = 0x7fe0000000000000
.word 0x7fe00000
.word 0x00000000
! +48: DFMAX = 3.402823e+38
.word 0x47efffff
.word 0xe0000000
! +56: FMAX = 3.402823e+38 (single word), +60: SCALE = 0x80808080
.word 0x7f7fffff
.word 0x80808080
! +64: DA1 = 0x2000000000000000
.word 0x20000000
.word 0x00000000
! Double-precision constants kept in FP registers for the whole routine.
#define K1 %f28
#define K2 %f26
#define DC0 %f12
#define DC1 %f10
#define DC2 %f42
#define DA0 %f6
#define DA1 %f4
#define DFMAX %f50
! Single-precision constants (the two halves of one 8-byte table entry).
#define FMAX %f2
#define SCALE %f3
! Integer registers with a fixed role.
#define counter %l0
#define TBL %l1
#define _0x1ff0 %l2
#define stridez %l5
#define stridex %l6
#define stridey %i4
#define _0x7fffffff %o1
#define _0x7f3504f3 %o2
! Stack temporaries, addressed relative to %fp: two 8-byte pointer slots,
! the saved element count, and five 4-byte slots for exponent words.
#define tmp_px STACK_BIAS-0x30
#define tmp_py STACK_BIAS-0x28
#define tmp_counter STACK_BIAS-0x20
#define tmp0 STACK_BIAS-0x18
#define tmp1 STACK_BIAS-0x10
#define tmp2 STACK_BIAS-0x0c
#define tmp3 STACK_BIAS-0x08
#define tmp4 STACK_BIAS-0x04
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps 0x30
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! !!!!! algorithm !!!!!
! hx0 = *(int*)px;
! x0 = *px;
! px += stridex;
!
! hy0 = *(int*)py;
! y0 = *py;
! py += stridey;
!
! hx0 &= 0x7fffffff;
! hy0 &= 0x7fffffff;
!
! if ( hx0 >= 0x7f3504f3 || hy0 >= 0x7f3504f3 )
! {
! if ( hx0 >= 0x7f800000 || hy0 >= 0x7f800000 )
! {
! if ( hx0 == 0x7f800000 || hy0 == 0x7f800000 )
! *(int*)pz = 0x7f800000;
! else *pz = x0 * y0;
! }
! else
! {
! hyp = sqrt(x0 * (double)x0 + y0 * (double)y0);
! if ( hyp <= DFMAX ) ftmp0 = (float)hyp;
! else ftmp0 = FMAX * FMAX;
! *pz = ftmp0;
! }
! pz += stridez;
! continue;
! }
! if ( (hx0 | hy0) == 0 )
! {
! *pz = 0;
! pz += stridez;
! continue;
! }
! dx0 = x0 * (double)x0;
! dy0 = y0 * (double)y0;
! db0 = dx0 + dy0;
!
! iexp0 = ((int*)&db0)[0];
!
! h0 = vis_fand(db0,DC0);
! h0 = vis_for(h0,DC1);
! h_hi0 = vis_fand(h0,DC2);
!
! db0 = vis_fand(db0,DA0);
! db0 = vis_fmul8x16(SCALE, db0);
! db0 = vis_fpadd32(db0,DA1);
!
! iexp0 >>= 8;
! di0 = iexp0 & 0x1ff0;
! si0 = (char*)sqrt_arr + di0;
!
! dtmp0 = ((double*)((char*)div_arr + di0))[0];
! xx0 = h0 - h_hi0;
! xx0 *= dtmp0;
!
! dtmp0 = ((double*)si0)[1];
! res0 = K2 * xx0;
! res0 += K1;
! res0 *= xx0;
! res0 += DC1;
! res0 = dtmp0 * res0;
! res0 *= db0;
! ftmp0 = (float)res0;
! *pz = ftmp0;
! pz += stridez;
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! __vhypotf entry and one-time setup.
!
! Incoming arguments as consumed below (the C prototype is not visible in
! this file; presumably n, x, stridex, y, stridey, z, stridez - confirm
! against the libmvec header):
!   %i0  element count, saved to tmp_counter
!   %i1  x pointer, saved to tmp_px
!   %i2  x stride, shifted left by 2 into the x stride register
!   %i3  y pointer, saved to tmp_py
!   %i4  y stride, shifted left by 2 in place
!   %i5  z pointer, copied to %l7
!   7th argument, loaded from the caller frame: z stride, shifted left by 2
! The shifts by 2 multiply the strides by 4 before they are added to the
! pointers.  The constants of .CONST_TBL are loaded into their dedicated
! registers here and stay there for the whole routine.
ENTRY(__vhypotf)
save %sp,-SA(MINFRAME)-tmps,%sp
PIC_SETUP(l7)
PIC_SET(l7,.CONST_TBL,o3)
PIC_SET(l7,__vlibm_TBL_sqrtf,l1)
! 7th argument lives in the caller frame; offset and width depend on the ABI
#ifdef __sparcv9
ldx [%fp+STACK_BIAS+176],stridez
#else
ld [%fp+STACK_BIAS+92],stridez
#endif
! park count and input pointers on the stack; .begin reloads them
st %i0,[%fp+tmp_counter]
stx %i1,[%fp+tmp_px]
stx %i3,[%fp+tmp_py]
ldd [%o3],K1
sethi %hi(0x7ffffc00),%o1
ldd [%o3+8],K2
sethi %hi(0x7f350400),%o2
ldd [%o3+16],DC0
add %o1,1023,_0x7fffffff ! 0x7ffffc00 + 0x3ff = 0x7fffffff
add %o2,0xf3,_0x7f3504f3 ! 0x7f350400 + 0xf3 = 0x7f3504f3
ldd [%o3+24],DC1
sll %i2,2,stridex
ld [%o3+56],FMAX
ldd [%o3+32],DC2
sll %i4,2,stridey
ldd [%o3+40],DA0
sll stridez,2,stridez
ldd [%o3+48],DFMAX
ld [%o3+60],SCALE
or %g0,0xff8,%l2
ldd [%o3+64],DA1
sll %l2,1,_0x1ff0 ! 0xff8 << 1 = 0x1ff0, table index mask
or %g0,%i5,%l7 ! %l7 = pz (the PIC scratch use of %l7 is over)
! .begin: start (or restart) a run of elements.  The element count and the
! x/y pointers are reloaded from their stack slots and tmp_counter is
! cleared; an .updateN handler stores the deferred remainder there when it
! cuts the current run short.  pz is carried in %l7.
.begin:
ld [%fp+tmp_counter],counter
ldx [%fp+tmp_px],%i1
ldx [%fp+tmp_py],%i2
st %g0,[%fp+tmp_counter]
! .begin1: classify the first element of the run.  Inputs at or above
! 0x7f3504f3 in magnitude go to .spec, x and y both zero go to .spec1; both
! paths produce one result and come back here.
! The loads use ASI 0x82 (the SPARC V9 primary no-fault ASI): the first lda
! sits in the delay slot of the exit branch, and the loads further down run
! ahead of the element count.
.begin1:
cmp counter,0
ble,pn %icc,.exit
lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px;
lda [%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py;
lda [%i1]0x82,%f17 ! (3_0) x0 = *px;
and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff;
cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3
bge,pn %icc,.spec ! (3_0) if ( hx >= 0x7f3504f3 )
and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff;
cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3
bge,pn %icc,.spec ! (3_0) if ( hy >= 0x7f3504f3 )
or %g0,%i2,%o7 ! %o7 = py (delay slot)
orcc %l3,%l4,%g0
bz,pn %icc,.spec1
add %i1,stridex,%i1 ! px += stridex
! Pipeline fill.  The next six elements of the run are loaded and range
! checked ahead of time; an element that fails a check diverts to the
! matching .updateN handler, which rejoins at .contN.  No result is stored
! in this section.
fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0;
lda [%i2]0x82,%f17 ! (3_0) y0 = *py;
lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px;
lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py;
and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff;
fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0;
cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3
bge,pn %icc,.update0 ! (4_0) if ( hx >= 0x7f3504f3 )
and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
orcc %l3,%l4,%g0
bz,pn %icc,.update0
lda [%i1]0x82,%f17 ! (4_0) x0 = *px;
.cont0:
faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0;
fsmuld %f17,%f17,%f40 ! (4_1) dx0 = x0 * (double)x0;
cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3
lda [stridey+%o7]0x82,%f17 ! (4_1) y0 = *py;
add %o7,stridey,%i5 ! py += stridey
lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px;
bge,pn %icc,.update1 ! (4_1) if ( hy >= 0x7f3504f3 )
st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0];
.cont1:
and %l3,_0x7fffffff,%l3 ! (0_0) hx0 &= 0x7fffffff;
fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0;
lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px;
add %i1,stridex,%i1 ! px += stridex
lda [%i5+stridey]0x82,%l4 ! (0_0) hy0 = *(int*)py;
cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3
bge,pn %icc,.update2 ! (0_0) if ( hx >= 0x7f3504f3 )
add %i5,stridey,%o4 ! py += stridey
.cont2:
faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0;
fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0;
and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff;
lda [%i5+stridey]0x82,%f17 ! (0_0) y0 = *py;
cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3
bge,pn %icc,.update3 ! (0_0) if ( hy >= 0x7f3504f3 )
st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0];
orcc %l3,%l4,%g0
bz,pn %icc,.update3
! The bz above has no separate delay-slot instruction: the lda at .cont3
! fills the slot and is executed a second time when .update3 returns here.
! Reloading %l3 twice gives the same value.
.cont3:
lda [%i1+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px;
fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0);
and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff;
fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0;
cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3
lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py;
add %i1,stridex,%i1 ! px += stridex
lda [%i1]0x82,%f17 ! (1_0) x0 = *px;
bge,pn %icc,.update4 ! (1_0) if ( hx >= 0x7f3504f3 )
add %o4,stridey,%i5 ! py += stridey
.cont4:
and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff;
for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1);
cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3
ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0];
faddd %f40,%f34,%f0 ! (0_0) db0 = dx0 + dy0;
fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0;
add %i1,stridex,%i1 ! px += stridex
lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py;
srax %o0,8,%o0 ! (3_1) iexp0 >>= 8;
bge,pn %icc,.update5 ! (1_0) if ( hy >= 0x7f3504f3 )
fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2);
orcc %l3,%l4,%g0
bz,pn %icc,.update5
! As at .cont3: the lda at .cont5 is the delay slot of the bz above and is
! executed again after .update5 returns.
.cont5:
lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px;
and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0;
st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0];
fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0);
ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0;
fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0;
add %i5,stridey,%i2 ! py += stridey
lda [stridey+%i5]0x82,%l4 ! (2_0) hy0 = *(int*)py;
and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff;
lda [%i1]0x82,%f17 ! (2_0) x0 = *px;
cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3
fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dtmp0;
and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff;
for %f60,DC1,%f46 ! (4_1) h0 = vis_for(h0,DC1);
bge,pn %icc,.update6 ! (2_0) if ( hx >= 0x7f3504f3 )
ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0];
.cont6:
faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0;
fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0;
cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3
lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py;
add %i1,stridex,%i1 ! px += stridex
bge,pn %icc,.update7 ! (2_0) if ( hy >= 0x7f3504f3 )
fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2);
orcc %l3,%l4,%g0
bz,pn %icc,.update7
nop
.cont7:
fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0;
srax %o3,8,%o3 ! (4_1) iexp0 >>= 8;
lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px;
and %o3,_0x1ff0,%o3 ! (4_1) di0 = iexp0 & 0x1ff0;
st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0];
fand %f0,DC0,%f60 ! (0_0) h0 = vis_fand(db0,DC0);
ldd [TBL+%o3],%f22 ! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
add %i2,stridey,%o7 ! py += stridey
fsubd %f46,%f58,%f58 ! (4_1) xx0 = h0 - h_hi0;
fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0;
lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py;
and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff;
faddd %f56,K1,%f54 ! (3_1) res0 += K1;
cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3
lda [%i1]0x82,%f17 ! (3_0) x0 = *px;
add %i1,stridex,%i1 ! px += stridex
bge,pn %icc,.update8 ! (3_0) if ( hx >= 0x7f3504f3 )
fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dtmp0;
.cont8:
and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff;
for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1);
cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3
ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0];
faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0;
fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0;
bge,pn %icc,.update9 ! (3_0) if ( hy >= 0x7f3504f3 )
lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py;
orcc %l3,%l4,%g0
bz,pn %icc,.update9
nop
.cont9:
fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0;
lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px;
fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2);
fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0;
srax %g1,8,%o5 ! (0_0) iexp0 >>= 8;
lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py;
fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0);
and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0;
st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0];
fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0);
ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0];
add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0;
and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff;
fsubd %f46,%f38,%f38 ! (0_0) xx0 = h0 - h_hi0;
fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0;
cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3
bge,pn %icc,.update10 ! (4_0) if ( hx >= 0x7f3504f3 )
faddd %f40,DC1,%f40 ! (3_1) res0 += DC1;
! The next four instructions are repeated at the top of .update10, which
! is entered when the branch above is taken; there the x0 load is replaced
! by zeroing %f17.
fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0);
and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1];
faddd %f54,K1,%f54 ! (4_1) res0 += K1;
lda [%i1]0x82,%f17 ! (4_0) x0 = *px;
.cont10:
fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dtmp0;
cmp counter,5
for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1);
ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0];
fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0;
faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0;
! Fewer than five elements in this run: skip the steady-state loop and
! drain in .tail.  Otherwise enter the loop with counter lowered by 5.
bl,pn %icc,.tail
nop
ba .main_loop
sub counter,5,counter
.align 16
! .main_loop: steady state of the software pipeline.  Each pass stores five
! results (the five st %f14 instructions), loads and range-checks five new
! elements, and lowers counter by 5; it repeats while counter stays
! non-negative.  Instructions that belong to different in-flight elements
! are interleaved; the tag in parentheses at the start of each trailing
! comment names the element slot the instruction works on.
! NOTE(review): the exact meaning of the second digit of the tag is not
! defined in this file.
! Register reuse inside the loop: %l7 is pz at the top of the loop, becomes
! a px temporary after .cont16 and is set back to pz after .cont20; %i0 is a
! px temporary until .cont13 and a pz temporary afterwards.
.main_loop:
fsmuld %f17,%f17,%f40 ! (4_1) dx0 = x0 * (double)x0;
cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3
lda [stridey+%o7]0x82,%f17 ! (4_1) y0 = *py;
fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1);
fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0;
add %o7,stridey,%i5 ! py += stridey
st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0];
fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2);
fmuld K2,%f38,%f56 ! (0_1) res0 = K2 * xx0;
srax %g1,8,%g5 ! (1_1) iexp0 >>= 8;
bge,pn %icc,.update11 ! (4_1) if ( hy >= 0x7f3504f3 )
fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0);
orcc %l3,%l4,%g0
nop
bz,pn %icc,.update11
fzero %f52 ! delay-slot filler; %f52 is not read anywhere in this file
.cont11:
fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0;
and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0;
lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px;
fand %f30,DC0,%f60 ! (2_1) h0 = vis_fand(db0,DC0);
ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0;
add %i1,stridex,%i0 ! px += stridex
fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0;
fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0;
nop
lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px;
faddd %f58,DC1,%f36 ! (4_2) res0 += DC1;
faddd %f56,K1,%f58 ! (0_1) res0 += K1;
and %l3,_0x7fffffff,%l3 ! (0_0) hx0 &= 0x7fffffff;
ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1];
fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0);
lda [%i5+stridey]0x82,%l4 ! (0_0) hy0 = *(int*)py;
cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3
bge,pn %icc,.update12 ! (0_0) if ( hx >= 0x7f3504f3 )
fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0;
.cont12:
fmuld %f44,%f22,%f44 ! (1_1) xx0 *= dtmp0;
add %l7,stridez,%o7 ! pz += stridez
st %f14,[%l7] ! (3_2) *pz = ftmp0;
for %f60,DC1,%f46 ! (2_1) h0 = vis_for(h0,DC1);
fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0;
add %i5,stridey,%o4 ! py += stridey
ld [%fp+tmp4],%g1 ! (2_1) iexp0 = ((int*)&db0)[0];
faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0;
fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0;
and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff;
lda [%i5+stridey]0x82,%f17 ! (0_0) y0 = *py;
fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1);
fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0;
cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3
st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0];
fand %f46,DC2,%f58 ! (2_1) h_hi0 = vis_fand(h0,DC2);
fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0;
srax %g1,8,%g1 ! (2_1) iexp0 >>= 8;
bge,pn %icc,.update13 ! (0_0) if ( hy >= 0x7f3504f3 )
fand %f0,DA0,%f54 ! (0_1) db0 = vis_fand(db0,DA0);
orcc %l3,%l4,%g0
nop
bz,pn %icc,.update13
fzero %f52
.cont13:
fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0;
and %g1,_0x1ff0,%g1 ! (2_1) di0 = iexp0 & 0x1ff0;
lda [%i0+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px;
fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0);
ldd [TBL+%g1],%f22 ! (2_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0;
add %i0,stridex,%i1 ! px += stridex
fsubd %f46,%f58,%f58 ! (2_1) xx0 = h0 - h_hi0;
fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0;
add %o7,stridez,%i0 ! pz += stridez
lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py;
faddd %f38,DC1,%f36 ! (0_1) res0 += DC1;
faddd %f56,K1,%f38 ! (1_1) res0 += K1;
and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff;
ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1];
fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0);
lda [%i1]0x82,%f17 ! (1_0) x0 = *px;
cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3
bge,pn %icc,.update14 ! (1_0) if ( hx >= 0x7f3504f3 )
fdtos %f62,%f14 ! (4_2) ftmp0 = (float)res0;
.cont14:
fmuld %f58,%f22,%f58 ! (2_1) xx0 *= dtmp0;
and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff;
add %o4,stridey,%i5 ! py += stridey
for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1);
fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0;
cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3
ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0];
faddd %f40,%f34,%f0 ! (0_0) db0 = dx0 + dy0;
fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0;
add %i1,stridex,%i1 ! px += stridex
lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py;
fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1);
fmuld %f38,%f44,%f44 ! (1_1) res0 *= xx0;
st %f14,[%o7] ! (4_2) *pz = ftmp0;
bge,pn %icc,.update15 ! (1_0) if ( hy >= 0x7f3504f3 )
fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2);
orcc %l3,%l4,%g0
bz,pn %icc,.update15
nop
.cont15:
fmuld K2,%f58,%f54 ! (2_1) res0 = K2 * xx0;
srax %o0,8,%o0 ! (3_1) iexp0 >>= 8;
st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0];
fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0);
fmuld %f36,%f62,%f62 ! (0_1) res0 *= db0;
and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0;
lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px;
fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0);
ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0;
add %i0,stridez,%i3 ! pz += stridez
fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0;
fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0;
add %i5,stridey,%i2 ! py += stridey
lda [stridey+%i5]0x82,%l4 ! (2_0) hy0 = *(int*)py;
faddd %f44,DC1,%f44 ! (1_1) res0 += DC1;
fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0);
and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff;
ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1];
faddd %f54,K1,%f54 ! (2_1) res0 += K1;
lda [%i1]0x82,%f17 ! (2_0) x0 = *px;
cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3
add %i3,stridez,%o4 ! pz += stridez
fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0;
fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dtmp0;
and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff;
st %f14,[%i0] ! (0_1) *pz = ftmp0;
for %f60,DC1,%f46 ! (4_1) h0 = vis_for(h0,DC1);
fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0;
bge,pn %icc,.update16 ! (2_0) if ( hx >= 0x7f3504f3 )
ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0];
! skipped when the branch above is taken; .update16 repeats this faddd
faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0;
.cont16:
fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0;
cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3
lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py;
fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1);
fmuld %f54,%f58,%f54 ! (2_1) res0 *= xx0;
add %i1,stridex,%l7 ! px += stridex
bge,pn %icc,.update17 ! (2_0) if ( hy >= 0x7f3504f3 )
fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2);
orcc %l3,%l4,%g0
nop
bz,pn %icc,.update17
fzero %f52
.cont17:
fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0;
srax %o3,8,%o3 ! (4_1) iexp0 >>= 8;
st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0];
fand %f30,DA0,%f40 ! (2_1) db0 = vis_fand(db0,DA0);
fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0;
and %o3,_0x1ff0,%o3 ! (4_1) di0 = iexp0 & 0x1ff0;
lda [%l7]0x82,%l3 ! (3_0) hx0 = *(int*)px;
fand %f0,DC0,%f60 ! (0_0) h0 = vis_fand(db0,DC0);
ldd [TBL+%o3],%f22 ! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
add %g1,TBL,%g1 ! (2_1) si0 = (char*)sqrt_arr + di0;
add %i2,stridey,%o7 ! py += stridey
fsubd %f46,%f58,%f58 ! (4_1) xx0 = h0 - h_hi0;
fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0;
lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py;
add %l7,stridex,%i1 ! px += stridex
faddd %f54,DC1,%f36 ! (2_1) res0 += DC1;
faddd %f56,K1,%f54 ! (3_1) res0 += K1;
and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff;
ldd [%g1+8],%f56 ! (2_1) dtmp0 = ((double*)si0)[1];
fmul8x16 SCALE,%f40,%f40 ! (2_1) db0 = vis_fmul8x16(SCALE, db0);
lda [%l7]0x82,%f17 ! (3_0) x0 = *px;
cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3
bge,pn %icc,.update18 ! (3_0) if ( hx >= 0x7f3504f3 )
fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0;
.cont18:
fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dtmp0;
and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff;
st %f14,[%i3] ! (1_1) *pz = ftmp0;
for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1);
fmuld %f56,%f36,%f36 ! (2_1) res0 = dtmp0 * res0;
cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3
ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0];
faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0;
fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0;
bge,pn %icc,.update19 ! (3_0) if ( hy >= 0x7f3504f3 )
lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py;
! skipped when the branch above is taken; .update19 repeats this fpadd32
fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1);
.cont19:
fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0;
! zero test for element (3_0); the flags set here are consumed by the bz
! before .cont19a (no instruction in between writes the condition codes)
orcc %l3,%l4,%g0
st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0];
fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2);
fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0;
srax %g1,8,%o5 ! (0_0) iexp0 >>= 8;
lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px;
fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0);
fmuld %f36,%f62,%f62 ! (2_1) res0 *= db0;
and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0;
bz,pn %icc,.update19a
fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0);
.cont19a:
ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0];
add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0;
and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff;
fsubd %f46,%f38,%f38 ! (0_0) xx0 = h0 - h_hi0;
fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0;
cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3
lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py;
faddd %f40,DC1,%f40 ! (3_1) res0 += DC1;
fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0);
bge,pn %icc,.update20 ! (4_0) if ( hx >= 0x7f3504f3 )
ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1];
! the faddd is repeated at the top of .update20; the x0 load is replaced
! there by zeroing %f17
faddd %f54,K1,%f54 ! (4_1) res0 += K1;
lda [%i1]0x82,%f17 ! (4_0) x0 = *px;
.cont20:
subcc counter,5,counter ! counter -= 5
add %o4,stridez,%l7 ! pz += stridez
fdtos %f62,%f14 ! (2_1) ftmp0 = (float)res0;
fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dtmp0;
and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
st %f14,[%o4] ! (2_1) *pz = ftmp0;
for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1);
ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0];
fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0;
bpos,pt %icc,.main_loop
faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0;
! loop exit: undo the last subtraction so that counter again holds the
! number of results (0 to 4) still to be stored by .tail
add counter,5,counter
! .tail: drain the pipeline.  counter holds how many results of this run
! remain to be stored; it is decremented before each of the (at most four)
! stores below, and control returns to .begin as soon as it goes negative.
! On every exit %l7 points at the next unwritten z element; the
! or-into-%l7 instructions in the branch delay slots maintain that.
! No new input is loaded here.
.tail:
subcc counter,1,counter
bneg .begin
nop
fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1);
fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0;
fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2);
fmuld K2,%f38,%f56 ! (0_1) res0 = K2 * xx0;
srax %g1,8,%g5 ! (1_1) iexp0 >>= 8;
fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0);
fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0;
and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0;
ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0;
fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0;
faddd %f58,DC1,%f36 ! (4_2) res0 += DC1;
faddd %f56,K1,%f58 ! (0_1) res0 += K1;
ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1];
fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0);
fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0;
fmuld %f44,%f22,%f44 ! (1_1) xx0 *= dtmp0;
add %l7,stridez,%o7 ! pz += stridez
st %f14,[%l7] ! (3_2) *pz = ftmp0;
subcc counter,1,counter
bneg .begin
or %g0,%o7,%l7 ! %l7 = next pz (delay slot, executed either way)
fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0;
fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1);
fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0;
fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0;
fand %f0,DA0,%f54 ! (0_1) db0 = vis_fand(db0,DA0);
fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0;
add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0;
faddd %f38,DC1,%f36 ! (0_1) res0 += DC1;
faddd %f56,K1,%f38 ! (1_1) res0 += K1;
ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1];
fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0);
add %o7,stridez,%i0 ! pz += stridez
fdtos %f62,%f14 ! (4_2) ftmp0 = (float)res0;
fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0;
fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1);
fmuld %f38,%f44,%f44 ! (1_1) res0 *= xx0;
add %i0,stridez,%i3 ! pz += stridez
st %f14,[%o7] ! (4_2) *pz = ftmp0;
subcc counter,1,counter
bneg .begin
or %g0,%i0,%l7 ! %l7 = next pz (delay slot)
fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0);
fmuld %f36,%f62,%f62 ! (0_1) res0 *= db0;
add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0;
faddd %f44,DC1,%f44 ! (1_1) res0 += DC1;
fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0);
ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1];
add %i3,stridez,%o4 ! pz += stridez
fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0;
st %f14,[%i0] ! (0_1) *pz = ftmp0;
subcc counter,1,counter
bneg .begin
or %g0,%i3,%l7 ! %l7 = next pz (delay slot)
fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0;
fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1);
fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0;
fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0;
st %f14,[%i3] ! (1_1) *pz = ftmp0;
ba .begin
or %g0,%o4,%l7 ! %l7 = next pz (delay slot)
.align 16
! .spec1: the first element of the run has x and y both zero in magnitude.
! Store an all-zero word as the result and step to the next element.  px was
! already advanced in the delay slot of the branch that leads here, so only
! py and pz are advanced in this block.
.spec1:
sub counter,1,counter ! counter--
add %i2,stridey,%i2 ! py += stridey
st %g0,[%l7] ! *pz = 0;
ba .begin1
add %l7,stridez,%l7 ! pz += stridez (delay slot)
.align 16
! .spec: slow path for the first element of a run when hx >= 0x7f3504f3 or
! hy >= 0x7f3504f3.  On entry %f17 = x, %l3 and %l4 = masked bit patterns of
! x and y, %i1 and %i2 = px and py (neither advanced yet), %l7 = pz.
! One result is stored, all three pointers are advanced, counter is
! decremented and control returns to .begin1.
.spec:
sethi %hi(0x7f800000),%i0
cmp %l3,%i0 ! hx ? 0x7f800000
bge,pt %icc,2f ! if ( hx >= 0x7f800000 )
ld [%i2],%f8 ! y = *py (delay slot, executed on both paths)
cmp %l4,%i0 ! hy ? 0x7f800000
bge,pt %icc,2f ! if ( hy >= 0x7f800000 )
nop
! Both inputs finite: compute the result with a double-precision sqrt.
fsmuld %f17,%f17,%f44 ! x * (double)x
fsmuld %f8,%f8,%f24 ! y * (double)y
faddd %f44,%f24,%f24 ! x * (double)x + y * (double)y
fsqrtd %f24,%f24 ! hyp = sqrt(x * (double)x + y * (double)y);
fcmped %f24,DFMAX ! hyp ? DFMAX
! annulled branch: the fmuls in the delay slot executes only when the
! branch is taken, the fdtos only when it is not
fbug,a 1f ! if ( hyp > DFMAX )
fmuls FMAX,FMAX,%f20 ! ftmp0 = FMAX * FMAX;
fdtos %f24,%f20 ! ftmp0 = (float)hyp;
1:
st %f20,[%l7] ! *pz = ftmp0;
add %l7,stridez,%l7 ! pz += stridez
add %i1,stridex,%i1 ! px += stridex
add %i2,stridey,%i2 ! py += stridey
ba .begin1
sub counter,1,counter ! counter--
! At least one input is infinite or NaN.
2:
fcmps %f17,%f8 ! exceptions
cmp %l3,%i0 ! hx ? 0x7f800000
! annulled branches: the store of 0x7f800000 in each delay slot happens
! only when the corresponding input is an infinity
be,a %icc,1f ! if ( hx == 0x7f800000 )
st %i0,[%l7] ! *(int*)pz = 0x7f800000;
cmp %l4,%i0 ! hy ? 0x7f800000
be,a %icc,1f ! if ( hy == 0x7f800000 )
st %i0,[%l7] ! *(int*)pz = 0x7f800000;
fmuls %f17,%f8,%f8 ! x * y
st %f8,[%l7] ! *pz = x * y;
1:
add %l7,stridez,%l7 ! pz += stridez
add %i1,stridex,%i1 ! px += stridex
add %i2,stridey,%i2 ! py += stridey
ba .begin1
sub counter,1,counter ! counter--
.align 16
! .update0 - .update20: out-of-line handlers, reached when an element that
! was loaded ahead of time fails a fast-path test (hx or hy >= 0x7f3504f3,
! or hx and hy both zero).  Every handler follows the same pattern, with K
! being the immediate of its cmp:
!   - the float register holding the input of that element is zeroed, so
!     the pipeline carries on with a harmless value.  The fzeros sits in the
!     delay slot of a non-annulled ble and therefore always executes;
!   - if counter <= K the element lies beyond the end of the current run
!     and nothing else is done;
!   - otherwise the px and py of the failing element are written to tmp_px
!     and tmp_py, counter - K is written to tmp_counter, and counter is set
!     to K (in the delay slot of the final ba).  The current run then ends
!     just before the failing element and .begin restarts on it, where
!     .begin1 classifies it again;
!   - control resumes at the matching .contN.
! Handlers entered from a branch that skips instructions before .contN
! (.update10, .update16, .update19, .update20) first repeat those
! instructions.  Where px has already been advanced past the failing
! element, the handler steps it back by one x stride before saving it.
.update0:
cmp counter,1
ble .cont0
fzeros %f17
stx %i1,[%fp+tmp_px]
add %o7,stridey,%i5
stx %i5,[%fp+tmp_py]
sub counter,1,counter
st counter,[%fp+tmp_counter]
ba .cont0
or %g0,1,counter
.align 16
.update1:
cmp counter,1
ble .cont1
fzeros %f17
stx %i1,[%fp+tmp_px]
stx %i5,[%fp+tmp_py]
sub counter,1,counter
st counter,[%fp+tmp_counter]
ba .cont1
or %g0,1,counter
.align 16
! .update2 zeroes %f8: the x input of this element was loaded into %f8
.update2:
cmp counter,2
ble .cont2
fzeros %f8
stx %i1,[%fp+tmp_px]
stx %o4,[%fp+tmp_py]
sub counter,2,counter
st counter,[%fp+tmp_counter]
ba .cont2
or %g0,2,counter
.align 16
.update3:
cmp counter,2
ble .cont3
fzeros %f17
stx %i1,[%fp+tmp_px]
stx %o4,[%fp+tmp_py]
sub counter,2,counter
st counter,[%fp+tmp_counter]
ba .cont3
or %g0,2,counter
.align 16
.update4:
cmp counter,3
ble .cont4
fzeros %f17
stx %i1,[%fp+tmp_px]
stx %i5,[%fp+tmp_py]
sub counter,3,counter
st counter,[%fp+tmp_counter]
ba .cont4
or %g0,3,counter
.align 16
! px was already advanced: step back one stride (%i2 is scratch here and is
! recomputed after .cont5)
.update5:
cmp counter,3
ble .cont5
fzeros %f17
sub %i1,stridex,%i2
stx %i2,[%fp+tmp_px]
stx %i5,[%fp+tmp_py]
sub counter,3,counter
st counter,[%fp+tmp_counter]
ba .cont5
or %g0,3,counter
.align 16
.update6:
cmp counter,4
ble .cont6
fzeros %f17
stx %i1,[%fp+tmp_px]
stx %i2,[%fp+tmp_py]
sub counter,4,counter
st counter,[%fp+tmp_counter]
ba .cont6
or %g0,4,counter
.align 16
! px was already advanced: step back one stride (%o7 is scratch here and is
! recomputed after .cont7)
.update7:
cmp counter,4
ble .cont7
fzeros %f17
sub %i1,stridex,%o7
stx %o7,[%fp+tmp_px]
stx %i2,[%fp+tmp_py]
sub counter,4,counter
st counter,[%fp+tmp_counter]
ba .cont7
or %g0,4,counter
.align 16
! px was already advanced: step back one stride (%o5 is scratch here and is
! rewritten after .cont9)
.update8:
cmp counter,5
ble .cont8
fzeros %f17
sub %i1,stridex,%o5
stx %o5,[%fp+tmp_px]
stx %o7,[%fp+tmp_py]
sub counter,5,counter
st counter,[%fp+tmp_counter]
ba .cont8
or %g0,5,counter
.align 16
.update9:
cmp counter,5
ble .cont9
fzeros %f17
sub %i1,stridex,%o5
stx %o5,[%fp+tmp_px]
stx %o7,[%fp+tmp_py]
sub counter,5,counter
st counter,[%fp+tmp_counter]
ba .cont9
or %g0,5,counter
.align 16
! first four instructions repeat the ones skipped before .cont10
.update10:
fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0);
and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1];
faddd %f54,K1,%f54 ! (4_1) res0 += K1;
cmp counter,6
ble .cont10
fzeros %f17
stx %i1,[%fp+tmp_px]
add %o7,stridey,%i5
stx %i5,[%fp+tmp_py]
sub counter,6,counter
st counter,[%fp+tmp_counter]
ba .cont10
or %g0,6,counter
.align 16
! .update11 - .update20 serve the main loop, where counter has already been
! lowered by 5 on entry to each pass
.update11:
cmp counter,1
ble .cont11
fzeros %f17
stx %i1,[%fp+tmp_px]
stx %i5,[%fp+tmp_py]
sub counter,1,counter
st counter,[%fp+tmp_counter]
ba .cont11
or %g0,1,counter
.align 16
! .update12 zeroes %f8: the x input of this element was loaded into %f8
.update12:
cmp counter,2
ble .cont12
fzeros %f8
stx %i0,[%fp+tmp_px]
add %i5,stridey,%o4
stx %o4,[%fp+tmp_py]
sub counter,2,counter
st counter,[%fp+tmp_counter]
ba .cont12
or %g0,2,counter
.align 16
.update13:
cmp counter,2
ble .cont13
fzeros %f17
stx %i0,[%fp+tmp_px]
stx %o4,[%fp+tmp_py]
sub counter,2,counter
st counter,[%fp+tmp_counter]
ba .cont13
or %g0,2,counter
.align 16
.update14:
cmp counter,3
ble .cont14
fzeros %f17
stx %i1,[%fp+tmp_px]
add %o4,stridey,%i5
stx %i5,[%fp+tmp_py]
sub counter,3,counter
st counter,[%fp+tmp_counter]
ba .cont14
or %g0,3,counter
.align 16
! px was already advanced: step back one stride (%i2 is scratch here and is
! recomputed after .cont15)
.update15:
cmp counter,3
ble .cont15
fzeros %f17
sub %i1,stridex,%i2
stx %i2,[%fp+tmp_px]
stx %i5,[%fp+tmp_py]
sub counter,3,counter
st counter,[%fp+tmp_counter]
ba .cont15
or %g0,3,counter
.align 16
! first instruction repeats the faddd skipped before .cont16
.update16:
faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0;
cmp counter,4
ble .cont16
fzeros %f17
stx %i1,[%fp+tmp_px]
stx %i2,[%fp+tmp_py]
sub counter,4,counter
st counter,[%fp+tmp_counter]
ba .cont16
or %g0,4,counter
.align 16
.update17:
cmp counter,4
ble .cont17
fzeros %f17
stx %i1,[%fp+tmp_px]
stx %i2,[%fp+tmp_py]
sub counter,4,counter
st counter,[%fp+tmp_counter]
ba .cont17
or %g0,4,counter
.align 16
! in .update18, .update19 and .update19a the px of the failing element is
! held in %l7, which serves as a px temporary in that part of the loop
.update18:
cmp counter,5
ble .cont18
fzeros %f17
stx %l7,[%fp+tmp_px]
stx %o7,[%fp+tmp_py]
sub counter,5,counter
st counter,[%fp+tmp_counter]
ba .cont18
or %g0,5,counter
.align 16
! first instruction repeats the fpadd32 skipped before .cont19
.update19:
fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1);
cmp counter,5
ble .cont19
fzeros %f17
stx %l7,[%fp+tmp_px]
stx %o7,[%fp+tmp_py]
sub counter,5,counter
st counter,[%fp+tmp_counter]
ba .cont19
or %g0,5,counter
.align 16
.update19a:
cmp counter,5
ble .cont19a
fzeros %f17
stx %l7,[%fp+tmp_px]
stx %o7,[%fp+tmp_py]
sub counter,5,counter
st counter,[%fp+tmp_counter]
ba .cont19a
or %g0,5,counter
.align 16
! first instruction repeats the faddd skipped before .cont20; %g1 is used
! as scratch for py and is reloaded after .cont20
.update20:
faddd %f54,K1,%f54 ! (4_1) res0 += K1;
cmp counter,6
ble .cont20
fzeros %f17
stx %i1,[%fp+tmp_px]
add %o7,stridey,%g1
stx %g1,[%fp+tmp_py]
sub counter,6,counter
st counter,[%fp+tmp_counter]
ba .cont20
or %g0,6,counter
! .exit: reached from .begin1 when no elements remain.  Returns to the
! caller; the restore in the delay slot of ret pops the register window
! opened by the save at entry.  No value is placed in a return register.
.exit:
ret
restore
SET_SIZE(__vhypotf)