25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER START
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * The contents of this file are subject to the terms of the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Common Development and Distribution License (the "License").
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You may not use this file except in compliance with the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * See the License for the specific language governing permissions
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * and limitations under the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER END
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Use is subject to license terms.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .file "__vatan.S"
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#include "libm.h"
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis RO_DATA
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! following is the C version of the ATAN algorithm
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! #include <math.h>
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! #include <stdio.h>
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! double jkatan(double *x)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! double f, z, ans, ansu, ansl, tmp, poly, conup, conlo, dummy;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! int index, sign, intf, intz;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! extern const double __vlibm_TBL_atan1[];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! long *pf = (long *) &f, *pz = (long *) &z;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis!
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! * Error = -3.08254E-18 On the interval |x| < 1/64 */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis!
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! /* define dummy names for readability. Use parray to help compiler optimize loads */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! #define p3 parray[0]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! #define p2 parray[1]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! #define p1 parray[2]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! #define soffset 3
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis!
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! static const double parray[] = {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! -1.428029046844299722E-01, /* p[3] */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! 1.999999917247000615E-01, /* p[2] */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! -3.333333333329292858E-01, /* p[1] */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! 1.0, /* not used for p[0], though */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! -1.0, /* used to flip sign of answer */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! };
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis!
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! f = *x; /* fetch argument */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! intf = pf[0]; /* grab upper half */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! sign = intf & 0x80000000; /* sign of argument */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! intf ^= sign; /* abs(upper argument) */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! sign = (unsigned) sign >> 31; /* sign bit = 0 or 1 */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! pf[0] = intf;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis!
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! if( (intf > 0x7ff00000) ||
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! ((intf == 0x7ff00000) && (pf[1] !=0)) ) return (*x-*x);/* return NaN if x=NaN*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! if( intf < 0x3e300000 ) /* avoid underflow for small arg */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! dummy = 1.0e37 + f;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! dummy = dummy;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! return (*x);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! if( intf > 0x43600000 ) /* avoid underflow for big arg */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! index = 2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! f = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! f = parray[soffset + sign] * f; /* put sign bit on ans */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! return (f);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis!
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! index = 0; /* points to 0,0 in table */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! if (intf > 0x40500000) /* if (|x| > 64) */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! { f = -1.0/f;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! index = 2; /* point to pi/2 upper, lower */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! else if( intf >= 0x3f900000 ) /* if |x| >= (1/64)... */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! pz[0] = intz; /* store as a double (z) */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! pz[1] = 0; /* ...lower */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! f = (f - z)/(1.0 + f*z); /* get reduced argument */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! index = (intz - 0x3f900000) >> 15; /* ((index >> 16) << 1) */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! index += 4; /* skip over 0,0,pi/2,pi/2 */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! conup = __vlibm_TBL_atan1[index]; /* upper table */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! conlo = __vlibm_TBL_atan1[index+1]; /* lower table */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! tmp = f*f;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! poly = (f*tmp)*((p3*tmp + p2)*tmp + p1);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! ansu = conup + f; /* compute atan(f) upper */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! ansl = (((conup - ansu) + f) + poly) + conlo;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! ans = ansu + ansl;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! ans = parray[soffset + sign] * ans;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! return ans;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* 8 bytes = 1 double f.p. word; offset unit for the constant/table loads in __vatan */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#define WSIZE 8
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .align 32 !align with full D-cache line
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis.COEFFS:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .double 0r-1.428029046844299722E-01 !entry 0: p[3], x**7 coefficient of the atan power series
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .double 0r1.999999917247000615E-01 !entry 1: p[2], x**5 coefficient
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .double 0r-3.333333333329292858E-01 !entry 2: p[1], x**3 coefficient
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .double 0r-1.0, !entry 3: constant -1.0, used for f = -1.0/f and for f*z - (-1.0)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x00008000,0x0 !entry 4: for fp rounding of reduced arg (added to the upper word with fpadd32)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x7fff0000,0x0 !entry 5: for fp truncation (fand mask; zero low word clears the lower half of z)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x47900000,0 !entry 6: a number close to 1.0E37
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x80000000,0x0 !entry 7: mask for fp sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x3f800000,0x0 !entry 8: 1.0/128.0 dummy "safe" argument; x is pointed here (.COEFFS+8*WSIZE) once n is exhausted
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .type .COEFFS,#object
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ENTRY(__vatan)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis save %sp,-SA(MINFRAME)-16,%sp
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis PIC_SETUP(g5)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis PIC_SET(g5,__vlibm_TBL_atan1,o4)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis PIC_SET(g5,.COEFFS,o0)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis __vatan(int n, double *x, int stridex, double *y, int stridey)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis computes y(i) = atan( x(i) ), for i=1,n. Stridex, stridey
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis are the distance between x and y elements
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %i0 n
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %i1 address of x
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %i2 stride x
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %i3 address of y
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %i4 stride y
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %i0,0 !if n <=0,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ble,pn %icc,.RETURN !....then do nothing
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sll %i2,3,%i2 !convert stride to byte count
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sll %i4,3,%i4 !convert stride to byte count
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* pre-load constants before beginning main loop */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o0],%f58 !load p[3]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 2,%i5 !argcount = 3
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o0+WSIZE],%f60 !load p[2]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %fp,STACK_BIAS-8,%l1 !yaddr1 = &dummy
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fzero %f18 !ansu1 = 0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o0+2*WSIZE],%f62 !load p[1]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %fp,STACK_BIAS-8,%l2 !yaddr2 = &dummy
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fzero %f12 !(poly1) = 0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o0+3*WSIZE],%f56 !-1.0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fzero %f14 !tmp1 = 0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o0+4*WSIZE],%f52 !load rounding mask
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fzero %f16 !conup1 = 0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o0+5*WSIZE],%f54 !load truncation mask
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fzero %f36 !f1 = 0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o0+6*WSIZE],%f50 !1.0e37
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fzero %f38 !f2 = 0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o0+7*WSIZE],%f32 !mask for sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o4+2*WSIZE],%f46 !pi/2 upper
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o4+(2*WSIZE+8)],%f48 !pi/2 lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x40500000),%l6 !64.0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x3f900000),%l7 !1/64.0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 0,%l4 !index1 = 0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 0,%l5 !index2 = 0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis.MAINLOOP:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis.LOOP0:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis deccc %i0 !--n
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bneg 1f
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov %i1,%o5 !xuse = x (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba 2f
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop !delay slot
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis1:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis PIC_SET(g5,.COEFFS+8*WSIZE,o5)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dec %i5 !argcount--
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis2:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x80000000),%o7 !mask for sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*2 */ sethi %hi(0x43600000),%o1 !big = 0x43600000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ld [%o5],%o0 !intf = pf[0] = f upper
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o4+%l5],%f26 !conup2 = __vlibm_TBL_atan1[index2]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*4 */ andn %o0,%o7,%o0 !intf = fabs(intf)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o5],%f34 !f = *x into f34
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sub %o1,%o0,%o1 !(-) if intf > big
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*6 */ sub %o0,%o2,%o2 !(-) if intf < small
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fand %f34,%f32,%f40 !sign0 = sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f38,%f38,%f24 !tmp2= f2*f2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*7 */ orcc %o1,%o2,%g0 !(-) if either true
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bneg,pn %icc,.SPECIAL0 !if (-) goto special cases below
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fabsd %f34,%f34 !abs(f) (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis !----------------------
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x8000),%o7 !rounding bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*8 */ fpadd32 %f34,%f52,%f0 !intf + 0x00008000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f26,%f38,%f28 !ansu2 = conup2 + f2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %o0,%o7,%o0 !intf + 0x00008000 (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*9*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f58,%f24,%f22 !p[3]*tmp2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*10 */ sethi %hi(0x7fff0000),%o7 !mask for rounding argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f34,%f0,%f10 !f*z
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fsubd %f34,%f0,%f20 !f - z
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %o4,%l4,%l4 !base addr + index1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f14,%f12,%f12 !poly1 = (f1*tmp1)*((p3*tmp1 + p2)*tmp1 + p1)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f16,%f36,%f16 !(conup1 - ansu1) + f1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*12 */ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f22,%f60,%f22 !p[3]*tmp2 + p[2]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%l4+WSIZE],%f14 !conlo1 = __vlibm_TBL_atan1[index+1]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*13 */ sub %o0,%l7,%o2 !intz - 0x3f900000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fsubd %f10,%f56,%f10 !(f*z - (-1.0))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f16,%f12,%f12 !((conup1 - ansu1) + f1) + poly1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%l6 !(|f| > 64)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ble .ELSE0 !if(|f| > 64) then
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*15 */ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba .ENDIF0 !continue
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*16 */ fdivd %f56,%f34,%f34 !f = -1.0/f (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .ELSE0: !else f( |x| >= (1/64))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%l7 !if intf >= 1/64
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bl .ENDIF0 !if( |x| >= (1/64) ) then...
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 0,%o1 !index == 0 , point to conup,conlo = 0,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %o3,4,%o1 !index = index + 4
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*16 */ fdivd %f20,%f10,%f34 !f = (f - z)/(1.0 + f*z), reduced argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .ENDIF0:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*17*/ sll %o1,3,%l3 !index0 = index
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov %i3,%l0 !yaddr0 = address of y
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f12,%f14,%f12 !ansl1 = (((conup1 - ansu)1 + f1) + poly1) + conlo1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f22,%f24,%f22 !(p3*tmp2 + p2)*tmp2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fsubd %f26,%f28,%f26 !conup2 - ansu2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*20*/ add %i1,%i2,%i1 !x += stridex
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %i3,%i4,%i3 !y += stridey
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f18,%f12,%f36 !ans1 = ansu1 + ansl1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f38,%f24,%f24 !f*tmp2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f22,%f62,%f22 !(p3*tmp2 + p2)*tmp2 + p1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*23*/ for %f36,%f42,%f36 !sign(ans1) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis std %f36,[%l1] !*yaddr1 = ans1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %o4,%l5,%l5 !base addr + index2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f24,%f22,%f22 !poly2 = (f2*tmp2)*((p3*tmp2 + p2)*tmp2 + p1)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f26,%f38,%f26 !(conup2 - ansu2) + f2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %i5,0 !if argcount =0, we are done
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis be .RETURN
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis.LOOP1:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*25*/ deccc %i0 !--n
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bneg 1f
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov %i1,%o5 !xuse = x (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba 2f
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop !delay slot
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis1:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis PIC_SET(g5,.COEFFS+8*WSIZE,o5)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dec %i5 !argcount--
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis2:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*26*/ sethi %hi(0x80000000),%o7 !mask for sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x43600000),%o1 !big = 0x43600000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ld [%o5],%o0 !intf = pf[0] = f upper
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*28*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis andn %o0,%o7,%o0 !intf = fabs(intf)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o5],%f36 !f = *x into f36
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*30*/ sub %o1,%o0,%o1 !(-) if intf > big
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sub %o0,%o2,%o2 !(-) if intf < small
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fand %f36,%f32,%f42 !sign1 = sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*31*/ orcc %o1,%o2,%g0 !(-) if either true
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bneg,pn %icc,.SPECIAL1 !if (-) goto special cases below
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fabsd %f36,%f36 !abs(f) (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis !----------------------
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*32*/ fpadd32 %f36,%f52,%f0 !intf + 0x00008000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%l5+WSIZE],%f24 !conlo2 = __vlibm_TBL_atan1[index2+1]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*33*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x8000),%o7 !rounding bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f26,%f22,%f22 !((conup2 - ansu2) + f2) + poly2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*34*/ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x7fff0000),%o7 !mask for rounding argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f36,%f0,%f10 !f*z
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fsubd %f36,%f0,%f20 !f - z
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*35*/ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f22,%f24,%f22 !ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*37*/ sub %o0,%l7,%o2 !intz - 0x3f900000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fsubd %f10,%f56,%f10 !(f*z - (-1.0))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o4+%l3],%f6 !conup0 = __vlibm_TBL_atan1[index0]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%l6 !(|f| > 64)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ble .ELSE1 !if(|f| > 64) then
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*38*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba .ENDIF1 !continue
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*40*/ fdivd %f56,%f36,%f36 !f = -1.0/f (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .ELSE1: !else f( |x| >= (1/64))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%l7 !if intf >= 1/64
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bl .ENDIF1 !if( |x| >= (1/64) ) then...
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 0,%o1 !index == 0 , point to conup,conlo = 0,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %o3,4,%o1 !index = index + 4
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*40*/ fdivd %f20,%f10,%f36 !f = (f - z)/(1.0 + f*z), reduced argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .ENDIF1:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*41*/sll %o1,3,%l4 !index1 = index
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov %i3,%l1 !yaddr1 = address of y
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f34,%f34,%f4 !tmp0= f0*f0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f28,%f22,%f38 !ans2 = ansu2 + ansl2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*44*/add %i1,%i2,%i1 !x += stridex
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %i3,%i4,%i3 !y += stridey
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f58,%f4,%f2 !p[3]*tmp0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f6,%f34,%f8 !ansu0 = conup0 + f0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for %f38,%f44,%f38 !sign(ans2) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis std %f38,[%l2] !*yaddr2 = ans2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %i5,0 !if argcount =0, we are done
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis be .RETURN
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis.LOOP2: !load and range-check argument 2 (f38); meanwhile finish and store ans0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*46*/ deccc %i0 !--n
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bneg 1f !n went negative: take xuse from 1: instead of x
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov %i1,%o5 !xuse = x (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba 2f !n >= 0: keep xuse = x
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop !delay slot
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis1:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis PIC_SET(g5,.COEFFS+8*WSIZE,o5) !xuse = .COEFFS+8*WSIZE; presumably a stand-in argument once x is exhausted (confirm against PIC_SET and .COEFFS)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dec %i5 !argcount--
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis2:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*47*/ sethi %hi(0x80000000),%o7 !mask for sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x43600000),%o1 !big = 0x43600000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ld [%o5],%o0 !intf = pf[0] = f upper
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*49*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis andn %o0,%o7,%o0 !intf = fabs(intf)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o5],%f38 !f = *x into f38
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*51*/ sub %o1,%o0,%o1 !(-) if intf > big
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sub %o0,%o2,%o2 !(-) if intf < small
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fand %f38,%f32,%f44 !sign2 = sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*52*/ orcc %o1,%o2,%g0 !(-) if either true
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bneg,pn %icc,.SPECIAL2 !if (-) goto special cases below
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fabsd %f38,%f38 !abs(f) (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis !----------------------
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*53*/ fpadd32 %f38,%f52,%f0 !intf + 0x00008000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f2,%f60,%f2 !p[3]*tmp0 + p[2]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*54*/ sethi %hi(0x8000),%o7 !rounding bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*55*/ add %o0,%o7,%o0 !intf + 0x00008000 (integer copy of the rounding step)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x7fff0000),%o7 !mask for rounding argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f38,%f0,%f10 !f*z
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fsubd %f38,%f0,%f20 !f - z
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*56*/ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f2,%f4,%f2 !(p3*tmp0 + p2)*tmp0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fsubd %f6,%f8,%f6 !conup0 - ansu0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*58*/ sub %o0,%l7,%o2 !intz - 0x3f900000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fsubd %f10,%f56,%f10 !(f*z - (-1.0))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o4+%l4],%f16 !conup1 = __vlibm_TBL_atan1[index1]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%l6 !(|f| > 64)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ble .ELSE2 !if(|f| > 64) then
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*60*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15 (delay slot, executed on both paths)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 2,%o1 !index = 2, point to conup, conlo = pi/2 upper, lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba .ENDIF2 !continue
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*61*/ fdivd %f56,%f38,%f38 !f = -1.0/f (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .ELSE2: !else if( |x| >= (1/64))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%l7 !if intf >= 1/64
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bl .ENDIF2 !if( |x| >= (1/64) ) then... (branch is taken when it is not)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 0,%o1 !index = 0, point to conup,conlo = 0,0 (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %o3,4,%o1 !index = index + 4
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*61*/ fdivd %f20,%f10,%f38 !f = (f - z)/(1.0 + f*z), reduced argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .ENDIF2:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*62*/ sll %o1,3,%l5 !index2 = index * 8
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov %i3,%l2 !yaddr2 = address of y
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f34,%f4,%f4 !f0*tmp0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f2,%f62,%f2 !(p3*tmp0 + p2)*tmp0 + p1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f36,%f36,%f14 !tmp1= f1*f1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*65*/add %o4,%l3,%l3 !base addr + index0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f4,%f2,%f2 !poly0 = (f0*tmp0)*((p3*tmp0 + p2)*tmp0 + p1)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f6,%f34,%f6 !(conup0 - ansu0) + f0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f58,%f14,%f12 !p[3]*tmp1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f16,%f36,%f18 !ansu1 = conup1 + f1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%l3+WSIZE],%f4 !conlo0 = __vlibm_TBL_atan1[index0+1]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*68*/ add %i1,%i2,%i1 !x += stridex
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %i3,%i4,%i3 !y += stridey
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f6,%f2,%f2 !((conup0 - ansu0) + f0) + poly0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f12,%f60,%f12 !p[3]*tmp1 + p[2]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*71*/faddd %f2,%f4,%f2 !ansl0 = (((conup0 - ansu0) + f0) + poly0) + conlo0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f12,%f14,%f12 !(p3*tmp1 + p2)*tmp1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fsubd %f16,%f18,%f16 !conup1 - ansu1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*74*/faddd %f8,%f2,%f34 !ans0 = ansu0 + ansl0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f36,%f14,%f14 !f1*tmp1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f12,%f62,%f12 !(p3*tmp1 + p2)*tmp1 + p1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*77*/ for %f34,%f40,%f34 !sign(ans0) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis std %f34,[%l0] !*yaddr0 = ans, always gets stored
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %i5,0 !argcount > 0 means there is more to do
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bg .MAINLOOP !go around again; otherwise fall through to .RETURN
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop !delay slot
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis.RETURN: !common exit, reached by branch or by falling out of the loop above
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ret !return to the caller
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis restore %g0,%g0,%g0 !pop the register window; sum is discarded into %g0 (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*------------SPECIAL CASE HANDLING FOR LOOP0 ------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* at this point
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %i1 x address
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %o0 intf
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %o2 intf - 0x3e300000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %f34,36,38 f0,f1,f2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %f40,42,44 sign0,sign1,sign2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .align 32 !align on I-cache boundary
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis.SPECIAL0: !argument 0 (f34) is out of range: store its answer here, then rejoin .LOOP0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bpos 1f !if >=...continue
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this) (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba 3f !intf below the small threshold: f0 itself is the answer
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f34,%f50,%f30 !dummy op just to generate exception; sum goes to %f30, %f34 is untouched (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis1:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ld [%o5+4],%o5 !load x lower word
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sllx %o0,32,%o0 !left justify intf
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sllx %g1,32,%g1 !left justify Inf
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis or %o0,%o5,%o0 !merge in lower intf
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%g1 !if intf > 0x7ff00000 00000000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ble,pt %xcc,2f !not above Inf (so not NaN): go use pi/2; a NaN falls through
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop !delay slot
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f34,%f34,%f34 !NaN path: ans = (x*x), trigger invalid exception
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba 3f
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop !delay slot
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis2:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f46,%f48,%f34 !ans = pi/2 upper + pi/2 lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis3:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %i1,%i2,%i1 !x += stridex
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for %f34,%f40,%f34 !sign(ans) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis std %f34,[%i3] !*y = ans
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba .LOOP0 !keep looping
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %i3,%i4,%i3 !y += stridey (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*-----------SPECIAL CASE HANDLING FOR LOOP1 -------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .align 32 !align on I-cache boundary
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis.SPECIAL1: !argument 1 (f36) is out of range: store its answer here, then rejoin .LOOP1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bpos 1f !if >=...continue
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this) (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba 3f !intf below the small threshold: f1 itself is the answer
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f36,%f50,%f30 !dummy op just to generate exception; sum goes to %f30, %f36 is untouched (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis1:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ld [%o5+4],%o5 !load x lower word
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sllx %o0,32,%o0 !left justify intf
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sllx %g1,32,%g1 !left justify Inf
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis or %o0,%o5,%o0 !merge in lower intf
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%g1 !if intf > 0x7ff00000 00000000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ble,pt %xcc,2f !not above Inf (so not NaN): go use pi/2; a NaN falls through
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop !delay slot
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f36,%f36,%f36 !NaN path: ans = (x*x), trigger invalid exception
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba 3f
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop !delay slot
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis2:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f46,%f48,%f36 !ans = pi/2 upper + pi/2 lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis3:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %i1,%i2,%i1 !x += stridex
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for %f36,%f42,%f36 !sign(ans) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis std %f36,[%i3] !*y = ans
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba .LOOP1 !keep looping
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %i3,%i4,%i3 !y += stridey (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*------------SPECIAL CASE HANDLING FOR LOOP2 ------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .align 32 !align on I-cache boundary
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis.SPECIAL2: !argument 2 (f38) is out of range: store its answer here, then rejoin .LOOP2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bpos 1f !if >=...continue
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this) (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba 3f !intf below the small threshold: f2 itself is the answer
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f38,%f50,%f30 !dummy op just to generate exception; sum goes to %f30, %f38 is untouched (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis1:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ld [%o5+4],%o5 !load x lower word
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sllx %o0,32,%o0 !left justify intf
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sllx %g1,32,%g1 !left justify Inf
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis or %o0,%o5,%o0 !merge in lower intf
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%g1 !if intf > 0x7ff00000 00000000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ble,pt %xcc,2f !not above Inf (so not NaN): go use pi/2; a NaN falls through
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop !delay slot
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f38,%f38,%f38 !NaN path: ans = (x*x), trigger invalid exception
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba 3f
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis nop !delay slot
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis2:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f46,%f48,%f38 !ans = pi/2 upper + pi/2 lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis3:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %i1,%i2,%i1 !x += stridex
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for %f38,%f44,%f38 !sign(ans) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis std %f38,[%i3] !*y = ans
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ba .LOOP2 !keep looping
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %i3,%i4,%i3 !y += stridey (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis SET_SIZE(__vatan)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! .ident "03-20-96 Sparc V9 3-way-unrolled version"