25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER START
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * The contents of this file are subject to the terms of the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Common Development and Distribution License (the "License").
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You may not use this file except in compliance with the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * See the License for the specific language governing permissions
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * and limitations under the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER END
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Use is subject to license terms.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! following is the C version of the ATAN algorithm
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! double jkatan(double *x)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! double f, z, ans, ansu, ansl, tmp, poly, conup, conlo, dummy;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! extern const double __vlibm_TBL_atan1[];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! long *pf = (long *) &f, *pz = (long *) &z;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! * Error = -3.08254E-18 On the interval |x| < 1/64 */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! /* define dummy names for readability. Use parray to help compiler optimize loads */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! static const double parray[] = {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! f = *x; /* fetch argument */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! sign = intf & 0x80000000; /* sign of argument */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! sign = (unsigned) sign >> 31; /* sign bit = 0 or 1 */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! ((intf == 0x7ff00000) && (pf[1] !=0)) ) return (*x-*x);/* return NaN if x=NaN*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! if( intf < 0x3e300000 ) /* avoid underflow for small arg */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! if( intf > 0x43600000 ) /* avoid underflow for big arg */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! f = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! f = parray[soffset + sign] * f; /* put sign bit on ans */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! { f = -1.0/f;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! index = 2; /* point to pi/2 upper, lower */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! else if( intf >= 0x3f900000 ) /* if |x| >= (1/64)... */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! pz[0] = intz; /* store as a double (z) */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! f = (f - z)/(1.0 + f*z); /* get reduced argument */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! index = (intz - 0x3f900000) >> 15; /* i.e. ((intz - 0x3f900000) >> 16) << 1 */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! index += 4; /* skip over 0,0,pi/2,pi/2 */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! conup = __vlibm_TBL_atan1[index]; /* upper table */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! conlo = __vlibm_TBL_atan1[index+1]; /* lower table */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! ansu = conup + f; /* compute atan(f) upper */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! ansl = (((conup - ansu) + f) + poly) + conlo;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* 8 bytes = 1 double f.p. word */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x00008000,0x0 !for fp rounding of reduced arg
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x47900000,0 !a number close to 1.0E37
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x80000000,0x0 !mask for fp sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x3f800000,0x0 !1.0/128.0 dummy "safe" argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis __vatan(int n, double *x, int stridex, double *y, int stridey)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis computes y(i) = atan( x(i) ), for i=1,n. Stridex, stridey
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis are the distances between successive x and y elements (in doubles)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %i1 address of x
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %i3 address of y
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sll %i2,3,%i2 !convert stride to byte count
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sll %i4,3,%i4 !convert stride to byte count
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* pre-load constants before beginning main loop */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o0+4*WSIZE],%f52 !load rounding mask
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o0+5*WSIZE],%f54 !load truncation mask
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x80000000),%o7 !mask for sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*2 */ sethi %hi(0x43600000),%o1 !big = 0x43600000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o4+%l5],%f26 !conup2 = __vlibm_TBL_atan1[index2]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*6 */ sub %o0,%o2,%o2 !(-) if intf < small
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*7 */ orcc %o1,%o2,%g0 !(-) if either true
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bneg,pn %icc,.SPECIAL0 !if (-) goto special cases below
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis !----------------------
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*8 */ fpadd32 %f34,%f52,%f0 !intf + 0x00008000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %o0,%o7,%o0 !intf + 0x00008000 (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*9*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*10 */ sethi %hi(0x7fff0000),%o7 !mask for rounding argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f14,%f12,%f12 !poly1 = (f1*tmp1)*((p3*tmp1 + p2)*tmp1 + p1)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f16,%f36,%f16 !(conup1 - ansu1) + f1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*12 */ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%l4+WSIZE],%f14 !conlo1 = __vlibm_TBL_atan1[index+1]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f16,%f12,%f12 !((conup1 - ansu1) + f1) + poly1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*15 */ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*16 */ fdivd %f56,%f34,%f34 !f = -1.0/f (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 0,%o1 !index == 0 , point to conup,conlo = 0,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*16 */ fdivd %f20,%f10,%f34 !f = (f - z)/(1.0 + f*z), reduced argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f12,%f14,%f12 !ansl1 = (((conup1 - ansu1) + f1) + poly1) + conlo1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f18,%f12,%f36 !ans1 = ansu1 + ansl1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f22,%f62,%f22 !(p3*tmp2 + p2)*tmp2 + p1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*23*/ for %f36,%f42,%f36 !sign(ans1) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f24,%f22,%f22 !poly2 = (f2*tmp2)*((p3*tmp2 + p2)*tmp2 + p1)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f26,%f38,%f26 !(conup2 - ansu2) + f2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*26*/ sethi %hi(0x80000000),%o7 !mask for sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x43600000),%o1 !big = 0x43600000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*28*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*31*/ orcc %o1,%o2,%g0 !(-) if either true
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bneg,pn %icc,.SPECIAL1 !if (-) goto special cases below
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis !----------------------
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*32*/ fpadd32 %f36,%f52,%f0 !intf + 0x00008000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%l5+WSIZE],%f24 !conlo2 = __vlibm_TBL_atan1[index2+1]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*33*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f26,%f22,%f22 !((conup2 - ansu2) + f2) + poly2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*34*/ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x7fff0000),%o7 !mask for rounding argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*35*/ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f22,%f24,%f22 !ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o4+%l3],%f6 !conup0 = __vlibm_TBL_atan1[index0]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*38*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*40*/ fdivd %f56,%f36,%f36 !f = -1.0/f (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 0,%o1 !index == 0 , point to conup,conlo = 0,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*40*/ fdivd %f20,%f10,%f36 !f = (f - z)/(1.0 + f*z), reduced argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f28,%f22,%f38 !ans2 = ansu2 + ansl2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for %f38,%f44,%f38 !sign(ans2) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*47*/ sethi %hi(0x80000000),%o7 !mask for sign bit
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x43600000),%o1 !big = 0x43600000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*49*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*52*/ orcc %o1,%o2,%g0 !(-) if either true
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bneg,pn %icc,.SPECIAL2 !if (-) goto special cases below
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis !----------------------
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*53*/ fpadd32 %f38,%f52,%f0 !intf + 0x00008000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*55*/ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x7fff0000),%o7 !mask for rounding argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*56*/ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%o4+%l4],%f16 !conup1 = __vlibm_TBL_atan1[index1]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*60*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*61*/ fdivd %f56,%f38,%f38 !f = -1.0/f (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis mov 0,%o1 !index == 0 , point to conup,conlo = 0,0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*61*/ fdivd %f20,%f10,%f38 !f = (f - z)/(1.0 + f*z), reduced argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f2,%f62,%f2 !(p3*tmp0 + p2)*tmp0 + p1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f4,%f2,%f2 !poly0 = (f0*tmp0)*((p3*tmp0 + p2)*tmp0 + p1)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ldd [%l3+WSIZE],%f4 !conlo0 = __vlibm_TBL_atan1[index0+1]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f6,%f2,%f2 !((conup0 - ansu0) + f0) + poly0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*71*/faddd %f2,%f4,%f2 !ansl0 = (((conup0 - ansu0) + f0) + poly0) + conlo0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*74*/faddd %f8,%f2,%f34 !ans0 = ansu0 + ansl0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f12,%f62,%f12 !(p3*tmp1 + p2)*tmp1 + p1
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*77*/ for %f34,%f40,%f34 !sign(ans0) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis std %f34,[%l0] !*yaddr0 = ans, always gets stored (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*------------SPECIAL CASE HANDLING FOR LOOP0 ------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* at this point
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %o2 intf - 0x3e300000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %f34,36,38 f0,f1,f2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis %f40,42,44 sign0,sign1,sign2
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f34,%f50,%f30 !dummy op just to generate exception (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%g1 !if intf > 0x7ff00000 00000000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f34,%f34,%f34 !...... (x*x) trigger invalid exception
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f46,%f48,%f34 !ans = pi/2 upper + pi/2 lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for %f34,%f40,%f34 !sign(ans) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %i3,%i4,%i3 !y += stridey (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*-----------SPECIAL CASE HANDLING FOR LOOP1 -------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f36,%f50,%f30 !dummy op just to generate exception (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%g1 !if intf > 0x7ff00000 00000000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f36,%f36,%f36 !...... (x*x) trigger invalid exception
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f46,%f48,%f36 !ans = pi/2 upper + pi/2 lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for %f36,%f42,%f36 !sign(ans) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis add %i3,%i4,%i3 !y += stridey (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*------------SPECIAL CASE HANDLING FOR LOOP2 ------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f38,%f50,%f30 !dummy op just to generate exception (delay slot)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cmp %o0,%g1 !if intf > 0x7ff00000 00000000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f38,%f38,%f38 !...... (x*x) trigger invalid exception
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f46,%f48,%f38 !ans = pi/2 upper + pi/2 lower
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for %f38,%f44,%f38 !sign(ans) = sign of argument
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /*--------------------------------------------------------------------------*/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! .ident "03-20-96 Sparc V9 3-way-unrolled version"