25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER START
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * The contents of this file are subject to the terms of the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Common Development and Distribution License (the "License").
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You may not use this file except in compliance with the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * See the License for the specific language governing permissions
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * and limitations under the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER END
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Use is subject to license terms.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/********************************************************************
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * vexp() algorithm is from mopt:f_exp.c. Basics are included here
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * to supplement comments within this file. vexp() has been unrolled
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * to a depth of 3. Only element 0 is documented.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Note 1: INVLN2_256, LN2_256H, and LN2_256L were originally scaled by
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * 2^44 to allow *2^k w/o shifting within the FP registers. These
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * had to be removed for CHEETAH to avoid the fdtox of a very large
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * number, which would trap to kernel (2^52).
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Let x = (k + j/256)ln2 + r
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * then exp(x) = exp(ln2^(k+j/256)) * exp(r)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * = 2^k * 2^(j/256) * exp(r)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * where r is polynomial approximation
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * exp(r) = 1 + r + r^2*B1 + r^3*B2 + r^4*B3
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * = 1 + r*(1+r*(B1+r*(B2+r*B3)))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * p = r*(1+r*(B1+r*(B2+r*B3))) ! notice, not quite exp(r)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * q = 2^(j/256) (high 64 bits)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * t = 2^(j/256) (extra precision) ! both from _TBL_exp_z[]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * 2^(j/256) * exp(r) = (q+t)(1+p) ~ q + ( t + q*p )
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * then actual computation is 2^k * ( q + ( t + q*p ) )
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ********************************************************************/
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x43f00000,0x00000000 ! scaling 2^12 two96
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x41371547,0x652b82fe ! scaling 2^12 invln2_256
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x3ea62e42,0xfee00000 ! scaling 2^(-12) ln2_256h
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis .word 0x3caa39ef,0x35793c76 ! scaling 2^(-12) ln2_256l
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ! .word 0x43300000,0x00000000 ! scaling two96
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ! .word 0x40771547,0x652b82fe ! scaling invln2_256
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ! .word 0x3f662e42,0xfee00000 ! scaling ln2_256h
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ! .word 0x3d6a39ef,0x35793c76 ! scaling ln2_256l
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis wr %g0,0x82,%asi ! set %asi for non-faulting loads
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bl,pn %icc,.range0 ! if hx < 0x3e300000 or > 0x40862e41
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for %f2,TWO96,%f2 ! used to strip least sig bits
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f0,INVLN2_256,%f4 ! x/ (ln2/256) , creating k
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bl,pn %icc,.range1 ! if hx < 0x3e300000 or > 0x40862e41
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f20,INVLN2_256,%f24 ! okay to put this here; for alignment
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis bl,pn %icc,.range2 ! if hx < 0x3e300000 or > 0x40862e41
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f4,%f2,%f4 ! creating k+j/256, sra to zero bits
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f8,LN2_256H,%f2 ! closest LN2_256 to x
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f8,LN2_256L,%f4 ! closest LN2_256 to x , added prec
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis!!!!!!!!!!!!!!!!!!! New polynomial reorder starts here
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ! Alternate polynomial grouping allowing non-sequential calc of p
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ! OLD : p = r * ( 1 + r * ( B1 + r * ( B2 + r * B3) ) )
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ! NEW : p = r * [ (1+r*B1) + (r*r) * ( B2 + r * B3) ) ]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis st %f8,[%fp+m0] ! store k, to shift return/use
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis st %f18,[%fp+m1] ! store k, to shift return/use
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis st %f28,[%fp+m2] ! store k, to shift return/use
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ld [%fp+m0],%l0 ! get nonshifted k into intreg
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ld [%fp+m1],%l1 ! get nonshifted k into intreg
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ld [%fp+m2],%l2 ! get nonshifted k into intreg
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sra %l0,8,%l3 ! shift k tobe offset 256-8byte
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sra %l1,8,%l4 ! shift k tobe offset 256-8byte
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sra %l2,8,%l5 ! shift k tobe offset 256-8byte
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis!!!!!!!!!!!!!!!!!!! poly-reorder - ends here
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f0,%f4,%f0 ! start exp(x) = exp(r) * tbl[j]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f0,%f6,%f6 ! cont exp(x) : apply tbl[j] high bits
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis faddd %f6,%f4,%f6 ! cont exp(x) : apply tbl[j+1] low bits
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fpadd32 %f6,%f8,%f6 ! done exp(x) : apply 2^k
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fbg,a,pt %fcc0,3f ! if x is huge and positive
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! x is near the extremes but within range; return to the loop
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f4,%f4,%f4 ! huge*huge or tiny*tiny
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fbg,a,pt %fcc0,3f ! if x is huge and positive
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! x is near the extremes but within range; return to the loop
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fmuld %f14,%f14,%f14 ! huge*huge or tiny*tiny
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis fbg,a,pt %fcc0,3f ! if x is huge and positive
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis! x is near the extremes but within range; return to the loop