sqrtl.c revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER START
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * The contents of this file are subject to the terms of the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Common Development and Distribution License (the "License").
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You may not use this file except in compliance with the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * See the License for the specific language governing permissions
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * and limitations under the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER END
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Use is subject to license terms.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisextern int __swapTE(int);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisextern int __swapEX(int);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisextern enum fp_direction_type __swapRD(enum fp_direction_type);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * in struct longdouble, msw consists of
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * unsigned short sgn:1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * unsigned short exp:15;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * unsigned short frac1:16;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* array indices used to access words within a double */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* structure used to access words within a quad */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis unsigned int msw;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis long double d;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* default NaN returned for sqrt(neg) */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisstatic const union longdouble
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis qnan = { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff };
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* signalling NaN used to raise invalid */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisstatic const union {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis unsigned u[2];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* array indices used to access words within a double */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* structure used to access words within a quad */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis unsigned int msw;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis long double d;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* default NaN returned for sqrt(neg) */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisstatic const union longdouble
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis qnan = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff };
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* signalling NaN used to raise invalid */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisstatic const union {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis unsigned u[2];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#endif /* __LITTLE_ENDIAN */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisstatic const double
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis* Extract the exponent and normalized significand (represented as
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis* an array of five doubles) from a finite, nonzero quad.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis__q_unpack(const union longdouble *x, double *s)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis unsigned int l[2];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* get the normalized significand and exponent */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ex = (int) ((x->l.msw & 0x7fffffff) >> 16);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* extract the significand into five doubles */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis s[0] = u.d - b;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis s[1] = u.d - b;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis s[2] = u.d - b;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis s[3] = u.d - b;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis s[4] = u.d - b;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis* Pack an exponent and array of three doubles representing a finite,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis* nonzero number into a quad. Assume the sign is already there and
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis* the rounding mode has been fudged accordingly.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis__q_pack(const double *z, int exp, enum fp_direction_type rm,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis unsigned int l[2];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* bias exponent and strip off integer bit */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis s[0] = z[0] - one;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * chop the significand to obtain the fraction;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * use round-to-minus-infinity to ensure chopping
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* extract the first eighty bits of fraction */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis u.d = two36 + (s[0] + t);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis s[0] -= (u.d - two36);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis u.d = two4 + (s[0] + t);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis s[0] -= (u.d - two4);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis u.d = twom28 + (s[0] + t);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis s[0] -= (u.d - twom28);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* condense the remaining fraction; errors here won't matter */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis t = s[0] + s[1];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* get the last word of fraction */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis s[0] -= (u.d - twom60);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * keep track of what's left for rounding; note that
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * t2 will be non-negative due to rounding mode
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis t = s[0] + s[1];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* decide whether to round the fraction up */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (rm == fp_positive || (rm == fp_nearest && (t > twom113 ||
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis (t == twom113 && (t2 != zero || frac4 & 1)))))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* round up and renormalize if necessary */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* assemble the result */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis* Compute the square root of x and place the TP result in s.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis__q_tp_sqrt(const double *x, double *s)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* approximate the divisor for the Newton iteration */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* compute the first five "digits" of the square root */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis tt[0] = t[0] + t[0];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis r[0] = ((x[0] - t[0] * t[0]) + x[1]) + x[2];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis t[1] = (rr * (r[0] + x[3]) + two6) - two6;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis t[2] = (rr * (r[0] + r[1]) + twom18) - twom18;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis t[3] = (rr * (r[0] + r[1]) + twom42) - twom42;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis r[0] = ((r[0] - tt[0] * t[3]) + r[1]) - tt[1] * t[3];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis t[4] = (rr * (r[0] + r[1]) + twom66) - twom66;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* here we just need to get the sign of the remainder */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis c = (((((r[0] - tt[0] * t[4]) - tt[1] * t[4]) + r[1])
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis - tt[2] * t[4]) - (t[3] + t[3]) * t[4]) - t[4] * t[4];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* reduce to three doubles */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* if the third term might lie on a rounding boundary, perturb it */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (c != zero && t[2] == (twom62 + t[2]) - twom62)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* condense the square root */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis c = t[0] + t[1];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis c = s[0] + t[2];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis volatile double t;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* clear cexc */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* check for zero operand */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (!((x.l.msw & 0x7fffffff) | x.l.frac2 | x.l.frac3 | x.l.frac4))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* handle nan and inf cases */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if ((x.l.msw & 0xffff) | x.l.frac2 | x.l.frac3 | x.l.frac4)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* snan, signal invalid */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* sqrt(-inf), signal invalid */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* sqrt(inf), return inf */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* handle negative numbers */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* now x is finite, positive */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* make exponent even */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* put everything together */