sparc/fp/_Q_sqrt.c

2N/A/*
2N/A * CDDL HEADER START
2N/A *
2N/A * The contents of this file are subject to the terms of the
2N/A * Common Development and Distribution License, Version 1.0 only
2N/A * (the "License").  You may not use this file except in compliance
2N/A * with the License.
2N/A *
2N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2N/A * or http://www.opensolaris.org/os/licensing.
2N/A * See the License for the specific language governing permissions
2N/A * and limitations under the License.
2N/A *
2N/A * When distributing Covered Code, include this CDDL HEADER in each
2N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
2N/A * If applicable, add the following below this CDDL HEADER, with the
2N/A * fields enclosed by brackets "[]" replaced with your own identifying
2N/A * information: Portions Copyright [yyyy] [name of copyright owner]
2N/A *
2N/A * CDDL HEADER END
2N/A */
2N/A/*
2N/A * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
2N/A * Use is subject to license terms.
2N/A */
2N/A
2N/A#pragma ident   "%Z%%M% %I% %E% SMI"
2N/A
2N/A#include "quad.h"
2N/A
2N/Astatic const double C[] = {
2N/A    0.0,
2N/A    0.5,
2N/A    1.0,
2N/A    68719476736.0,
2N/A    536870912.0,
2N/A    48.0,
2N/A    16.0,
2N/A    1.52587890625000000000e-05,
2N/A    2.86102294921875000000e-06,
2N/A    5.96046447753906250000e-08,
2N/A    3.72529029846191406250e-09,
2N/A    1.70530256582424044609e-13,
2N/A    7.10542735760100185871e-15,
2N/A    8.67361737988403547206e-19,
2N/A    2.16840434497100886801e-19,
2N/A    1.27054942088145050860e-21,
2N/A    1.21169035041947413311e-27,
2N/A    9.62964972193617926528e-35,
2N/A    4.70197740328915003187e-38
2N/A};
2N/A
2N/A#define zero        C[0]
2N/A#define half        C[1]
2N/A#define one     C[2]
2N/A#define two36       C[3]
2N/A#define two29       C[4]
2N/A#define three2p4    C[5]
2N/A#define two4        C[6]
2N/A#define twom16      C[7]
2N/A#define three2m20   C[8]
2N/A#define twom24      C[9]
2N/A#define twom28      C[10]
2N/A#define three2m44   C[11]
2N/A#define twom47      C[12]
2N/A#define twom60      C[13]
2N/A#define twom62      C[14]
2N/A#define three2m71   C[15]
2N/A#define three2m91   C[16]
2N/A#define twom113     C[17]
2N/A#define twom124     C[18]
2N/A
2N/Astatic const unsigned
2N/A    fsr_re = 0x00000000u,
2N/A    fsr_rn = 0xc0000000u;
2N/A
2N/A#ifdef __sparcv9
2N/A
2N/A/*
2N/A * _Qp_sqrt(pz, x) sets *pz = sqrt(*x).
2N/A */
2N/Avoid
2N/A_Qp_sqrt(union longdouble *pz, const union longdouble *x)
2N/A
2N/A#else
2N/A
2N/A/*
2N/A * _Q_sqrt(x) returns sqrt(*x).
2N/A */
2N/Aunion longdouble
2N/A_Q_sqrt(const union longdouble *x)
2N/A
2N/A#endif  /* __sparcv9 */
2N/A
2N/A{
2N/A    union longdouble    z;
2N/A    union xdouble       u;
2N/A    double          c, d, rr, r[2], tt[3], xx[4], zz[5];
2N/A    unsigned int        xm, fsr, lx, wx[3];
2N/A    unsigned int        msw, frac2, frac3, frac4, rm;
2N/A    int         ex, ez;
2N/A
2N/A    if (QUAD_ISZERO(*x)) {
2N/A        Z = *x;
2N/A        QUAD_RETURN(Z);
2N/A    }
2N/A
2N/A    xm = x->l.msw;
2N/A
2N/A    __quad_getfsrp(&fsr);
2N/A
2N/A    /* handle nan and inf cases */
2N/A    if ((xm & 0x7fffffff) >= 0x7fff0000) {
2N/A        if ((x->l.msw & 0xffff) | x->l.frac2 | x->l.frac3 |
2N/A            x->l.frac4) {
2N/A            if (!(x->l.msw & 0x8000)) {
2N/A                /* snan, signal invalid */
2N/A                if (fsr & FSR_NVM) {
2N/A                    __quad_fsqrtq(x, &Z);
2N/A                } else {
2N/A                    Z = *x;
2N/A                    Z.l.msw |= 0x8000;
2N/A                    fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
2N/A                        FSR_NVC;
2N/A                    __quad_setfsrp(&fsr);
2N/A                }
2N/A                QUAD_RETURN(Z);
2N/A            }
2N/A            Z = *x;
2N/A            QUAD_RETURN(Z);
2N/A        }
2N/A        if (x->l.msw & 0x80000000) {
2N/A            /* sqrt(-inf), signal invalid */
2N/A            if (fsr & FSR_NVM) {
2N/A                __quad_fsqrtq(x, &Z);
2N/A            } else {
2N/A                Z.l.msw = 0x7fffffff;
2N/A                Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff;
2N/A                fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC;
2N/A                __quad_setfsrp(&fsr);
2N/A            }
2N/A            QUAD_RETURN(Z);
2N/A        }
2N/A        /* sqrt(inf), return inf */
2N/A        Z = *x;
2N/A        QUAD_RETURN(Z);
2N/A    }
2N/A
2N/A    /* handle negative numbers */
2N/A    if (xm & 0x80000000) {
2N/A        if (fsr & FSR_NVM) {
2N/A            __quad_fsqrtq(x, &Z);
2N/A        } else {
2N/A            Z.l.msw = 0x7fffffff;
2N/A            Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff;
2N/A            fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC;
2N/A            __quad_setfsrp(&fsr);
2N/A        }
2N/A        QUAD_RETURN(Z);
2N/A    }
2N/A
2N/A    /* now x is finite, positive */
2N/A    __quad_setfsrp((unsigned *)&fsr_re);
2N/A
2N/A    /* get the normalized significand and exponent */
2N/A    ex = (int)(xm >> 16);
2N/A    lx = xm & 0xffff;
2N/A    if (ex) {
2N/A        lx |= 0x10000;
2N/A        wx[0] = x->l.frac2;
2N/A        wx[1] = x->l.frac3;
2N/A        wx[2] = x->l.frac4;
2N/A    } else {
2N/A        if (lx | (x->l.frac2 & 0xfffe0000)) {
2N/A            wx[0] = x->l.frac2;
2N/A            wx[1] = x->l.frac3;
2N/A            wx[2] = x->l.frac4;
2N/A            ex = 1;
2N/A        } else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) {
2N/A            lx = x->l.frac2;
2N/A            wx[0] = x->l.frac3;
2N/A            wx[1] = x->l.frac4;
2N/A            wx[2] = 0;
2N/A            ex = -31;
2N/A        } else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) {
2N/A            lx = x->l.frac3;
2N/A            wx[0] = x->l.frac4;
2N/A            wx[1] = wx[2] = 0;
2N/A            ex = -63;
2N/A        } else {
2N/A            lx = x->l.frac4;
2N/A            wx[0] = wx[1] = wx[2] = 0;
2N/A            ex = -95;
2N/A        }
2N/A        while ((lx & 0x10000) == 0) {
2N/A            lx = (lx << 1) | (wx[0] >> 31);
2N/A            wx[0] = (wx[0] << 1) | (wx[1] >> 31);
2N/A            wx[1] = (wx[1] << 1) | (wx[2] >> 31);
2N/A            wx[2] <<= 1;
2N/A            ex--;
2N/A        }
2N/A    }
2N/A    ez = ex - 0x3fff;
2N/A    if (ez & 1) {
2N/A        /* make exponent even */
2N/A        lx = (lx << 1) | (wx[0] >> 31);
2N/A        wx[0] = (wx[0] << 1) | (wx[1] >> 31);
2N/A        wx[1] = (wx[1] << 1) | (wx[2] >> 31);
2N/A        wx[2] <<= 1;
2N/A        ez--;
2N/A    }
2N/A
2N/A    /* extract the significands into doubles */
2N/A    c = twom16;
2N/A    xx[0] = (double)((int)lx) * c;
2N/A
2N/A    c *= twom24;
2N/A    xx[0] += (double)((int)(wx[0] >> 8)) * c;
2N/A
2N/A    c *= twom24;
2N/A    xx[1] = (double)((int)(((wx[0] << 16) | (wx[1] >> 16)) &
2N/A        0xffffff)) * c;
2N/A
2N/A    c *= twom24;
2N/A    xx[2] = (double)((int)(((wx[1] << 8) | (wx[2] >> 24)) &
2N/A        0xffffff)) * c;
2N/A
2N/A    c *= twom24;
2N/A    xx[3] = (double)((int)(wx[2] & 0xffffff)) * c;
2N/A
2N/A    /* approximate the divisor for the Newton iteration */
2N/A    c = xx[0] + xx[1];
2N/A    c = __quad_dp_sqrt(&c);
2N/A    rr = half / c;
2N/A
2N/A    /* compute the first five "digits" of the square root */
2N/A    zz[0] = (c + two29) - two29;
2N/A    tt[0] = zz[0] + zz[0];
2N/A    r[0] = (xx[0] - zz[0] * zz[0]) + xx[1];
2N/A
2N/A    zz[1] = (rr * (r[0] + xx[2]) + three2p4) - three2p4;
2N/A    tt[1] = zz[1] + zz[1];
2N/A    r[0] -= tt[0] * zz[1];
2N/A    r[1] = xx[2] - zz[1] * zz[1];
2N/A    c = (r[1] + three2m20) - three2m20;
2N/A    r[0] += c;
2N/A    r[1] = (r[1] - c) + xx[3];
2N/A
2N/A    zz[2] = (rr * (r[0] + r[1]) + three2m20) - three2m20;
2N/A    tt[2] = zz[2] + zz[2];
2N/A    r[0] -= tt[0] * zz[2];
2N/A    r[1] -= tt[1] * zz[2];
2N/A    c = (r[1] + three2m44) - three2m44;
2N/A    r[0] += c;
2N/A    r[1] = (r[1] - c) - zz[2] * zz[2];
2N/A
2N/A    zz[3] = (rr * (r[0] + r[1]) + three2m44) - three2m44;
2N/A    r[0] = ((r[0] - tt[0] * zz[3]) + r[1]) - tt[1] * zz[3];
2N/A    r[1] = -tt[2] * zz[3];
2N/A    c = (r[1] + three2m91) - three2m91;
2N/A    r[0] += c;
2N/A    r[1] = (r[1] - c) - zz[3] * zz[3];
2N/A
2N/A    zz[4] = (rr * (r[0] + r[1]) + three2m71) - three2m71;
2N/A
2N/A    /* reduce to three doubles, making sure zz[1] is positive */
2N/A    zz[0] += zz[1] - twom47;
2N/A    zz[1] = twom47 + zz[2] + zz[3];
2N/A    zz[2] = zz[4];
2N/A
2N/A    /* if the third term might lie on a rounding boundary, perturb it */
2N/A    if (zz[2] == (twom62 + zz[2]) - twom62) {
2N/A        /* here we just need to get the sign of the remainder */
2N/A        c = (((((r[0] - tt[0] * zz[4]) - tt[1] * zz[4]) + r[1])
2N/A            - tt[2] * zz[4]) - (zz[3] + zz[3]) * zz[4]) - zz[4] * zz[4];
2N/A        if (c < zero)
2N/A            zz[2] -= twom124;
2N/A        else if (c > zero)
2N/A            zz[2] += twom124;
2N/A    }
2N/A
2N/A    /*
2N/A     * propagate carries/borrows, using round-to-negative-infinity mode
2N/A     * to make all terms nonnegative (note that we can't encounter a
2N/A     * borrow so large that the roundoff is unrepresentable because
2N/A     * we took care to make zz[1] positive above)
2N/A     */
2N/A    __quad_setfsrp(&fsr_rn);
2N/A    c = zz[1] + zz[2];
2N/A    zz[2] += (zz[1] - c);
2N/A    zz[1] = c;
2N/A    c = zz[0] + zz[1];
2N/A    zz[1] += (zz[0] - c);
2N/A    zz[0] = c;
2N/A
2N/A    /* adjust exponent and strip off integer bit */
2N/A    ez = (ez >> 1) + 0x3fff;
2N/A    zz[0] -= one;
2N/A
2N/A    /* the first 48 bits of fraction come from zz[0] */
2N/A    u.d = d = two36 + zz[0];
2N/A    msw = u.l.lo;
2N/A    zz[0] -= (d - two36);
2N/A
2N/A    u.d = d = two4 + zz[0];
2N/A    frac2 = u.l.lo;
2N/A    zz[0] -= (d - two4);
2N/A
2N/A    /* the next 32 come from zz[0] and zz[1] */
2N/A    u.d = d = twom28 + (zz[0] + zz[1]);
2N/A    frac3 = u.l.lo;
2N/A    zz[0] -= (d - twom28);
2N/A
2N/A    /* condense the remaining fraction; errors here won't matter */
2N/A    c = zz[0] + zz[1];
2N/A    zz[1] = ((zz[0] - c) + zz[1]) + zz[2];
2N/A    zz[0] = c;
2N/A
2N/A    /* get the last word of fraction */
2N/A    u.d = d = twom60 + (zz[0] + zz[1]);
2N/A    frac4 = u.l.lo;
2N/A    zz[0] -= (d - twom60);
2N/A
2N/A    /* keep track of what's left for rounding; note that the error */
2N/A    /* in computing c will be non-negative due to rounding mode */
2N/A    c = zz[0] + zz[1];
2N/A
2N/A    /* get the rounding mode */
2N/A    rm = fsr >> 30;
2N/A
2N/A    /* round and raise exceptions */
2N/A    fsr &= ~FSR_CEXC;
2N/A    if (c != zero) {
2N/A        fsr |= FSR_NXC;
2N/A
2N/A        /* decide whether to round the fraction up */
2N/A        if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 ||
2N/A            (c == twom113 && ((frac4 & 1) || (c - zz[0] != zz[1])))))) {
2N/A            /* round up and renormalize if necessary */
2N/A            if (++frac4 == 0)
2N/A                if (++frac3 == 0)
2N/A                    if (++frac2 == 0)
2N/A                        if (++msw == 0x10000) {
2N/A                            msw = 0;
2N/A                            ez++;
2N/A                        }
2N/A        }
2N/A    }
2N/A
2N/A    /* stow the result */
2N/A    z.l.msw = (ez << 16) | msw;
2N/A    z.l.frac2 = frac2;
2N/A    z.l.frac3 = frac3;
2N/A    z.l.frac4 = frac4;
2N/A
2N/A    if ((fsr & FSR_CEXC) & (fsr >> 23)) {
2N/A        __quad_setfsrp(&fsr);
2N/A        __quad_fsqrtq(x, &Z);
2N/A    } else {
2N/A        Z = z;
2N/A        fsr |= (fsr & 0x1f) << 5;
2N/A        __quad_setfsrp(&fsr);
2N/A    }
2N/A    QUAD_RETURN(Z);
2N/A}