25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER START
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * The contents of this file are subject to the terms of the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Common Development and Distribution License (the "License").
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You may not use this file except in compliance with the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * See the License for the specific language governing permissions
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * and limitations under the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER END
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/*
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Use is subject to license terms.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#include <sys/isa_defs.h>
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#include "libm_inlines.h"
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#ifdef _LITTLE_ENDIAN
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#define HI(x) *(1+(int*)x)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#define LO(x) *(unsigned*)x
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#else
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#define HI(x) *(int*)x
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#define LO(x) *(1+(unsigned*)x)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#endif
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#ifdef __RESTRICT
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#define restrict _Restrict
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#else
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#define restrict
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#endif
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis/* double rsqrt(double x)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Method :
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * 1. Special cases:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * for x = NaN => QNaN;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * for x = +Inf => 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * for x is negative, -Inf => QNaN + invalid;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * for x = +0 => +Inf + divide-by-zero;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * for x = -0 => -Inf + divide-by-zero.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * 2. Computes reciprocal square root from:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * x = m * 2**n
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Where:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * m = [0.5, 2),
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * n = ((exponent + 1) & ~1).
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Then:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * rsqrt(x) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * 2. Computes 1/sqrt(m) from:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * 1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Where:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * m = m0 + dm,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * m0 = 2.0 for m = [1.0+127/128, 2.0), k = 128.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Then:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * 1/sqrt(m0) is looked up in a table,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * 1/m0 is computed as (1/sqrt(m0)) * (1/sqrt(m0)).
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * 1/sqrt(1 + (1/m0)*dm) is computed using approximation:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * 1/sqrt(1 + z) = (((((a6 * z + a5) * z + a4) * z + a3)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * * z + a2) * z + a1) * z + a0
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * where z = [-1/128, 1/128].
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Accuracy:
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * The maximum relative error for the approximating
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * polynomial is 2**(-56.26).
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Maximum error observed: less than 0.563 ulp after 1.500.000.000
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * results.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisextern double sqrt (double);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisextern const double __vlibm_TBL_rsqrt[];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisstatic void
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis__vrsqrt_n(int n, double * restrict px, int stridex, double * restrict py, int stridey);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#pragma no_inline(__vrsqrt_n)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis#define RETURN(ret) \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis{ \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *py = (ret); \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis py += stridey; \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (n_n == 0) \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis { \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis spx = px; spy = py; \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis hx = HI(px); \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis continue; \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis } \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis n--; \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis break; \
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis}
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisstatic const double
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis DONE = 1.0,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis K1 = -5.00000000000005209867e-01,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis K2 = 3.75000000000004884257e-01,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis K3 = -3.12499999317136886551e-01,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis K4 = 2.73437499359815081532e-01,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis K5 = -2.46116125605037803130e-01,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis K6 = 2.25606914648617522896e-01;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisvoid
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis__vrsqrt(int n, double * restrict px, int stridex, double * restrict py, int stridey)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis{
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double *spx, *spy;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int ax, lx, hx, n_n;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double res;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis while (n > 1)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis n_n = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis spx = px;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis spy = py;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis hx = HI(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for (; n > 1 ; n--)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis px += stridex;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (hx >= 0x7ff00000) /* X = NaN or Inf */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = *(px - stridex);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis RETURN (DONE / res)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis py += stridey;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (hx < 0x00100000) /* X = denormal, zero or negative */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis py -= stridey;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ax = hx & 0x7fffffff;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis lx = LO((px - stridex));
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = *(px - stridex);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if ((ax | lx) == 0) /* |X| = zero */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis RETURN (DONE / res)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis else if (hx >= 0) /* X = denormal */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double res_c0, dsqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int ind0, sqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double xx0, dexp_hi0, dexp_lo0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int hx0, resh0, res_ch0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = *(long long*)&res;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis hx0 = HI(&res);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sqrt_exp0 = (0x817 - (hx0 >> 21)) << 20;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis resh0 = (hx0 & 0x001fffff) | 0x3fe00000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res_ch0 = (resh0 + 0x00002000) & 0x7fffc000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res) = resh0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res_c0) = res_ch0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res_c0) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx0 = dexp_hi0 * dexp_hi0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx0 = (res - res_c0) * xx0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = dexp_hi0 * res + dexp_lo0 + dexp_hi0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&dsqrt_exp0) = sqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&dsqrt_exp0) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res *= dsqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis RETURN (res)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis else /* X = negative */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis RETURN (sqrt(res))
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis n_n++;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis hx = HI(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (n_n > 0)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis __vrsqrt_n(n_n, spx, stridex, spy, stridey);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (n > 0)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis hx = HI(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (hx >= 0x7ff00000) /* X = NaN or Inf */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = *px;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *py = DONE / res;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis else if (hx < 0x00100000) /* X = denormal, zero or negative */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ax = hx & 0x7fffffff;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis lx = LO(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = *px;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if ((ax | lx) == 0) /* |X| = zero */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *py = DONE / res;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis else if (hx >= 0) /* X = denormal */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double res_c0, dsqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int ind0, sqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double xx0, dexp_hi0, dexp_lo0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int hx0, resh0, res_ch0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = *(long long*)&res;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis hx0 = HI(&res);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sqrt_exp0 = (0x817 - (hx0 >> 21)) << 20;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis resh0 = (hx0 & 0x001fffff) | 0x3fe00000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res_ch0 = (resh0 + 0x00002000) & 0x7fffc000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res) = resh0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res_c0) = res_ch0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res_c0) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx0 = dexp_hi0 * dexp_hi0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx0 = (res - res_c0) * xx0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = dexp_hi0 * res + dexp_lo0 + dexp_hi0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&dsqrt_exp0) = sqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&dsqrt_exp0) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res *= dsqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *py = res;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis else /* X = negative */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *py = sqrt(res);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis else
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double res_c0, dsqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int ind0, sqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double xx0, dexp_hi0, dexp_lo0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int resh0, res_ch0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sqrt_exp0 = (0x5fe - (hx >> 21)) << 20;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ind0 = (((hx >> 10) & 0x7f8) + 8) & -16;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis resh0 = (hx & 0x001fffff) | 0x3fe00000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res_ch0 = (resh0 + 0x00002000) & 0x7fffc000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res) = resh0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res) = LO(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res_c0) = res_ch0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res_c0) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx0 = dexp_hi0 * dexp_hi0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx0 = (res - res_c0) * xx0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res = dexp_hi0 * res + dexp_lo0 + dexp_hi0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&dsqrt_exp0) = sqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&dsqrt_exp0) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res *= dsqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *py = res;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis}
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisstatic void
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis__vrsqrt_n(int n, double * restrict px, int stridex, double * restrict py, int stridey)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis{
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double res0, res_c0, dsqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double res1, res_c1, dsqrt_exp1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double res2, res_c2, dsqrt_exp2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int ind0, sqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int ind1, sqrt_exp1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int ind2, sqrt_exp2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double xx0, dexp_hi0, dexp_lo0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double xx1, dexp_hi1, dexp_lo1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis double xx2, dexp_hi2, dexp_lo2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int hx0, resh0, res_ch0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int hx1, resh1, res_ch1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int hx2, resh2, res_ch2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&dsqrt_exp0) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&dsqrt_exp1) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&dsqrt_exp2) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res_c0) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res_c1) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res_c2) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for(; n > 2 ; n -= 3)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis hx0 = HI(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res0) = LO(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis px += stridex;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis hx1 = HI(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res1) = LO(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis px += stridex;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis hx2 = HI(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res2) = LO(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis px += stridex;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sqrt_exp0 = (0x5fe - (hx0 >> 21)) << 20;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sqrt_exp1 = (0x5fe - (hx1 >> 21)) << 20;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sqrt_exp2 = (0x5fe - (hx2 >> 21)) << 20;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ind1 = (((hx1 >> 10) & 0x7f8) + 8) & -16;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ind2 = (((hx2 >> 10) & 0x7f8) + 8) & -16;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis resh0 = (hx0 & 0x001fffff) | 0x3fe00000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis resh1 = (hx1 & 0x001fffff) | 0x3fe00000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis resh2 = (hx2 & 0x001fffff) | 0x3fe00000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res_ch0 = (resh0 + 0x00002000) & 0x7fffc000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res_ch1 = (resh1 + 0x00002000) & 0x7fffc000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res_ch2 = (resh2 + 0x00002000) & 0x7fffc000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res0) = resh0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res1) = resh1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res2) = resh2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res_c0) = res_ch0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res_c1) = res_ch1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res_c2) = res_ch2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_hi1 = ((double*)((char*)__vlibm_TBL_rsqrt + ind1))[0];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_hi2 = ((double*)((char*)__vlibm_TBL_rsqrt + ind2))[0];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_lo1 = ((double*)((char*)__vlibm_TBL_rsqrt + ind1))[1];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_lo2 = ((double*)((char*)__vlibm_TBL_rsqrt + ind2))[1];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx0 = dexp_hi0 * dexp_hi0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx1 = dexp_hi1 * dexp_hi1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx2 = dexp_hi2 * dexp_hi2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx0 = (res0 - res_c0) * xx0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx1 = (res1 - res_c1) * xx1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx2 = (res2 - res_c2) * xx2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res0 = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res1 = (((((K6 * xx1 + K5) * xx1 + K4) * xx1 + K3) * xx1 + K2) * xx1 + K1) * xx1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res2 = (((((K6 * xx2 + K5) * xx2 + K4) * xx2 + K3) * xx2 + K2) * xx2 + K1) * xx2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res0 = dexp_hi0 * res0 + dexp_lo0 + dexp_hi0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res1 = dexp_hi1 * res1 + dexp_lo1 + dexp_hi1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res2 = dexp_hi2 * res2 + dexp_lo2 + dexp_hi2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&dsqrt_exp0) = sqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&dsqrt_exp1) = sqrt_exp1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&dsqrt_exp2) = sqrt_exp2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res0 *= dsqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res1 *= dsqrt_exp1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res2 *= dsqrt_exp2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *py = res0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis py += stridey;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *py = res1;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis py += stridey;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *py = res2;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis py += stridey;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis for(; n > 0 ; n--)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis hx0 = HI(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sqrt_exp0 = (0x5fe - (hx0 >> 21)) << 20;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis resh0 = (hx0 & 0x001fffff) | 0x3fe00000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res_ch0 = (resh0 + 0x00002000) & 0x7fffc000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res0) = resh0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res0) = LO(px);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&res_c0) = res_ch0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&res_c0) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis px += stridex;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx0 = dexp_hi0 * dexp_hi0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx0 = (res0 - res_c0) * xx0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res0 = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res0 = dexp_hi0 * res0 + dexp_lo0 + dexp_hi0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis HI(&dsqrt_exp0) = sqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis LO(&dsqrt_exp0) = 0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis res0 *= dsqrt_exp0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis *py = res0;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis py += stridey;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis}