/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/isa_defs.h>
#include "libm_inlines.h"
#ifdef _LITTLE_ENDIAN
#define HI(x) *(1+(int*)x)
#define LO(x) *(unsigned*)x
#else
#define HI(x) *(int*)x
#define LO(x) *(1+(unsigned*)x)
#endif
#ifdef __RESTRICT
#define restrict _Restrict
#else
#define restrict
#endif
/* double hypot(double x, double y)
*
* Method :
* 1. Special cases:
* x or y is +Inf or -Inf => +Inf
* x or y is NaN => QNaN
* 2. Computes hypot(x,y):
* hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
* Where:
* m = max(|x|,|y|)
* xnm = x * (1/m)
* ynm = y * (1/m)
*
* Compute xnm * xnm + ynm * ynm by simulating
* muti-precision arithmetic.
*
* Accuracy:
* Maximum error observed: less than 0.872 ulp after 16.777.216.000
* results.
*/
extern double sqrt(double);
extern double fabs(double);
static const unsigned long long LCONST[] = {
0x41b0000000000000ULL, /* D2ON28 = 2 ** 28 */
0x0010000000000000ULL, /* D2ONM1022 = 2 ** -1022 */
0x7fd0000000000000ULL /* D2ONP1022 = 2 ** 1022 */
};
static void
__vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
int stridey, double * restrict pz, int stridez);
#pragma no_inline(__vhypot_n)
#define RETURN(ret) \
{ \
*pz = (ret); \
py += stridey; \
pz += stridez; \
if (n_n == 0) \
{ \
hx0 = HI(px); \
hy0 = HI(py); \
spx = px; spy = py; spz = pz; \
continue; \
} \
n--; \
break; \
}
void
__vhypot(int n, double * restrict px, int stridex, double * restrict py,
int stridey, double * restrict pz, int stridez)
{
int hx0, hx1, hy0, j0, diff;
double x_hi, x_lo, y_hi, y_lo;
double scl = 0;
double x, y, res;
double *spx, *spy, *spz;
int n_n;
double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */
double D2ONM1022 = ((double*)LCONST)[1]; /* 2 **-1022 */
double D2ONP1022 = ((double*)LCONST)[2]; /* 2 ** 1022 */
while (n > 1)
{
n_n = 0;
spx = px;
spy = py;
spz = pz;
hx0 = HI(px);
hy0 = HI(py);
for (; n > 1 ; n--)
{
px += stridex;
hx0 &= 0x7fffffff;
hy0 &= 0x7fffffff;
if (hx0 >= 0x7fe00000) /* |X| >= 2**1023 or Inf or NaN */
{
diff = hy0 - hx0;
j0 = diff >> 31;
j0 = hy0 - (diff & j0);
j0 &= 0x7ff00000;
x = *(px - stridex);
y = *py;
x = fabs(x);
y = fabs(y);
if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
{
int lx = LO((px - stridex));
int ly = LO(py);
if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
else res = x + y;
RETURN (res)
}
else
{
j0 = diff >> 31;
if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
{
x *= D2ONM1022;
y *= D2ONM1022;
x_hi = (x + D2ON28) - D2ON28;
x_lo = x - x_hi;
y_hi = (y + D2ON28) - D2ON28;
y_lo = y - y_hi;
res = (x_hi * x_hi + y_hi * y_hi);
res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
res = sqrt (res);
res = D2ONP1022 * res;
RETURN (res)
}
else RETURN (x + y)
}
}
if (hy0 >= 0x7fe00000) /* |Y| >= 2**1023 or Inf or NaN */
{
diff = hy0 - hx0;
j0 = diff >> 31;
j0 = hy0 - (diff & j0);
j0 &= 0x7ff00000;
x = *(px - stridex);
y = *py;
x = fabs(x);
y = fabs(y);
if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
{
int lx = LO((px - stridex));
int ly = LO(py);
if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
else res = x + y;
RETURN (res)
}
else
{
j0 = diff >> 31;
if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
{
x *= D2ONM1022;
y *= D2ONM1022;
x_hi = (x + D2ON28) - D2ON28;
x_lo = x - x_hi;
y_hi = (y + D2ON28) - D2ON28;
y_lo = y - y_hi;
res = (x_hi * x_hi + y_hi * y_hi);
res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
res = sqrt (res);
res = D2ONP1022 * res;
RETURN (res)
}
else RETURN (x + y)
}
}
hx1 = HI(px);
if (hx0 < 0x00100000 && hy0 < 0x00100000) /* X and Y are subnormal */
{
x = *(px - stridex);
y = *py;
x *= D2ONP1022;
y *= D2ONP1022;
x_hi = (x + D2ON28) - D2ON28;
x_lo = x - x_hi;
y_hi = (y + D2ON28) - D2ON28;
y_lo = y - y_hi;
res = (x_hi * x_hi + y_hi * y_hi);
res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
res = sqrt(res);
res = D2ONM1022 * res;
RETURN (res)
}
hx0 = hx1;
py += stridey;
pz += stridez;
n_n++;
hy0 = HI(py);
}
if (n_n > 0)
__vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez);
}
if (n > 0)
{
x = *px;
y = *py;
hx0 = HI(px);
hy0 = HI(py);
hx0 &= 0x7fffffff;
hy0 &= 0x7fffffff;
diff = hy0 - hx0;
j0 = diff >> 31;
j0 = hy0 - (diff & j0);
j0 &= 0x7ff00000;
if (j0 >= 0x7fe00000) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
{
x = fabs(x);
y = fabs(y);
if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
{
int lx = LO(px);
int ly = LO(py);
if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
else res = x + y;
*pz = res;
return;
}
else
{
j0 = diff >> 31;
if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
{
x *= D2ONM1022;
y *= D2ONM1022;
x_hi = (x + D2ON28) - D2ON28;
x_lo = x - x_hi;
y_hi = (y + D2ON28) - D2ON28;
y_lo = y - y_hi;
res = (x_hi * x_hi + y_hi * y_hi);
res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
res = sqrt (res);
res = D2ONP1022 * res;
*pz = res;
return;
}
else
{
*pz = x + y;
return;
}
}
}
if (j0 < 0x00100000) /* X and Y are subnormal */
{
x *= D2ONP1022;
y *= D2ONP1022;
x_hi = (x + D2ON28) - D2ON28;
x_lo = x - x_hi;
y_hi = (y + D2ON28) - D2ON28;
y_lo = y - y_hi;
res = (x_hi * x_hi + y_hi * y_hi);
res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
res = sqrt(res);
res = D2ONM1022 * res;
*pz = res;
return;
}
HI(&scl) = (0x7fe00000 - j0);
x *= scl;
y *= scl;
x_hi = (x + D2ON28) - D2ON28;
y_hi = (y + D2ON28) - D2ON28;
x_lo = x - x_hi;
y_lo = y - y_hi;
res = (x_hi * x_hi + y_hi * y_hi);
res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
res = sqrt(res);
HI(&scl) = j0;
res = scl * res;
*pz = res;
}
}
static void
__vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
int stridey, double * restrict pz, int stridez)
{
int hx0, hy0, j0, diff0;
double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
double x0, y0, res0;
double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */
for(; n > 0 ; n--)
{
x0 = *px;
y0 = *py;
hx0 = HI(px);
hy0 = HI(py);
hx0 &= 0x7fffffff;
hy0 &= 0x7fffffff;
diff0 = hy0 - hx0;
j0 = diff0 >> 31;
j0 = hy0 - (diff0 & j0);
j0 &= 0x7ff00000;
px += stridex;
py += stridey;
HI(&scl0) = (0x7fe00000 - j0);
x0 *= scl0;
y0 *= scl0;
x_hi0 = (x0 + D2ON28) - D2ON28;
y_hi0 = (y0 + D2ON28) - D2ON28;
x_lo0 = x0 - x_hi0;
y_lo0 = y0 - y_hi0;
res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
res0 = sqrt(res0);
HI(&scl0) = j0;
res0 = scl0 * res0;
*pz = res0;
pz += stridez;
}
}