__vcos.c revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/isa_defs.h>
#include <sys/ccompile.h>
#ifdef _LITTLE_ENDIAN
#define HI(x) *(1+(int*)x)
#define LO(x) *(unsigned*)x
#else
#define HI(x) *(int*)x
#define LO(x) *(1+(unsigned*)x)
#endif
#ifdef __RESTRICT
#define restrict _Restrict
#else
#define restrict
#endif
/*
* vcos.1.c
*
* Vector cosine function. Just slight modifications to vsin.8.c, mainly
* in the primary range part.
*
* Modification to primary range processing. If an argument that does not
* fall in the primary range is encountered, then processing is continued
* in the medium range.
*
*/
extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
static const double
half[2] = { 0.5, -0.5 },
one = 1.0,
invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */
pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */
pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
pp1 = -1.666666666605760465276263943134982554676e-0001,
pp2 = 8.333261209690963126718376566146180944442e-0003,
qq1 = -4.999999999977710986407023955908711557870e-0001,
qq2 = 4.166654863857219350645055881018842089580e-0002,
poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
-4.999999999999931701464060878888294524481e-0001 },
poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
4.166666666394861917535640593963708222319e-0002 },
poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
-1.388888552656142867832756687736851681462e-0003 },
poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
2.478519423681460796618128289454530524759e-0005 };
static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
/* Don't __ the following; acomp will handle it */
extern double fabs(double);
extern void __vlibm_vcos_big(int, double *, int, double *, int, int);
/*
* y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
*
* Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
* Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
*
* elts < 2^-27 use the approximation 1.0 ~ cos(x).
*/
void
__vcos(int n, double * restrict x, int stridex, double * restrict y,
int stridey)
{
double x0_or_one[4], x1_or_one[4], x2_or_one[4];
double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
int i, biguns, nsave, sxsave, sysave;
volatile int v __GNU_UNUSED;
nsave = n;
xsave = x;
sxsave = stridex;
ysave = y;
sysave = stridey;
biguns = 0;
do /* MAIN LOOP */
{
/* Gotos here so _break_ exits MAIN LOOP. */
LOOP0: /* Find first arg in right range. */
xsb0 = HI(x); /* get most significant word */
hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
if (hx0 > 0x3fe921fb) {
/* Too big: arg reduction needed, so leave for second part */
biguns = 1;
goto MEDIUM;
}
if (hx0 < 0x3e400000) {
/* Too small. cos x ~ 1. */
v = *x;
*y = 1.0;
x += stridex;
y += stridey;
i = 0;
if (--n <= 0)
break;
goto LOOP0;
}
x0 = *x;
py0 = y;
x += stridex;
y += stridey;
i = 1;
if (--n <= 0)
break;
LOOP1: /* Get second arg, same as above. */
xsb1 = HI(x);
hx1 = xsb1 & ~0x80000000;
if (hx1 > 0x3fe921fb)
{
biguns = 2;
goto MEDIUM;
}
if (hx1 < 0x3e400000)
{
v = *x;
*y = 1.0;
x += stridex;
y += stridey;
i = 1;
if (--n <= 0)
break;
goto LOOP1;
}
x1 = *x;
py1 = y;
x += stridex;
y += stridey;
i = 2;
if (--n <= 0)
break;
LOOP2: /* Get third arg, same as above. */
xsb2 = HI(x);
hx2 = xsb2 & ~0x80000000;
if (hx2 > 0x3fe921fb)
{
biguns = 3;
goto MEDIUM;
}
if (hx2 < 0x3e400000)
{
v = *x;
*y = 1.0;
x += stridex;
y += stridey;
i = 2;
if (--n <= 0)
break;
goto LOOP2;
}
x2 = *x;
py2 = y;
/*
* 0x3fc40000 = 5/32 ~ 0.15625
* Get msb after subtraction. Will be 1 only if
* hx0 - 5/32 is negative.
*/
i = (hx0 - 0x3fc40000) >> 31;
i |= ((hx1 - 0x3fc40000) >> 30) & 2;
i |= ((hx2 - 0x3fc40000) >> 29) & 4;
switch (i)
{
double a0, a1, a2, w0, w1, w2;
double t0, t1, t2, z0, z1, z2;
unsigned j0, j1, j2;
case 0: /* All are > 5/32 */
j0 = (xsb0 + 0x4000) & 0xffff8000;
j1 = (xsb1 + 0x4000) & 0xffff8000;
j2 = (xsb2 + 0x4000) & 0xffff8000;
HI(&t0) = j0;
HI(&t1) = j1;
HI(&t2) = j2;
LO(&t0) = 0;
LO(&t1) = 0;
LO(&t2) = 0;
x0 -= t0;
x1 -= t1;
x2 -= t2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (qq1 + z0 * qq2);
t1 = z1 * (qq1 + z1 * qq2);
t2 = z2 * (qq1 + z2 * qq2);
w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb0 = (xsb0 >> 30) & 2;
xsb1 = (xsb1 >> 30) & 2;
xsb2 = (xsb2 >> 30) & 2;
a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
a1 = __vlibm_TBL_sincos_hi[j1+1];
a2 = __vlibm_TBL_sincos_hi[j2+1];
/* cos_lo(t) sin_hi(t) */
t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
*py0 = a0 + t0;
*py1 = a1 + t1;
*py2 = a2 + t2;
break;
case 1:
j1 = (xsb1 + 0x4000) & 0xffff8000;
j2 = (xsb2 + 0x4000) & 0xffff8000;
HI(&t1) = j1;
HI(&t2) = j2;
LO(&t1) = 0;
LO(&t2) = 0;
x1 -= t1;
x2 -= t2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (poly3[1] + z0 * poly4[1]);
t1 = z1 * (qq1 + z1 * qq2);
t2 = z2 * (qq1 + z2 * qq2);
t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb1 = (xsb1 >> 30) & 2;
xsb2 = (xsb2 >> 30) & 2;
a1 = __vlibm_TBL_sincos_hi[j1+1];
a2 = __vlibm_TBL_sincos_hi[j2+1];
t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
*py0 = one + t0;
*py1 = a1 + t1;
*py2 = a2 + t2;
break;
case 2:
j0 = (xsb0 + 0x4000) & 0xffff8000;
j2 = (xsb2 + 0x4000) & 0xffff8000;
HI(&t0) = j0;
HI(&t2) = j2;
LO(&t0) = 0;
LO(&t2) = 0;
x0 -= t0;
x2 -= t2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (qq1 + z0 * qq2);
t1 = z1 * (poly3[1] + z1 * poly4[1]);
t2 = z2 * (qq1 + z2 * qq2);
w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb0 = (xsb0 >> 30) & 2;
xsb2 = (xsb2 >> 30) & 2;
a0 = __vlibm_TBL_sincos_hi[j0+1];
a2 = __vlibm_TBL_sincos_hi[j2+1];
t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
*py0 = a0 + t0;
*py1 = one + t1;
*py2 = a2 + t2;
break;
case 3:
j2 = (xsb2 + 0x4000) & 0xffff8000;
HI(&t2) = j2;
LO(&t2) = 0;
x2 -= t2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (poly3[1] + z0 * poly4[1]);
t1 = z1 * (poly3[1] + z1 * poly4[1]);
t2 = z2 * (qq1 + z2 * qq2);
t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb2 = (xsb2 >> 30) & 2;
a2 = __vlibm_TBL_sincos_hi[j2+1];
t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
*py0 = one + t0;
*py1 = one + t1;
*py2 = a2 + t2;
break;
case 4:
j0 = (xsb0 + 0x4000) & 0xffff8000;
j1 = (xsb1 + 0x4000) & 0xffff8000;
HI(&t0) = j0;
HI(&t1) = j1;
LO(&t0) = 0;
LO(&t1) = 0;
x0 -= t0;
x1 -= t1;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (qq1 + z0 * qq2);
t1 = z1 * (qq1 + z1 * qq2);
t2 = z2 * (poly3[1] + z2 * poly4[1]);
w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb0 = (xsb0 >> 30) & 2;
xsb1 = (xsb1 >> 30) & 2;
a0 = __vlibm_TBL_sincos_hi[j0+1];
a1 = __vlibm_TBL_sincos_hi[j1+1];
t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
*py0 = a0 + t0;
*py1 = a1 + t1;
*py2 = one + t2;
break;
case 5:
j1 = (xsb1 + 0x4000) & 0xffff8000;
HI(&t1) = j1;
LO(&t1) = 0;
x1 -= t1;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (poly3[1] + z0 * poly4[1]);
t1 = z1 * (qq1 + z1 * qq2);
t2 = z2 * (poly3[1] + z2 * poly4[1]);
t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb1 = (xsb1 >> 30) & 2;
a1 = __vlibm_TBL_sincos_hi[j1+1];
t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
*py0 = one + t0;
*py1 = a1 + t1;
*py2 = one + t2;
break;
case 6:
j0 = (xsb0 + 0x4000) & 0xffff8000;
HI(&t0) = j0;
LO(&t0) = 0;
x0 -= t0;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (qq1 + z0 * qq2);
t1 = z1 * (poly3[1] + z1 * poly4[1]);
t2 = z2 * (poly3[1] + z2 * poly4[1]);
w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb0 = (xsb0 >> 30) & 2;
a0 = __vlibm_TBL_sincos_hi[j0+1];
t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
*py0 = a0 + t0;
*py1 = one + t1;
*py2 = one + t2;
break;
case 7: /* All are < 5/32 */
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (poly3[1] + z0 * poly4[1]);
t1 = z1 * (poly3[1] + z1 * poly4[1]);
t2 = z2 * (poly3[1] + z2 * poly4[1]);
t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
*py0 = one + t0;
*py1 = one + t1;
*py2 = one + t2;
break;
}
x += stridex;
y += stridey;
i = 0;
} while (--n > 0); /* END MAIN LOOP */
/*
* CLEAN UP last 0, 1, or 2 elts.
*/
if (i > 0) /* Clean up elts at tail. i < 3. */
{
double a0, a1, w0, w1;
double t0, t1, z0, z1;
unsigned j0, j1;
if (i > 1)
{
if (hx1 < 0x3fc40000)
{
z1 = x1 * x1;
t1 = z1 * (poly3[1] + z1 * poly4[1]);
t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
t1 = one + t1;
*py1 = t1;
}
else
{
j1 = (xsb1 + 0x4000) & 0xffff8000;
HI(&t1) = j1;
LO(&t1) = 0;
x1 -= t1;
z1 = x1 * x1;
t1 = z1 * (qq1 + z1 * qq2);
w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb1 = (xsb1 >> 30) & 2;
a1 = __vlibm_TBL_sincos_hi[j1+1];
t1 = __vlibm_TBL_sincos_lo[j1+1]
- (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
*py1 = a1 + t1;
}
}
if (hx0 < 0x3fc40000)
{
z0 = x0 * x0;
t0 = z0 * (poly3[1] + z0 * poly4[1]);
t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
t0 = one + t0;
*py0 = t0;
}
else
{
j0 = (xsb0 + 0x4000) & 0xffff8000;
HI(&t0) = j0;
LO(&t0) = 0;
x0 -= t0;
z0 = x0 * x0;
t0 = z0 * (qq1 + z0 * qq2);
w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb0 = (xsb0 >> 30) & 2;
a0 = __vlibm_TBL_sincos_hi[j0+1];
t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
*py0 = a0 + t0;
}
} /* END CLEAN UP */
return;
/*
* Take care of BIGUNS.
*
* We have jumped here in the middle of processing after having
* encountered a medium range argument. Therefore things are in a
* bit of a tizzy.
*/
MEDIUM:
x0_or_one[1] = 1.0;
x1_or_one[1] = 1.0;
x2_or_one[1] = 1.0;
x0_or_one[3] = -1.0;
x1_or_one[3] = -1.0;
x2_or_one[3] = -1.0;
y0_or_zero[1] = 0.0;
y1_or_zero[1] = 0.0;
y2_or_zero[1] = 0.0;
y0_or_zero[3] = 0.0;
y1_or_zero[3] = 0.0;
y2_or_zero[3] = 0.0;
if (biguns == 3)
{
biguns = 0;
xsb0 = xsb0 >> 31;
xsb1 = xsb1 >> 31;
goto loop2;
}
else if (biguns == 2)
{
xsb0 = xsb0 >> 31;
biguns = 0;
goto loop1;
}
biguns = 0;
do
{
double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
unsigned hx;
int n0, n1, n2;
/*
* Find 3 more to work on: Not already done, not too big.
*/
loop0:
hx = HI(x);
xsb0 = hx >> 31;
hx &= ~0x80000000;
if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */
{
if (hx >= 0x7ff00000) /* Inf or NaN */
{
x0 = *x;
*y = x0 - x0;
}
else
biguns = 1;
x += stridex;
y += stridey;
i = 0;
if (--n <= 0)
break;
goto loop0;
}
x0 = *x;
py0 = y;
x += stridex;
y += stridey;
i = 1;
if (--n <= 0)
break;
loop1:
hx = HI(x);
xsb1 = hx >> 31;
hx &= ~0x80000000;
if (hx > 0x413921fb)
{
if (hx >= 0x7ff00000)
{
x1 = *x;
*y = x1 - x1;
}
else
biguns = 1;
x += stridex;
y += stridey;
i = 1;
if (--n <= 0)
break;
goto loop1;
}
x1 = *x;
py1 = y;
x += stridex;
y += stridey;
i = 2;
if (--n <= 0)
break;
loop2:
hx = HI(x);
xsb2 = hx >> 31;
hx &= ~0x80000000;
if (hx > 0x413921fb)
{
if (hx >= 0x7ff00000)
{
x2 = *x;
*y = x2 - x2;
}
else
biguns = 1;
x += stridex;
y += stridey;
i = 2;
if (--n <= 0)
break;
goto loop2;
}
x2 = *x;
py2 = y;
n0 = (int) (x0 * invpio2 + half[xsb0]);
n1 = (int) (x1 * invpio2 + half[xsb1]);
n2 = (int) (x2 * invpio2 + half[xsb2]);
fn0 = (double) n0;
fn1 = (double) n1;
fn2 = (double) n2;
n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
n1 = (n1 + 1) & 3;
n2 = (n2 + 1) & 3;
a0 = x0 - fn0 * pio2_1;
a1 = x1 - fn1 * pio2_1;
a2 = x2 - fn2 * pio2_1;
w0 = fn0 * pio2_2;
w1 = fn1 * pio2_2;
w2 = fn2 * pio2_2;
x0 = a0 - w0;
x1 = a1 - w1;
x2 = a2 - w2;
y0 = (a0 - x0) - w0;
y1 = (a1 - x1) - w1;
y2 = (a2 - x2) - w2;
a0 = x0;
a1 = x1;
a2 = x2;
w0 = fn0 * pio2_3 - y0;
w1 = fn1 * pio2_3 - y1;
w2 = fn2 * pio2_3 - y2;
x0 = a0 - w0;
x1 = a1 - w1;
x2 = a2 - w2;
y0 = (a0 - x0) - w0;
y1 = (a1 - x1) - w1;
y2 = (a2 - x2) - w2;
a0 = x0;
a1 = x1;
a2 = x2;
w0 = fn0 * pio2_3t - y0;
w1 = fn1 * pio2_3t - y1;
w2 = fn2 * pio2_3t - y2;
x0 = a0 - w0;
x1 = a1 - w1;
x2 = a2 - w2;
y0 = (a0 - x0) - w0;
y1 = (a1 - x1) - w1;
y2 = (a2 - x2) - w2;
xsb0 = HI(&x0);
i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
xsb1 = HI(&x1);
i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
xsb2 = HI(&x2);
i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
switch (i)
{
double t0, t1, t2, z0, z1, z2;
unsigned j0, j1, j2;
case 0:
j0 = (xsb0 + 0x4000) & 0xffff8000;
j1 = (xsb1 + 0x4000) & 0xffff8000;
j2 = (xsb2 + 0x4000) & 0xffff8000;
HI(&t0) = j0;
HI(&t1) = j1;
HI(&t2) = j2;
LO(&t0) = 0;
LO(&t1) = 0;
LO(&t2) = 0;
x0 = (x0 - t0) + y0;
x1 = (x1 - t1) + y1;
x2 = (x2 - t2) + y2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (qq1 + z0 * qq2);
t1 = z1 * (qq1 + z1 * qq2);
t2 = z2 * (qq1 + z2 * qq2);
w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb0 = (xsb0 >> 30) & 2;
xsb1 = (xsb1 >> 30) & 2;
xsb2 = (xsb2 >> 30) & 2;
n0 ^= (xsb0 & ~(n0 << 1));
n1 ^= (xsb1 & ~(n1 << 1));
n2 ^= (xsb2 & ~(n2 << 1));
xsb0 |= 1;
xsb1 |= 1;
xsb2 |= 1;
a0 = __vlibm_TBL_sincos_hi[j0+n0];
a1 = __vlibm_TBL_sincos_hi[j1+n1];
a2 = __vlibm_TBL_sincos_hi[j2+n2];
t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
*py0 = ( a0 + t0 );
*py1 = ( a1 + t1 );
*py2 = ( a2 + t2 );
break;
case 1:
j0 = n0 & 1;
j1 = (xsb1 + 0x4000) & 0xffff8000;
j2 = (xsb2 + 0x4000) & 0xffff8000;
HI(&t1) = j1;
HI(&t2) = j2;
LO(&t1) = 0;
LO(&t2) = 0;
x0_or_one[0] = x0;
x0_or_one[2] = -x0;
y0_or_zero[0] = y0;
y0_or_zero[2] = -y0;
x1 = (x1 - t1) + y1;
x2 = (x2 - t2) + y2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
t1 = z1 * (qq1 + z1 * qq2);
t2 = z2 * (qq1 + z2 * qq2);
t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb1 = (xsb1 >> 30) & 2;
xsb2 = (xsb2 >> 30) & 2;
n1 ^= (xsb1 & ~(n1 << 1));
n2 ^= (xsb2 & ~(n2 << 1));
xsb1 |= 1;
xsb2 |= 1;
a1 = __vlibm_TBL_sincos_hi[j1+n1];
a2 = __vlibm_TBL_sincos_hi[j2+n2];
t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
*py0 = t0;
*py1 = ( a1 + t1 );
*py2 = ( a2 + t2 );
break;
case 2:
j0 = (xsb0 + 0x4000) & 0xffff8000;
j1 = n1 & 1;
j2 = (xsb2 + 0x4000) & 0xffff8000;
HI(&t0) = j0;
HI(&t2) = j2;
LO(&t0) = 0;
LO(&t2) = 0;
x1_or_one[0] = x1;
x1_or_one[2] = -x1;
x0 = (x0 - t0) + y0;
y1_or_zero[0] = y1;
y1_or_zero[2] = -y1;
x2 = (x2 - t2) + y2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (qq1 + z0 * qq2);
t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
t2 = z2 * (qq1 + z2 * qq2);
w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb0 = (xsb0 >> 30) & 2;
xsb2 = (xsb2 >> 30) & 2;
n0 ^= (xsb0 & ~(n0 << 1));
n2 ^= (xsb2 & ~(n2 << 1));
xsb0 |= 1;
xsb2 |= 1;
a0 = __vlibm_TBL_sincos_hi[j0+n0];
a2 = __vlibm_TBL_sincos_hi[j2+n2];
t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
*py0 = ( a0 + t0 );
*py1 = t1;
*py2 = ( a2 + t2 );
break;
case 3:
j0 = n0 & 1;
j1 = n1 & 1;
j2 = (xsb2 + 0x4000) & 0xffff8000;
HI(&t2) = j2;
LO(&t2) = 0;
x0_or_one[0] = x0;
x0_or_one[2] = -x0;
x1_or_one[0] = x1;
x1_or_one[2] = -x1;
y0_or_zero[0] = y0;
y0_or_zero[2] = -y0;
y1_or_zero[0] = y1;
y1_or_zero[2] = -y1;
x2 = (x2 - t2) + y2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
t2 = z2 * (qq1 + z2 * qq2);
t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb2 = (xsb2 >> 30) & 2;
n2 ^= (xsb2 & ~(n2 << 1));
xsb2 |= 1;
a2 = __vlibm_TBL_sincos_hi[j2+n2];
t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
*py0 = t0;
*py1 = t1;
*py2 = ( a2 + t2 );
break;
case 4:
j0 = (xsb0 + 0x4000) & 0xffff8000;
j1 = (xsb1 + 0x4000) & 0xffff8000;
j2 = n2 & 1;
HI(&t0) = j0;
HI(&t1) = j1;
LO(&t0) = 0;
LO(&t1) = 0;
x2_or_one[0] = x2;
x2_or_one[2] = -x2;
x0 = (x0 - t0) + y0;
x1 = (x1 - t1) + y1;
y2_or_zero[0] = y2;
y2_or_zero[2] = -y2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (qq1 + z0 * qq2);
t1 = z1 * (qq1 + z1 * qq2);
t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb0 = (xsb0 >> 30) & 2;
xsb1 = (xsb1 >> 30) & 2;
n0 ^= (xsb0 & ~(n0 << 1));
n1 ^= (xsb1 & ~(n1 << 1));
xsb0 |= 1;
xsb1 |= 1;
a0 = __vlibm_TBL_sincos_hi[j0+n0];
a1 = __vlibm_TBL_sincos_hi[j1+n1];
t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
*py0 = ( a0 + t0 );
*py1 = ( a1 + t1 );
*py2 = t2;
break;
case 5:
j0 = n0 & 1;
j1 = (xsb1 + 0x4000) & 0xffff8000;
j2 = n2 & 1;
HI(&t1) = j1;
LO(&t1) = 0;
x0_or_one[0] = x0;
x0_or_one[2] = -x0;
x2_or_one[0] = x2;
x2_or_one[2] = -x2;
y0_or_zero[0] = y0;
y0_or_zero[2] = -y0;
x1 = (x1 - t1) + y1;
y2_or_zero[0] = y2;
y2_or_zero[2] = -y2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
t1 = z1 * (qq1 + z1 * qq2);
t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb1 = (xsb1 >> 30) & 2;
n1 ^= (xsb1 & ~(n1 << 1));
xsb1 |= 1;
a1 = __vlibm_TBL_sincos_hi[j1+n1];
t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
*py0 = t0;
*py1 = ( a1 + t1 );
*py2 = t2;
break;
case 6:
j0 = (xsb0 + 0x4000) & 0xffff8000;
j1 = n1 & 1;
j2 = n2 & 1;
HI(&t0) = j0;
LO(&t0) = 0;
x1_or_one[0] = x1;
x1_or_one[2] = -x1;
x2_or_one[0] = x2;
x2_or_one[2] = -x2;
x0 = (x0 - t0) + y0;
y1_or_zero[0] = y1;
y1_or_zero[2] = -y1;
y2_or_zero[0] = y2;
y2_or_zero[2] = -y2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (qq1 + z0 * qq2);
t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb0 = (xsb0 >> 30) & 2;
n0 ^= (xsb0 & ~(n0 << 1));
xsb0 |= 1;
a0 = __vlibm_TBL_sincos_hi[j0+n0];
t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
*py0 = ( a0 + t0 );
*py1 = t1;
*py2 = t2;
break;
case 7:
j0 = n0 & 1;
j1 = n1 & 1;
j2 = n2 & 1;
x0_or_one[0] = x0;
x0_or_one[2] = -x0;
x1_or_one[0] = x1;
x1_or_one[2] = -x1;
x2_or_one[0] = x2;
x2_or_one[2] = -x2;
y0_or_zero[0] = y0;
y0_or_zero[2] = -y0;
y1_or_zero[0] = y1;
y1_or_zero[2] = -y1;
y2_or_zero[0] = y2;
y2_or_zero[2] = -y2;
z0 = x0 * x0;
z1 = x1 * x1;
z2 = x2 * x2;
t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
*py0 = t0;
*py1 = t1;
*py2 = t2;
break;
}
x += stridex;
y += stridey;
i = 0;
} while (--n > 0);
if (i > 0)
{
double fn0, fn1, a0, a1, w0, w1, y0, y1;
double t0, t1, z0, z1;
unsigned j0, j1;
int n0, n1;
if (i > 1)
{
n1 = (int) (x1 * invpio2 + half[xsb1]);
fn1 = (double) n1;
n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
a1 = x1 - fn1 * pio2_1;
w1 = fn1 * pio2_2;
x1 = a1 - w1;
y1 = (a1 - x1) - w1;
a1 = x1;
w1 = fn1 * pio2_3 - y1;
x1 = a1 - w1;
y1 = (a1 - x1) - w1;
a1 = x1;
w1 = fn1 * pio2_3t - y1;
x1 = a1 - w1;
y1 = (a1 - x1) - w1;
xsb1 = HI(&x1);
if ((xsb1 & ~0x80000000) < thresh[n1&1])
{
j1 = n1 & 1;
x1_or_one[0] = x1;
x1_or_one[2] = -x1;
y1_or_zero[0] = y1;
y1_or_zero[2] = -y1;
z1 = x1 * x1;
t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
*py1 = t1;
}
else
{
j1 = (xsb1 + 0x4000) & 0xffff8000;
HI(&t1) = j1;
LO(&t1) = 0;
x1 = (x1 - t1) + y1;
z1 = x1 * x1;
t1 = z1 * (qq1 + z1 * qq2);
w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb1 = (xsb1 >> 30) & 2;
n1 ^= (xsb1 & ~(n1 << 1));
xsb1 |= 1;
a1 = __vlibm_TBL_sincos_hi[j1+n1];
t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
*py1 = ( a1 + t1 );
}
}
n0 = (int) (x0 * invpio2 + half[xsb0]);
fn0 = (double) n0;
n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
a0 = x0 - fn0 * pio2_1;
w0 = fn0 * pio2_2;
x0 = a0 - w0;
y0 = (a0 - x0) - w0;
a0 = x0;
w0 = fn0 * pio2_3 - y0;
x0 = a0 - w0;
y0 = (a0 - x0) - w0;
a0 = x0;
w0 = fn0 * pio2_3t - y0;
x0 = a0 - w0;
y0 = (a0 - x0) - w0;
xsb0 = HI(&x0);
if ((xsb0 & ~0x80000000) < thresh[n0&1])
{
j0 = n0 & 1;
x0_or_one[0] = x0;
x0_or_one[2] = -x0;
y0_or_zero[0] = y0;
y0_or_zero[2] = -y0;
z0 = x0 * x0;
t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
*py0 = t0;
}
else
{
j0 = (xsb0 + 0x4000) & 0xffff8000;
HI(&t0) = j0;
LO(&t0) = 0;
x0 = (x0 - t0) + y0;
z0 = x0 * x0;
t0 = z0 * (qq1 + z0 * qq2);
w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
xsb0 = (xsb0 >> 30) & 2;
n0 ^= (xsb0 & ~(n0 << 1));
xsb0 |= 1;
a0 = __vlibm_TBL_sincos_hi[j0+n0];
t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
*py0 = ( a0 + t0 );
}
}
if (biguns)
__vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
}