fma.c revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER START
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * The contents of this file are subject to the terms of the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Common Development and Distribution License (the "License").
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You may not use this file except in compliance with the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * See the License for the specific language governing permissions
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * and limitations under the License.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * CDDL HEADER END
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * Use is subject to license terms.
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisstatic const union {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis unsigned i[2];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis { 0x3fe00000u, 0 },
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis { 0x40000000u, 0 },
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis { 0x43300000u, 0 },
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis { 0x41a00000u, 0 },
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis { 0x3e500000u, 0 },
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis { 0x3df00000u, 0 },
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis { 0x3bf00000u, 0 },
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis { 0x7fe00000u, 0 },
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis { 0x00100000u, 0 },
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis { 0x00100001u, 0 }
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtisstatic const unsigned int fsr_rm = 0xc0000000u;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * fma for SPARC: 64-bit double precision, big-endian
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis__fma(double x, double y, double z) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis unsigned i[2];
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis unsigned int xy0, xy1, xy2, xy3, z0, z1, z2, z3, fsr, rm, sticky;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis volatile double dummy;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* extract the high order words of the arguments */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* dispense with inf, nan, and zero cases */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 ||
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis (hy | yy.i[1]) == 0) /* x or y is inf, nan, or zero */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis return (x * y + z);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (hz >= 0x7ff00000) /* z is inf or nan */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis return (x + z); /* avoid spurious under/overflow in x * y */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * x * y isn't zero but could underflow to zero,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * so don't add z, lest we perturb the sign
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis return (x * y);
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * now x, y, and z are all finite and nonzero; save the fsr and
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * set round-to-negative-infinity mode (and clear nonstandard
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * mode before we try to scale subnormal operands)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* extract signs and exponents, and normalize subnormals */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ex = ((xx.i[0] & ~0x80000000) >> 20) - 52;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ey = ((yy.i[0] & ~0x80000000) >> 20) - 52;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ez = ((zz.i[0] & ~0x80000000) >> 20) - 52;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* multiply x*y to 106 bits */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx.i[0] = (xx.i[0] & 0xfffff) | 0x3ff00000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis yy.i[0] = (yy.i[0] & 0xfffff) | 0x3ff00000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* extract the significands */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * now x*y is represented by sxy, exy, and xy[0-3], and z is
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * represented likewise; swap if need be so |xy| <= |z|
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* shift the significand of xy keeping a sticky bit */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (e > 116) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis } else if (e >= 96) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (127 - e));
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis } else if (e >= 64) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sticky = xy3 | xy2 | ((xy1 << 1) << (95 - e));
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis } else if (e >= 32) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis } else if (e) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* if this is a magnitude subtract, negate the significand of xy */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* add, propagating carries */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* postnormalize and collect rounding information into z2 */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* result is tiny; shift right until exponent is within range */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis z2 = 1; /* result can't be exactly zero */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis } else if (e >= 32) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis sticky = z3 | z2 | ((z1 << 1) << (63 - e));
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* carry out; shift right by one */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (z0 < 0x100000 && (z0 | z1 | z2 | z3) != 0) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * borrow/cancellation; shift left as much as
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * exponent allows
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis while (!(z0 | (z1 & 0xffe00000)) && ez >= 33) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* get the rounding mode and clear current exceptions */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* strip off the integer bit, if there is one */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * flip the sense of directed roundings if the result is negative;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * the logic below applies to a positive result
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* round and raise exceptions */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* decide whether to round the fraction up */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (rm == FSR_RP || (rm == FSR_RN && (z2 > 0x80000000u ||
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* round up and renormalize if necessary */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (++z1 == 0) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* check for under/overflow */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * !ibit => exact result was tiny before rounding,
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * z2 nonzero => result delivered is inexact
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* restore the fsr and emulate exceptions as needed */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * fma for x86: 64-bit double precision, little-endian
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis__fma(double x, double y, double z) {
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis long double e;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* convert the operands to double extended */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xx.e = (long double) x;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis yy.e = (long double) y;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis zz.e = (long double) z;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* extract the exponents of the arguments */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* dispense with inf, nan, and zero cases */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0)
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* x or y is inf, nan, or zero */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* avoid spurious inexact in x * y */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * save the control and status words, mask all exceptions, and
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * set rounding to 64-bit precision and to-nearest
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* multiply x*y to 106 bits */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ye = ((xhi * yhi - xe) + xhi * ylo + xlo * yhi) + xlo * ylo;
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* distill the sum of xe, ye, and z */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* now (xhi,xlo) = ye + z */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ylo = (xhi - ye) + (xe - (yhi - ye)); /* now (yhi,ylo) = xe + xhi */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis xlo = (ylo - xe) + (xlo - (xhi - xe)); /* now (xhi,xlo) = xlo + ylo */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis ylo = (yhi - yy.e) + xhi; /* now (yy.e,ylo) = xhi + yhi */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* perturb yy.e if its least significant 10 bits are zero */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis /* set sign of zero result according to rounding direction */
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis * restore the control and status words and convert the result
25c28e83beb90e7c80452a7c818c5e6f73a07dc8Piotr Jasiukajtis return ((double) yy.e);