0N/A/*
4248N/A * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0N/A *
0N/A * This code is free software; you can redistribute it and/or modify it
0N/A * under the terms of the GNU General Public License version 2 only, as
2362N/A * published by the Free Software Foundation. Oracle designates this
0N/A * particular file as subject to the "Classpath" exception as provided
2362N/A * by Oracle in the LICENSE file that accompanied this code.
0N/A *
0N/A * This code is distributed in the hope that it will be useful, but WITHOUT
0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0N/A * version 2 for more details (a copy is included in the LICENSE file that
0N/A * accompanied this code).
0N/A *
0N/A * You should have received a copy of the GNU General Public License version
0N/A * 2 along with this work; if not, write to the Free Software Foundation,
0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0N/A *
2362N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2362N/A * or visit www.oracle.com if you need additional information or have any
2362N/A * questions.
0N/A */
0N/A
0N/Apackage sun.misc;
0N/A
0N/Aimport sun.misc.FloatConsts;
0N/Aimport sun.misc.DoubleConsts;
0N/A
0N/A/**
3202N/A * The class {@code FpUtils} contains static utility methods for
3202N/A * manipulating and inspecting {@code float} and
3202N/A * {@code double} floating-point numbers. These methods include
0N/A * functionality recommended or required by the IEEE 754
0N/A * floating-point standard.
0N/A *
0N/A * @author Joseph D. Darcy
0N/A */
0N/A
0N/Apublic class FpUtils {
0N/A /*
0N/A * The methods in this class are reasonably implemented using
0N/A * direct or indirect bit-level manipulation of floating-point
0N/A * values. However, having access to the IEEE 754 recommended
0N/A * functions would obviate the need for most programmers to engage
0N/A * in floating-point bit-twiddling.
0N/A *
0N/A * An IEEE 754 number has three fields, from most significant bit
 * to least significant: sign, exponent, and significand.
0N/A *
0N/A * msb lsb
0N/A * [sign|exponent| fractional_significand]
0N/A *
0N/A * Using some encoding cleverness, explained below, the high order
0N/A * bit of the logical significand does not need to be explicitly
0N/A * stored, thus "fractional_significand" instead of simply
0N/A * "significand" in the figure above.
0N/A *
0N/A * For finite normal numbers, the numerical value encoded is
0N/A *
0N/A * (-1)^sign * 2^(exponent)*(1.fractional_significand)
0N/A *
0N/A * Most finite floating-point numbers are normalized; the exponent
0N/A * value is reduced until the leading significand bit is 1.
0N/A * Therefore, the leading 1 is redundant and is not explicitly
0N/A * stored. If a numerical value is so small it cannot be
0N/A * normalized, it has a subnormal representation. Subnormal
0N/A * numbers don't have a leading 1 in their significand; subnormals
 * are encoded using a special exponent value.  In other words,
 * the high-order bit of the logical significand can be elided
 * from the representation in either case since the bit's value is
0N/A * implicit from the exponent value.
0N/A *
0N/A * The exponent field uses a biased representation; if the bits of
 * the exponent are interpreted as an unsigned integer E, the
0N/A * exponent represented is E - E_bias where E_bias depends on the
0N/A * floating-point format. E can range between E_min and E_max,
0N/A * constants which depend on the floating-point format. E_min and
0N/A * E_max are -126 and +127 for float, -1022 and +1023 for double.
0N/A *
0N/A * The 32-bit float format has 1 sign bit, 8 exponent bits, and 23
0N/A * bits for the significand (which is logically 24 bits wide
0N/A * because of the implicit bit). The 64-bit double format has 1
0N/A * sign bit, 11 exponent bits, and 52 bits for the significand
0N/A * (logically 53 bits).
0N/A *
0N/A * Subnormal numbers and zero have the special exponent value
0N/A * E_min -1; the numerical value represented by a subnormal is:
0N/A *
0N/A * (-1)^sign * 2^(E_min)*(0.fractional_significand)
0N/A *
0N/A * Zero is represented by all zero bits in the exponent and all
0N/A * zero bits in the significand; zero can have either sign.
0N/A *
0N/A * Infinity and NaN are encoded using the exponent value E_max +
0N/A * 1. Signed infinities have all significand bits zero; NaNs have
0N/A * at least one non-zero significand bit.
0N/A *
0N/A * The details of IEEE 754 floating-point encoding will be used in
0N/A * the methods below without further comment. For further
0N/A * exposition on IEEE 754 numbers, see "IEEE Standard for Binary
0N/A * Floating-Point Arithmetic" ANSI/IEEE Std 754-1985 or William
0N/A * Kahan's "Lecture Notes on the Status of IEEE Standard 754 for
0N/A * Binary Floating-Point Arithmetic",
0N/A * http://www.cs.berkeley.edu/~wkahan/ieee754status/ieee754.ps.
0N/A *
0N/A * Many of this class's methods are members of the set of IEEE 754
0N/A * recommended functions or similar functions recommended or
 * required by IEEE 754R.  Discussions of various implementation
 * techniques for these functions have occurred in:
0N/A *
0N/A * W.J. Cody and Jerome T. Coonen, "Algorithm 772 Functions to
0N/A * Support the IEEE Standard for Binary Floating-Point
0N/A * Arithmetic," ACM Transactions on Mathematical Software,
0N/A * vol. 19, no. 4, December 1993, pp. 443-451.
0N/A *
0N/A * Joseph D. Darcy, "Writing robust IEEE recommended functions in
0N/A * ``100% Pure Java''(TM)," University of California, Berkeley
0N/A * technical report UCB//CSD-98-1009.
0N/A */
0N/A
0N/A /**
0N/A * Don't let anyone instantiate this class.
0N/A */
private FpUtils() {
    // Intentionally empty: the constructor is private so that no instance
    // of this class can be created from outside it.
}
0N/A
0N/A // Constants used in scalb
0N/A static double twoToTheDoubleScaleUp = powerOfTwoD(512);
0N/A static double twoToTheDoubleScaleDown = powerOfTwoD(-512);
0N/A
0N/A // Helper Methods
0N/A
0N/A // The following helper methods are used in the implementation of
0N/A // the public recommended functions; they generally omit certain
0N/A // tests for exception cases.
0N/A
0N/A /**
3202N/A * Returns unbiased exponent of a {@code double}.
0N/A */
0N/A public static int getExponent(double d){
0N/A /*
0N/A * Bitwise convert d to long, mask out exponent bits, shift
0N/A * to the right and then subtract out double's bias adjust to
0N/A * get true exponent value.
0N/A */
0N/A return (int)(((Double.doubleToRawLongBits(d) & DoubleConsts.EXP_BIT_MASK) >>
0N/A (DoubleConsts.SIGNIFICAND_WIDTH - 1)) - DoubleConsts.EXP_BIAS);
0N/A }
0N/A
0N/A /**
3202N/A * Returns unbiased exponent of a {@code float}.
0N/A */
0N/A public static int getExponent(float f){
0N/A /*
0N/A * Bitwise convert f to integer, mask out exponent bits, shift
0N/A * to the right and then subtract out float's bias adjust to
0N/A * get true exponent value
0N/A */
0N/A return ((Float.floatToRawIntBits(f) & FloatConsts.EXP_BIT_MASK) >>
0N/A (FloatConsts.SIGNIFICAND_WIDTH - 1)) - FloatConsts.EXP_BIAS;
0N/A }
0N/A
0N/A /**
0N/A * Returns a floating-point power of two in the normal range.
0N/A */
0N/A static double powerOfTwoD(int n) {
0N/A assert(n >= DoubleConsts.MIN_EXPONENT && n <= DoubleConsts.MAX_EXPONENT);
0N/A return Double.longBitsToDouble((((long)n + (long)DoubleConsts.EXP_BIAS) <<
0N/A (DoubleConsts.SIGNIFICAND_WIDTH-1))
0N/A & DoubleConsts.EXP_BIT_MASK);
0N/A }
0N/A
0N/A /**
0N/A * Returns a floating-point power of two in the normal range.
0N/A */
0N/A static float powerOfTwoF(int n) {
0N/A assert(n >= FloatConsts.MIN_EXPONENT && n <= FloatConsts.MAX_EXPONENT);
0N/A return Float.intBitsToFloat(((n + FloatConsts.EXP_BIAS) <<
0N/A (FloatConsts.SIGNIFICAND_WIDTH-1))
0N/A & FloatConsts.EXP_BIT_MASK);
0N/A }
0N/A
0N/A /**
0N/A * Returns the first floating-point argument with the sign of the
0N/A * second floating-point argument. Note that unlike the {@link
0N/A * FpUtils#copySign(double, double) copySign} method, this method
3202N/A * does not require NaN {@code sign} arguments to be treated
0N/A * as positive values; implementations are permitted to treat some
0N/A * NaN arguments as positive and other NaN arguments as negative
0N/A * to allow greater performance.
0N/A *
0N/A * @param magnitude the parameter providing the magnitude of the result
0N/A * @param sign the parameter providing the sign of the result
3202N/A * @return a value with the magnitude of {@code magnitude}
3202N/A * and the sign of {@code sign}.
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static double rawCopySign(double magnitude, double sign) {
0N/A return Double.longBitsToDouble((Double.doubleToRawLongBits(sign) &
0N/A (DoubleConsts.SIGN_BIT_MASK)) |
0N/A (Double.doubleToRawLongBits(magnitude) &
0N/A (DoubleConsts.EXP_BIT_MASK |
0N/A DoubleConsts.SIGNIF_BIT_MASK)));
0N/A }
0N/A
0N/A /**
0N/A * Returns the first floating-point argument with the sign of the
0N/A * second floating-point argument. Note that unlike the {@link
0N/A * FpUtils#copySign(float, float) copySign} method, this method
3202N/A * does not require NaN {@code sign} arguments to be treated
0N/A * as positive values; implementations are permitted to treat some
0N/A * NaN arguments as positive and other NaN arguments as negative
0N/A * to allow greater performance.
0N/A *
0N/A * @param magnitude the parameter providing the magnitude of the result
0N/A * @param sign the parameter providing the sign of the result
3202N/A * @return a value with the magnitude of {@code magnitude}
3202N/A * and the sign of {@code sign}.
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static float rawCopySign(float magnitude, float sign) {
0N/A return Float.intBitsToFloat((Float.floatToRawIntBits(sign) &
0N/A (FloatConsts.SIGN_BIT_MASK)) |
0N/A (Float.floatToRawIntBits(magnitude) &
0N/A (FloatConsts.EXP_BIT_MASK |
0N/A FloatConsts.SIGNIF_BIT_MASK)));
0N/A }
0N/A
0N/A /* ***************************************************************** */
0N/A
0N/A /**
3202N/A * Returns {@code true} if the argument is a finite
3202N/A * floating-point value; returns {@code false} otherwise (for
0N/A * NaN and infinity arguments).
0N/A *
3202N/A * @param d the {@code double} value to be tested
3202N/A * @return {@code true} if the argument is a finite
3202N/A * floating-point value, {@code false} otherwise.
0N/A */
0N/A public static boolean isFinite(double d) {
0N/A return Math.abs(d) <= DoubleConsts.MAX_VALUE;
0N/A }
0N/A
0N/A /**
3202N/A * Returns {@code true} if the argument is a finite
3202N/A * floating-point value; returns {@code false} otherwise (for
0N/A * NaN and infinity arguments).
0N/A *
3202N/A * @param f the {@code float} value to be tested
3202N/A * @return {@code true} if the argument is a finite
3202N/A * floating-point value, {@code false} otherwise.
0N/A */
0N/A public static boolean isFinite(float f) {
0N/A return Math.abs(f) <= FloatConsts.MAX_VALUE;
0N/A }
0N/A
0N/A /**
3202N/A * Returns {@code true} if the specified number is infinitely
3202N/A * large in magnitude, {@code false} otherwise.
0N/A *
0N/A * <p>Note that this method is equivalent to the {@link
0N/A * Double#isInfinite(double) Double.isInfinite} method; the
0N/A * functionality is included in this class for convenience.
0N/A *
0N/A * @param d the value to be tested.
3202N/A * @return {@code true} if the value of the argument is positive
3202N/A * infinity or negative infinity; {@code false} otherwise.
0N/A */
0N/A public static boolean isInfinite(double d) {
0N/A return Double.isInfinite(d);
0N/A }
0N/A
0N/A /**
3202N/A * Returns {@code true} if the specified number is infinitely
3202N/A * large in magnitude, {@code false} otherwise.
0N/A *
0N/A * <p>Note that this method is equivalent to the {@link
0N/A * Float#isInfinite(float) Float.isInfinite} method; the
0N/A * functionality is included in this class for convenience.
0N/A *
0N/A * @param f the value to be tested.
3202N/A * @return {@code true} if the argument is positive infinity or
3202N/A * negative infinity; {@code false} otherwise.
0N/A */
0N/A public static boolean isInfinite(float f) {
0N/A return Float.isInfinite(f);
0N/A }
0N/A
0N/A /**
3202N/A * Returns {@code true} if the specified number is a
3202N/A * Not-a-Number (NaN) value, {@code false} otherwise.
0N/A *
0N/A * <p>Note that this method is equivalent to the {@link
0N/A * Double#isNaN(double) Double.isNaN} method; the functionality is
0N/A * included in this class for convenience.
0N/A *
0N/A * @param d the value to be tested.
3202N/A * @return {@code true} if the value of the argument is NaN;
3202N/A * {@code false} otherwise.
0N/A */
0N/A public static boolean isNaN(double d) {
0N/A return Double.isNaN(d);
0N/A }
0N/A
0N/A /**
3202N/A * Returns {@code true} if the specified number is a
3202N/A * Not-a-Number (NaN) value, {@code false} otherwise.
0N/A *
0N/A * <p>Note that this method is equivalent to the {@link
0N/A * Float#isNaN(float) Float.isNaN} method; the functionality is
0N/A * included in this class for convenience.
0N/A *
0N/A * @param f the value to be tested.
3202N/A * @return {@code true} if the argument is NaN;
3202N/A * {@code false} otherwise.
0N/A */
0N/A public static boolean isNaN(float f) {
0N/A return Float.isNaN(f);
0N/A }
0N/A
0N/A /**
3202N/A * Returns {@code true} if the unordered relation holds
0N/A * between the two arguments. When two floating-point values are
0N/A * unordered, one value is neither less than, equal to, nor
0N/A * greater than the other. For the unordered relation to be true,
3202N/A * at least one argument must be a {@code NaN}.
0N/A *
0N/A * @param arg1 the first argument
0N/A * @param arg2 the second argument
3202N/A * @return {@code true} if at least one argument is a NaN,
3202N/A * {@code false} otherwise.
0N/A */
0N/A public static boolean isUnordered(double arg1, double arg2) {
0N/A return isNaN(arg1) || isNaN(arg2);
0N/A }
0N/A
0N/A /**
3202N/A * Returns {@code true} if the unordered relation holds
0N/A * between the two arguments. When two floating-point values are
0N/A * unordered, one value is neither less than, equal to, nor
0N/A * greater than the other. For the unordered relation to be true,
3202N/A * at least one argument must be a {@code NaN}.
0N/A *
0N/A * @param arg1 the first argument
0N/A * @param arg2 the second argument
3202N/A * @return {@code true} if at least one argument is a NaN,
3202N/A * {@code false} otherwise.
0N/A */
0N/A public static boolean isUnordered(float arg1, float arg2) {
0N/A return isNaN(arg1) || isNaN(arg2);
0N/A }
0N/A
0N/A /**
3202N/A * Returns unbiased exponent of a {@code double}; for
0N/A * subnormal values, the number is treated as if it were
0N/A * normalized. That is for all finite, non-zero, positive numbers
0N/A * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is
0N/A * always in the range [1, 2).
0N/A * <p>
0N/A * Special cases:
0N/A * <ul>
0N/A * <li> If the argument is NaN, then the result is 2<sup>30</sup>.
0N/A * <li> If the argument is infinite, then the result is 2<sup>28</sup>.
0N/A * <li> If the argument is zero, then the result is -(2<sup>28</sup>).
0N/A * </ul>
0N/A *
0N/A * @param d floating-point number whose exponent is to be extracted
0N/A * @return unbiased exponent of the argument.
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static int ilogb(double d) {
0N/A int exponent = getExponent(d);
0N/A
0N/A switch (exponent) {
0N/A case DoubleConsts.MAX_EXPONENT+1: // NaN or infinity
0N/A if( isNaN(d) )
0N/A return (1<<30); // 2^30
0N/A else // infinite value
0N/A return (1<<28); // 2^28
0N/A
0N/A case DoubleConsts.MIN_EXPONENT-1: // zero or subnormal
0N/A if(d == 0.0) {
0N/A return -(1<<28); // -(2^28)
0N/A }
0N/A else {
0N/A long transducer = Double.doubleToRawLongBits(d);
0N/A
0N/A /*
0N/A * To avoid causing slow arithmetic on subnormals,
0N/A * the scaling to determine when d's significand
0N/A * is normalized is done in integer arithmetic.
0N/A * (there must be at least one "1" bit in the
0N/A * significand since zero has been screened out.
0N/A */
0N/A
0N/A // isolate significand bits
0N/A transducer &= DoubleConsts.SIGNIF_BIT_MASK;
0N/A assert(transducer != 0L);
0N/A
0N/A // This loop is simple and functional. We might be
0N/A // able to do something more clever that was faster;
0N/A // e.g. number of leading zero detection on
0N/A // (transducer << (# exponent and sign bits).
0N/A while (transducer <
0N/A (1L << (DoubleConsts.SIGNIFICAND_WIDTH - 1))) {
0N/A transducer *= 2;
0N/A exponent--;
0N/A }
0N/A exponent++;
0N/A assert( exponent >=
0N/A DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1) &&
0N/A exponent < DoubleConsts.MIN_EXPONENT);
0N/A return exponent;
0N/A }
0N/A
0N/A default:
0N/A assert( exponent >= DoubleConsts.MIN_EXPONENT &&
0N/A exponent <= DoubleConsts.MAX_EXPONENT);
0N/A return exponent;
0N/A }
0N/A }
0N/A
0N/A /**
3202N/A * Returns unbiased exponent of a {@code float}; for
0N/A * subnormal values, the number is treated as if it were
0N/A * normalized. That is for all finite, non-zero, positive numbers
0N/A * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is
0N/A * always in the range [1, 2).
0N/A * <p>
0N/A * Special cases:
0N/A * <ul>
0N/A * <li> If the argument is NaN, then the result is 2<sup>30</sup>.
0N/A * <li> If the argument is infinite, then the result is 2<sup>28</sup>.
0N/A * <li> If the argument is zero, then the result is -(2<sup>28</sup>).
0N/A * </ul>
0N/A *
0N/A * @param f floating-point number whose exponent is to be extracted
0N/A * @return unbiased exponent of the argument.
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static int ilogb(float f) {
0N/A int exponent = getExponent(f);
0N/A
0N/A switch (exponent) {
0N/A case FloatConsts.MAX_EXPONENT+1: // NaN or infinity
0N/A if( isNaN(f) )
0N/A return (1<<30); // 2^30
0N/A else // infinite value
0N/A return (1<<28); // 2^28
0N/A
0N/A case FloatConsts.MIN_EXPONENT-1: // zero or subnormal
0N/A if(f == 0.0f) {
0N/A return -(1<<28); // -(2^28)
0N/A }
0N/A else {
0N/A int transducer = Float.floatToRawIntBits(f);
0N/A
0N/A /*
0N/A * To avoid causing slow arithmetic on subnormals,
0N/A * the scaling to determine when f's significand
0N/A * is normalized is done in integer arithmetic.
0N/A * (there must be at least one "1" bit in the
0N/A * significand since zero has been screened out.
0N/A */
0N/A
0N/A // isolate significand bits
0N/A transducer &= FloatConsts.SIGNIF_BIT_MASK;
0N/A assert(transducer != 0);
0N/A
0N/A // This loop is simple and functional. We might be
0N/A // able to do something more clever that was faster;
0N/A // e.g. number of leading zero detection on
0N/A // (transducer << (# exponent and sign bits).
0N/A while (transducer <
0N/A (1 << (FloatConsts.SIGNIFICAND_WIDTH - 1))) {
0N/A transducer *= 2;
0N/A exponent--;
0N/A }
0N/A exponent++;
0N/A assert( exponent >=
0N/A FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1) &&
0N/A exponent < FloatConsts.MIN_EXPONENT);
0N/A return exponent;
0N/A }
0N/A
0N/A default:
0N/A assert( exponent >= FloatConsts.MIN_EXPONENT &&
0N/A exponent <= FloatConsts.MAX_EXPONENT);
0N/A return exponent;
0N/A }
0N/A }
0N/A
0N/A
0N/A /*
0N/A * The scalb operation should be reasonably fast; however, there
0N/A * are tradeoffs in writing a method to minimize the worst case
0N/A * performance and writing a method to minimize the time for
0N/A * expected common inputs. Some processors operate very slowly on
0N/A * subnormal operands, taking hundreds or thousands of cycles for
0N/A * one floating-point add or multiply as opposed to, say, four
0N/A * cycles for normal operands. For processors with very slow
0N/A * subnormal execution, scalb would be fastest if written entirely
0N/A * with integer operations; in other words, scalb would need to
0N/A * include the logic of performing correct rounding of subnormal
0N/A * values. This could be reasonably done in at most a few hundred
0N/A * cycles. However, this approach may penalize normal operations
0N/A * since at least the exponent of the floating-point argument must
0N/A * be examined.
0N/A *
0N/A * The approach taken in this implementation is a compromise.
0N/A * Floating-point multiplication is used to do most of the work;
0N/A * but knowingly multiplying by a subnormal scaling factor is
0N/A * avoided. However, the floating-point argument is not examined
0N/A * to see whether or not it is subnormal since subnormal inputs
0N/A * are assumed to be rare. At most three multiplies are needed to
0N/A * scale from the largest to smallest exponent ranges (scaling
0N/A * down, at most two multiplies are needed if subnormal scaling
0N/A * factors are allowed). However, in this implementation an
0N/A * expensive integer remainder operation is avoided at the cost of
0N/A * requiring five floating-point multiplies in the worst case,
0N/A * which should still be a performance win.
0N/A *
0N/A * If scaling of entire arrays is a concern, it would probably be
0N/A * more efficient to provide a double[] scalb(double[], int)
0N/A * version of scalb to avoid having to recompute the needed
0N/A * scaling factors for each floating-point value.
0N/A */
0N/A
0N/A /**
3202N/A * Return {@code d} &times;
3202N/A * 2<sup>{@code scale_factor}</sup> rounded as if performed
0N/A * by a single correctly rounded floating-point multiply to a
4008N/A * member of the double value set. See section 4.2.3 of
4008N/A * <cite>The Java&trade; Language Specification</cite>
4008N/A * for a discussion of floating-point
0N/A * value sets. If the exponent of the result is between the
3202N/A * {@code double}'s minimum exponent and maximum exponent,
0N/A * the answer is calculated exactly. If the exponent of the
 * result would be larger than {@code double}'s maximum
0N/A * exponent, an infinity is returned. Note that if the result is
3202N/A * subnormal, precision may be lost; that is, when {@code scalb(x,
3202N/A * n)} is subnormal, {@code scalb(scalb(x, n), -n)} may
0N/A * not equal <i>x</i>. When the result is non-NaN, the result has
3202N/A * the same sign as {@code d}.
0N/A *
0N/A *<p>
0N/A * Special cases:
0N/A * <ul>
0N/A * <li> If the first argument is NaN, NaN is returned.
0N/A * <li> If the first argument is infinite, then an infinity of the
0N/A * same sign is returned.
0N/A * <li> If the first argument is zero, then a zero of the same
0N/A * sign is returned.
0N/A * </ul>
0N/A *
0N/A * @param d number to be scaled by a power of two.
3202N/A * @param scale_factor power of 2 used to scale {@code d}
3202N/A * @return {@code d * }2<sup>{@code scale_factor}</sup>
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static double scalb(double d, int scale_factor) {
0N/A /*
0N/A * This method does not need to be declared strictfp to
0N/A * compute the same correct result on all platforms. When
0N/A * scaling up, it does not matter what order the
0N/A * multiply-store operations are done; the result will be
0N/A * finite or overflow regardless of the operation ordering.
0N/A * However, to get the correct result when scaling down, a
0N/A * particular ordering must be used.
0N/A *
0N/A * When scaling down, the multiply-store operations are
0N/A * sequenced so that it is not possible for two consecutive
0N/A * multiply-stores to return subnormal results. If one
0N/A * multiply-store result is subnormal, the next multiply will
0N/A * round it away to zero. This is done by first multiplying
0N/A * by 2 ^ (scale_factor % n) and then multiplying several
0N/A * times by by 2^n as needed where n is the exponent of number
0N/A * that is a covenient power of two. In this way, at most one
0N/A * real rounding error occurs. If the double value set is
0N/A * being used exclusively, the rounding will occur on a
0N/A * multiply. If the double-extended-exponent value set is
0N/A * being used, the products will (perhaps) be exact but the
0N/A * stores to d are guaranteed to round to the double value
0N/A * set.
0N/A *
0N/A * It is _not_ a valid implementation to first multiply d by
0N/A * 2^MIN_EXPONENT and then by 2 ^ (scale_factor %
0N/A * MIN_EXPONENT) since even in a strictfp program double
0N/A * rounding on underflow could occur; e.g. if the scale_factor
0N/A * argument was (MIN_EXPONENT - n) and the exponent of d was a
0N/A * little less than -(MIN_EXPONENT - n), meaning the final
0N/A * result would be subnormal.
0N/A *
0N/A * Since exact reproducibility of this method can be achieved
0N/A * without any undue performance burden, there is no
0N/A * compelling reason to allow double rounding on underflow in
0N/A * scalb.
0N/A */
0N/A
0N/A // magnitude of a power of two so large that scaling a finite
0N/A // nonzero value by it would be guaranteed to over or
0N/A // underflow; due to rounding, scaling down takes takes an
0N/A // additional power of two which is reflected here
0N/A final int MAX_SCALE = DoubleConsts.MAX_EXPONENT + -DoubleConsts.MIN_EXPONENT +
0N/A DoubleConsts.SIGNIFICAND_WIDTH + 1;
0N/A int exp_adjust = 0;
0N/A int scale_increment = 0;
0N/A double exp_delta = Double.NaN;
0N/A
0N/A // Make sure scaling factor is in a reasonable range
0N/A
0N/A if(scale_factor < 0) {
0N/A scale_factor = Math.max(scale_factor, -MAX_SCALE);
0N/A scale_increment = -512;
0N/A exp_delta = twoToTheDoubleScaleDown;
0N/A }
0N/A else {
0N/A scale_factor = Math.min(scale_factor, MAX_SCALE);
0N/A scale_increment = 512;
0N/A exp_delta = twoToTheDoubleScaleUp;
0N/A }
0N/A
0N/A // Calculate (scale_factor % +/-512), 512 = 2^9, using
0N/A // technique from "Hacker's Delight" section 10-2.
0N/A int t = (scale_factor >> 9-1) >>> 32 - 9;
0N/A exp_adjust = ((scale_factor + t) & (512 -1)) - t;
0N/A
0N/A d *= powerOfTwoD(exp_adjust);
0N/A scale_factor -= exp_adjust;
0N/A
0N/A while(scale_factor != 0) {
0N/A d *= exp_delta;
0N/A scale_factor -= scale_increment;
0N/A }
0N/A return d;
0N/A }
0N/A
0N/A /**
3202N/A * Return {@code f} &times;
3202N/A * 2<sup>{@code scale_factor}</sup> rounded as if performed
0N/A * by a single correctly rounded floating-point multiply to a
4008N/A * member of the float value set. See section 4.2.3 of
4008N/A * <cite>The Java&trade; Language Specification</cite>
4008N/A * for a discussion of floating-point
4008N/A * value sets. If the exponent of the result is between the
3202N/A * {@code float}'s minimum exponent and maximum exponent, the
0N/A * answer is calculated exactly. If the exponent of the result
3202N/A * would be larger than {@code float}'s maximum exponent, an
0N/A * infinity is returned. Note that if the result is subnormal,
3202N/A * precision may be lost; that is, when {@code scalb(x, n)}
3202N/A * is subnormal, {@code scalb(scalb(x, n), -n)} may not equal
0N/A * <i>x</i>. When the result is non-NaN, the result has the same
3202N/A * sign as {@code f}.
0N/A *
0N/A *<p>
0N/A * Special cases:
0N/A * <ul>
0N/A * <li> If the first argument is NaN, NaN is returned.
0N/A * <li> If the first argument is infinite, then an infinity of the
0N/A * same sign is returned.
0N/A * <li> If the first argument is zero, then a zero of the same
0N/A * sign is returned.
0N/A * </ul>
0N/A *
0N/A * @param f number to be scaled by a power of two.
3202N/A * @param scale_factor power of 2 used to scale {@code f}
3202N/A * @return {@code f * }2<sup>{@code scale_factor}</sup>
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static float scalb(float f, int scale_factor) {
0N/A // magnitude of a power of two so large that scaling a finite
0N/A // nonzero value by it would be guaranteed to over or
0N/A // underflow; due to rounding, scaling down takes takes an
0N/A // additional power of two which is reflected here
0N/A final int MAX_SCALE = FloatConsts.MAX_EXPONENT + -FloatConsts.MIN_EXPONENT +
0N/A FloatConsts.SIGNIFICAND_WIDTH + 1;
0N/A
0N/A // Make sure scaling factor is in a reasonable range
0N/A scale_factor = Math.max(Math.min(scale_factor, MAX_SCALE), -MAX_SCALE);
0N/A
0N/A /*
0N/A * Since + MAX_SCALE for float fits well within the double
0N/A * exponent range and + float -> double conversion is exact
0N/A * the multiplication below will be exact. Therefore, the
0N/A * rounding that occurs when the double product is cast to
0N/A * float will be the correctly rounded float result. Since
0N/A * all operations other than the final multiply will be exact,
0N/A * it is not necessary to declare this method strictfp.
0N/A */
0N/A return (float)((double)f*powerOfTwoD(scale_factor));
0N/A }
0N/A
0N/A /**
0N/A * Returns the floating-point number adjacent to the first
0N/A * argument in the direction of the second argument. If both
0N/A * arguments compare as equal the second argument is returned.
0N/A *
0N/A * <p>
0N/A * Special cases:
0N/A * <ul>
0N/A * <li> If either argument is a NaN, then NaN is returned.
0N/A *
3202N/A * <li> If both arguments are signed zeros, {@code direction}
0N/A * is returned unchanged (as implied by the requirement of
0N/A * returning the second argument if the arguments compare as
0N/A * equal).
0N/A *
3202N/A * <li> If {@code start} is
3202N/A * &plusmn;{@code Double.MIN_VALUE} and {@code direction}
0N/A * has a value such that the result should have a smaller
3202N/A * magnitude, then a zero with the same sign as {@code start}
0N/A * is returned.
0N/A *
3202N/A * <li> If {@code start} is infinite and
3202N/A * {@code direction} has a value such that the result should
3202N/A * have a smaller magnitude, {@code Double.MAX_VALUE} with the
3202N/A * same sign as {@code start} is returned.
0N/A *
3202N/A * <li> If {@code start} is equal to &plusmn;
3202N/A * {@code Double.MAX_VALUE} and {@code direction} has a
0N/A * value such that the result should have a larger magnitude, an
3202N/A * infinity with same sign as {@code start} is returned.
0N/A * </ul>
0N/A *
0N/A * @param start starting floating-point value
0N/A * @param direction value indicating which of
3202N/A * {@code start}'s neighbors or {@code start} should
0N/A * be returned
3202N/A * @return The floating-point number adjacent to {@code start} in the
3202N/A * direction of {@code direction}.
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static double nextAfter(double start, double direction) {
0N/A /*
0N/A * The cases:
0N/A *
0N/A * nextAfter(+infinity, 0) == MAX_VALUE
0N/A * nextAfter(+infinity, +infinity) == +infinity
0N/A * nextAfter(-infinity, 0) == -MAX_VALUE
0N/A * nextAfter(-infinity, -infinity) == -infinity
0N/A *
0N/A * are naturally handled without any additional testing
0N/A */
0N/A
0N/A // First check for NaN values
0N/A if (isNaN(start) || isNaN(direction)) {
0N/A // return a NaN derived from the input NaN(s)
0N/A return start + direction;
0N/A } else if (start == direction) {
0N/A return direction;
0N/A } else { // start > direction or start < direction
0N/A // Add +0.0 to get rid of a -0.0 (+0.0 + -0.0 => +0.0)
0N/A // then bitwise convert start to integer.
0N/A long transducer = Double.doubleToRawLongBits(start + 0.0d);
0N/A
0N/A /*
0N/A * IEEE 754 floating-point numbers are lexicographically
0N/A * ordered if treated as signed- magnitude integers .
0N/A * Since Java's integers are two's complement,
0N/A * incrementing" the two's complement representation of a
0N/A * logically negative floating-point value *decrements*
0N/A * the signed-magnitude representation. Therefore, when
0N/A * the integer representation of a floating-point values
0N/A * is less than zero, the adjustment to the representation
0N/A * is in the opposite direction than would be expected at
0N/A * first .
0N/A */
0N/A if (direction > start) { // Calculate next greater value
0N/A transducer = transducer + (transducer >= 0L ? 1L:-1L);
0N/A } else { // Calculate next lesser value
0N/A assert direction < start;
0N/A if (transducer > 0L)
0N/A --transducer;
0N/A else
0N/A if (transducer < 0L )
0N/A ++transducer;
0N/A /*
0N/A * transducer==0, the result is -MIN_VALUE
0N/A *
0N/A * The transition from zero (implicitly
0N/A * positive) to the smallest negative
0N/A * signed magnitude value must be done
0N/A * explicitly.
0N/A */
0N/A else
0N/A transducer = DoubleConsts.SIGN_BIT_MASK | 1L;
0N/A }
0N/A
0N/A return Double.longBitsToDouble(transducer);
0N/A }
0N/A }
0N/A
0N/A /**
0N/A * Returns the floating-point number adjacent to the first
0N/A * argument in the direction of the second argument. If both
0N/A * arguments compare as equal, the second argument is returned.
0N/A *
0N/A * <p>
0N/A * Special cases:
0N/A * <ul>
0N/A * <li> If either argument is a NaN, then NaN is returned.
0N/A *
3202N/A * <li> If both arguments are signed zeros, a {@code float}
3202N/A * zero with the same sign as {@code direction} is returned
0N/A * (as implied by the requirement of returning the second argument
0N/A * if the arguments compare as equal).
0N/A *
3202N/A * <li> If {@code start} is
3202N/A * &plusmn;{@code Float.MIN_VALUE} and {@code direction}
0N/A * has a value such that the result should have a smaller
3202N/A * magnitude, then a zero with the same sign as {@code start}
0N/A * is returned.
0N/A *
3202N/A * <li> If {@code start} is infinite and
3202N/A * {@code direction} has a value such that the result should
3202N/A * have a smaller magnitude, {@code Float.MAX_VALUE} with the
3202N/A * same sign as {@code start} is returned.
0N/A *
3202N/A * <li> If {@code start} is equal to &plusmn;
3202N/A * {@code Float.MAX_VALUE} and {@code direction} has a
0N/A * value such that the result should have a larger magnitude, an
3202N/A * infinity with same sign as {@code start} is returned.
0N/A * </ul>
0N/A *
0N/A * @param start starting floating-point value
0N/A * @param direction value indicating which of
3202N/A * {@code start}'s neighbors or {@code start} should
0N/A * be returned
3202N/A * @return The floating-point number adjacent to {@code start} in the
3202N/A * direction of {@code direction}.
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static float nextAfter(float start, double direction) {
0N/A /*
0N/A * The cases:
0N/A *
0N/A * nextAfter(+infinity, 0) == MAX_VALUE
0N/A * nextAfter(+infinity, +infinity) == +infinity
0N/A * nextAfter(-infinity, 0) == -MAX_VALUE
0N/A * nextAfter(-infinity, -infinity) == -infinity
0N/A *
0N/A * are naturally handled without any additional testing
0N/A */
0N/A
0N/A // First check for NaN values
0N/A if (isNaN(start) || isNaN(direction)) {
0N/A // return a NaN derived from the input NaN(s)
0N/A return start + (float)direction;
0N/A } else if (start == direction) {
0N/A return (float)direction;
0N/A } else { // start > direction or start < direction
0N/A // Add +0.0 to get rid of a -0.0 (+0.0 + -0.0 => +0.0)
0N/A // then bitwise convert start to integer.
0N/A int transducer = Float.floatToRawIntBits(start + 0.0f);
0N/A
0N/A /*
0N/A * IEEE 754 floating-point numbers are lexicographically
0N/A * ordered if treated as signed- magnitude integers .
0N/A * Since Java's integers are two's complement,
0N/A * incrementing" the two's complement representation of a
0N/A * logically negative floating-point value *decrements*
0N/A * the signed-magnitude representation. Therefore, when
0N/A * the integer representation of a floating-point values
0N/A * is less than zero, the adjustment to the representation
0N/A * is in the opposite direction than would be expected at
0N/A * first.
0N/A */
0N/A if (direction > start) {// Calculate next greater value
0N/A transducer = transducer + (transducer >= 0 ? 1:-1);
0N/A } else { // Calculate next lesser value
0N/A assert direction < start;
0N/A if (transducer > 0)
0N/A --transducer;
0N/A else
0N/A if (transducer < 0 )
0N/A ++transducer;
0N/A /*
0N/A * transducer==0, the result is -MIN_VALUE
0N/A *
0N/A * The transition from zero (implicitly
0N/A * positive) to the smallest negative
0N/A * signed magnitude value must be done
0N/A * explicitly.
0N/A */
0N/A else
0N/A transducer = FloatConsts.SIGN_BIT_MASK | 1;
0N/A }
0N/A
0N/A return Float.intBitsToFloat(transducer);
0N/A }
0N/A }
0N/A
0N/A /**
3202N/A * Returns the floating-point value adjacent to {@code d} in
0N/A * the direction of positive infinity. This method is
3202N/A * semantically equivalent to {@code nextAfter(d,
3202N/A * Double.POSITIVE_INFINITY)}; however, a {@code nextUp}
0N/A * implementation may run faster than its equivalent
3202N/A * {@code nextAfter} call.
0N/A *
0N/A * <p>Special Cases:
0N/A * <ul>
0N/A * <li> If the argument is NaN, the result is NaN.
0N/A *
0N/A * <li> If the argument is positive infinity, the result is
0N/A * positive infinity.
0N/A *
0N/A * <li> If the argument is zero, the result is
3202N/A * {@code Double.MIN_VALUE}
0N/A *
0N/A * </ul>
0N/A *
0N/A * @param d starting floating-point value
0N/A * @return The adjacent floating-point value closer to positive
0N/A * infinity.
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static double nextUp(double d) {
0N/A if( isNaN(d) || d == Double.POSITIVE_INFINITY)
0N/A return d;
0N/A else {
0N/A d += 0.0d;
0N/A return Double.longBitsToDouble(Double.doubleToRawLongBits(d) +
0N/A ((d >= 0.0d)?+1L:-1L));
0N/A }
0N/A }
0N/A
0N/A /**
3202N/A * Returns the floating-point value adjacent to {@code f} in
0N/A * the direction of positive infinity. This method is
3202N/A * semantically equivalent to {@code nextAfter(f,
3202N/A * Double.POSITIVE_INFINITY)}; however, a {@code nextUp}
0N/A * implementation may run faster than its equivalent
3202N/A * {@code nextAfter} call.
0N/A *
0N/A * <p>Special Cases:
0N/A * <ul>
0N/A * <li> If the argument is NaN, the result is NaN.
0N/A *
0N/A * <li> If the argument is positive infinity, the result is
0N/A * positive infinity.
0N/A *
0N/A * <li> If the argument is zero, the result is
3202N/A * {@code Float.MIN_VALUE}
0N/A *
0N/A * </ul>
0N/A *
0N/A * @param f starting floating-point value
0N/A * @return The adjacent floating-point value closer to positive
0N/A * infinity.
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static float nextUp(float f) {
0N/A if( isNaN(f) || f == FloatConsts.POSITIVE_INFINITY)
0N/A return f;
0N/A else {
0N/A f += 0.0f;
0N/A return Float.intBitsToFloat(Float.floatToRawIntBits(f) +
0N/A ((f >= 0.0f)?+1:-1));
0N/A }
0N/A }
0N/A
0N/A /**
3202N/A * Returns the floating-point value adjacent to {@code d} in
0N/A * the direction of negative infinity. This method is
3202N/A * semantically equivalent to {@code nextAfter(d,
3202N/A * Double.NEGATIVE_INFINITY)}; however, a
3202N/A * {@code nextDown} implementation may run faster than its
3202N/A * equivalent {@code nextAfter} call.
0N/A *
0N/A * <p>Special Cases:
0N/A * <ul>
0N/A * <li> If the argument is NaN, the result is NaN.
0N/A *
0N/A * <li> If the argument is negative infinity, the result is
0N/A * negative infinity.
0N/A *
0N/A * <li> If the argument is zero, the result is
3202N/A * {@code -Double.MIN_VALUE}
0N/A *
0N/A * </ul>
0N/A *
0N/A * @param d starting floating-point value
0N/A * @return The adjacent floating-point value closer to negative
0N/A * infinity.
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static double nextDown(double d) {
0N/A if( isNaN(d) || d == Double.NEGATIVE_INFINITY)
0N/A return d;
0N/A else {
0N/A if (d == 0.0)
0N/A return -Double.MIN_VALUE;
0N/A else
0N/A return Double.longBitsToDouble(Double.doubleToRawLongBits(d) +
0N/A ((d > 0.0d)?-1L:+1L));
0N/A }
0N/A }
0N/A
/**
 * Returns the floating-point value adjacent to {@code f} in
 * the direction of negative infinity.  This method is
 * semantically equivalent to {@code nextAfter(f,
 * Float.NEGATIVE_INFINITY)}; however, a
 * {@code nextDown} implementation may run faster than its
 * equivalent {@code nextAfter} call.
 *
 * <p>Note that the declared return type of this method is
 * {@code double}; the value returned is always a {@code float}
 * value widened to {@code double}.
 *
 * <p>Special Cases:
 * <ul>
 * <li> If the argument is NaN, the result is NaN.
 *
 * <li> If the argument is negative infinity, the result is
 * negative infinity.
 *
 * <li> If the argument is zero, the result is
 * {@code -Float.MIN_VALUE}.
 *
 * </ul>
 *
 * @param f  starting floating-point value
 * @return The adjacent floating-point value closer to negative
 * infinity, widened to {@code double}.
 * @author Joseph D. Darcy
 */
0N/A public static double nextDown(float f) {
0N/A if( isNaN(f) || f == Float.NEGATIVE_INFINITY)
0N/A return f;
0N/A else {
0N/A if (f == 0.0f)
0N/A return -Float.MIN_VALUE;
0N/A else
0N/A return Float.intBitsToFloat(Float.floatToRawIntBits(f) +
0N/A ((f > 0.0f)?-1:+1));
0N/A }
0N/A }
0N/A
0N/A /**
0N/A * Returns the first floating-point argument with the sign of the
0N/A * second floating-point argument. For this method, a NaN
3202N/A * {@code sign} argument is always treated as if it were
0N/A * positive.
0N/A *
0N/A * @param magnitude the parameter providing the magnitude of the result
0N/A * @param sign the parameter providing the sign of the result
3202N/A * @return a value with the magnitude of {@code magnitude}
3202N/A * and the sign of {@code sign}.
0N/A * @author Joseph D. Darcy
0N/A * @since 1.5
0N/A */
0N/A public static double copySign(double magnitude, double sign) {
0N/A return rawCopySign(magnitude, (isNaN(sign)?1.0d:sign));
0N/A }
0N/A
0N/A /**
0N/A * Returns the first floating-point argument with the sign of the
0N/A * second floating-point argument. For this method, a NaN
3202N/A * {@code sign} argument is always treated as if it were
0N/A * positive.
0N/A *
0N/A * @param magnitude the parameter providing the magnitude of the result
0N/A * @param sign the parameter providing the sign of the result
3202N/A * @return a value with the magnitude of {@code magnitude}
3202N/A * and the sign of {@code sign}.
0N/A * @author Joseph D. Darcy
0N/A */
0N/A public static float copySign(float magnitude, float sign) {
0N/A return rawCopySign(magnitude, (isNaN(sign)?1.0f:sign));
0N/A }
0N/A
0N/A /**
0N/A * Returns the size of an ulp of the argument. An ulp of a
3202N/A * {@code double} value is the positive distance between this
3202N/A * floating-point value and the {@code double} value next
0N/A * larger in magnitude. Note that for non-NaN <i>x</i>,
0N/A * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
0N/A *
0N/A * <p>Special Cases:
0N/A * <ul>
0N/A * <li> If the argument is NaN, then the result is NaN.
0N/A * <li> If the argument is positive or negative infinity, then the
0N/A * result is positive infinity.
0N/A * <li> If the argument is positive or negative zero, then the result is
3202N/A * {@code Double.MIN_VALUE}.
3202N/A * <li> If the argument is &plusmn;{@code Double.MAX_VALUE}, then
0N/A * the result is equal to 2<sup>971</sup>.
0N/A * </ul>
0N/A *
0N/A * @param d the floating-point value whose ulp is to be returned
0N/A * @return the size of an ulp of the argument
0N/A * @author Joseph D. Darcy
0N/A * @since 1.5
0N/A */
0N/A public static double ulp(double d) {
0N/A int exp = getExponent(d);
0N/A
0N/A switch(exp) {
0N/A case DoubleConsts.MAX_EXPONENT+1: // NaN or infinity
0N/A return Math.abs(d);
0N/A
0N/A case DoubleConsts.MIN_EXPONENT-1: // zero or subnormal
0N/A return Double.MIN_VALUE;
0N/A
0N/A default:
0N/A assert exp <= DoubleConsts.MAX_EXPONENT && exp >= DoubleConsts.MIN_EXPONENT;
0N/A
0N/A // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
0N/A exp = exp - (DoubleConsts.SIGNIFICAND_WIDTH-1);
0N/A if (exp >= DoubleConsts.MIN_EXPONENT) {
0N/A return powerOfTwoD(exp);
0N/A }
0N/A else {
0N/A // return a subnormal result; left shift integer
0N/A // representation of Double.MIN_VALUE appropriate
0N/A // number of positions
0N/A return Double.longBitsToDouble(1L <<
0N/A (exp - (DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1)) ));
0N/A }
0N/A }
0N/A }
0N/A
0N/A /**
0N/A * Returns the size of an ulp of the argument. An ulp of a
3202N/A * {@code float} value is the positive distance between this
3202N/A * floating-point value and the {@code float} value next
0N/A * larger in magnitude. Note that for non-NaN <i>x</i>,
0N/A * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
0N/A *
0N/A * <p>Special Cases:
0N/A * <ul>
0N/A * <li> If the argument is NaN, then the result is NaN.
0N/A * <li> If the argument is positive or negative infinity, then the
0N/A * result is positive infinity.
0N/A * <li> If the argument is positive or negative zero, then the result is
3202N/A * {@code Float.MIN_VALUE}.
3202N/A * <li> If the argument is &plusmn;{@code Float.MAX_VALUE}, then
0N/A * the result is equal to 2<sup>104</sup>.
0N/A * </ul>
0N/A *
0N/A * @param f the floating-point value whose ulp is to be returned
0N/A * @return the size of an ulp of the argument
0N/A * @author Joseph D. Darcy
0N/A * @since 1.5
0N/A */
0N/A public static float ulp(float f) {
0N/A int exp = getExponent(f);
0N/A
0N/A switch(exp) {
0N/A case FloatConsts.MAX_EXPONENT+1: // NaN or infinity
0N/A return Math.abs(f);
0N/A
0N/A case FloatConsts.MIN_EXPONENT-1: // zero or subnormal
0N/A return FloatConsts.MIN_VALUE;
0N/A
0N/A default:
0N/A assert exp <= FloatConsts.MAX_EXPONENT && exp >= FloatConsts.MIN_EXPONENT;
0N/A
0N/A // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
0N/A exp = exp - (FloatConsts.SIGNIFICAND_WIDTH-1);
0N/A if (exp >= FloatConsts.MIN_EXPONENT) {
0N/A return powerOfTwoF(exp);
0N/A }
0N/A else {
0N/A // return a subnormal result; left shift integer
0N/A // representation of FloatConsts.MIN_VALUE appropriate
0N/A // number of positions
0N/A return Float.intBitsToFloat(1 <<
0N/A (exp - (FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1)) ));
0N/A }
0N/A }
0N/A }
0N/A
0N/A /**
0N/A * Returns the signum function of the argument; zero if the argument
0N/A * is zero, 1.0 if the argument is greater than zero, -1.0 if the
0N/A * argument is less than zero.
0N/A *
0N/A * <p>Special Cases:
0N/A * <ul>
0N/A * <li> If the argument is NaN, then the result is NaN.
0N/A * <li> If the argument is positive zero or negative zero, then the
0N/A * result is the same as the argument.
0N/A * </ul>
0N/A *
0N/A * @param d the floating-point value whose signum is to be returned
0N/A * @return the signum function of the argument
0N/A * @author Joseph D. Darcy
0N/A * @since 1.5
0N/A */
0N/A public static double signum(double d) {
0N/A return (d == 0.0 || isNaN(d))?d:copySign(1.0, d);
0N/A }
0N/A
0N/A /**
0N/A * Returns the signum function of the argument; zero if the argument
0N/A * is zero, 1.0f if the argument is greater than zero, -1.0f if the
0N/A * argument is less than zero.
0N/A *
0N/A * <p>Special Cases:
0N/A * <ul>
0N/A * <li> If the argument is NaN, then the result is NaN.
0N/A * <li> If the argument is positive zero or negative zero, then the
0N/A * result is the same as the argument.
0N/A * </ul>
0N/A *
0N/A * @param f the floating-point value whose signum is to be returned
0N/A * @return the signum function of the argument
0N/A * @author Joseph D. Darcy
0N/A * @since 1.5
0N/A */
0N/A public static float signum(float f) {
0N/A return (f == 0.0f || isNaN(f))?f:copySign(1.0f, f);
0N/A }
0N/A
0N/A}