/* * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package sun.misc; import sun.misc.FloatConsts; import sun.misc.DoubleConsts; /** * The class <code>FpUtils</code> contains static utility methods for * manipulating and inspecting <code>float</code> and * <code>double</code> floating-point numbers. These methods include * functionality recommended or required by the IEEE 754 * floating-point standard. * * @author Joseph D. Darcy */ public class FpUtils { /* * The methods in this class are reasonably implemented using * direct or indirect bit-level manipulation of floating-point * values. However, having access to the IEEE 754 recommended * functions would obviate the need for most programmers to engage * in floating-point bit-twiddling. 
*
* An IEEE 754 number has three fields, from most significant bit
* to least significant: sign, exponent, and significand.
*
*  msb                                lsb
* [sign|exponent|  fractional_significand]
*
* Using some encoding cleverness, explained below, the high order
* bit of the logical significand does not need to be explicitly
* stored, thus "fractional_significand" instead of simply
* "significand" in the figure above.
*
* For finite normal numbers, the numerical value encoded is
*
* (-1)^sign * 2^(exponent)*(1.fractional_significand)
*
* Most finite floating-point numbers are normalized; the exponent
* value is reduced until the leading significand bit is 1.
* Therefore, the leading 1 is redundant and is not explicitly
* stored.  If a numerical value is so small it cannot be
* normalized, it has a subnormal representation.  Subnormal
* numbers don't have a leading 1 in their significand; subnormals
* are encoded using a special exponent value.  In other words,
* the high-order bit of the logical significand can be elided
* from the representation in either case since the bit's value is
* implicit from the exponent value.
*
* The exponent field uses a biased representation; if the bits of
* the exponent are interpreted as an unsigned integer E, the
* exponent represented is E - E_bias where E_bias depends on the
* floating-point format.  E can range between E_min and E_max,
* constants which depend on the floating-point format.  E_min and
* E_max are -126 and +127 for float, -1022 and +1023 for double.
*
* The 32-bit float format has 1 sign bit, 8 exponent bits, and 23
* bits for the significand (which is logically 24 bits wide
* because of the implicit bit).  The 64-bit double format has 1
* sign bit, 11 exponent bits, and 52 bits for the significand
* (logically 53 bits).
*
* Subnormal numbers and zero have the special exponent value
* E_min -1; the numerical value represented by a subnormal is:
*
* (-1)^sign * 2^(E_min)*(0.fractional_significand)
*
* Zero is represented by all zero bits in the exponent and all
* zero bits in the significand; zero can have either sign.
*
* Infinity and NaN are encoded using the exponent value E_max +
* 1.  Signed infinities have all significand bits zero; NaNs have
* at least one non-zero significand bit.
*
* The details of IEEE 754 floating-point encoding will be used in
* the methods below without further comment.  For further
* exposition on IEEE 754 numbers, see "IEEE Standard for Binary
* Floating-Point Arithmetic" ANSI/IEEE Std 754-1985 or William
* Kahan's "Lecture Notes on the Status of IEEE Standard 754 for
* Binary Floating-Point Arithmetic",
* http://www.cs.berkeley.edu/~wkahan/ieee754status/ieee754.ps.
*
* Many of this class's methods are members of the set of IEEE 754
* recommended functions or similar functions recommended or
* required by IEEE 754R.  Discussion of various implementation
* techniques for these functions has occurred in:
*
* W.J. Cody and Jerome T. Coonen, "Algorithm 772 Functions to
* Support the IEEE Standard for Binary Floating-Point
* Arithmetic," ACM Transactions on Mathematical Software,
* vol. 19, no. 4, December 1993, pp. 443-451.
*
* Joseph D. Darcy, "Writing robust IEEE recommended functions in
* ``100% Pure Java''(TM)," University of California, Berkeley
* technical report UCB//CSD-98-1009.
*/

/**
 * Don't let anyone instantiate this class.
 */
private FpUtils() {}

// Constants used in scalb: the powers of two 2^512 and 2^-512 that
// scalb(double, int) multiplies by repeatedly, one factor per step
// of 512 in the requested scale.
static double twoToTheDoubleScaleUp = powerOfTwoD(512);
static double twoToTheDoubleScaleDown = powerOfTwoD(-512);

// Helper Methods

// The following helper methods are used in the implementation of
// the public recommended functions; they generally omit certain
// tests for exception cases.

/**
 * Returns unbiased exponent of a <code>double</code>.
*/ public static int getExponent(double d){ /* * Bitwise convert d to long, mask out exponent bits, shift * to the right and then subtract out double's bias adjust to * get true exponent value. */ return (int)(((Double.doubleToRawLongBits(d) & DoubleConsts.EXP_BIT_MASK) >> (DoubleConsts.SIGNIFICAND_WIDTH - 1)) - DoubleConsts.EXP_BIAS); } /** * Returns unbiased exponent of a <code>float</code>. */ public static int getExponent(float f){ /* * Bitwise convert f to integer, mask out exponent bits, shift * to the right and then subtract out float's bias adjust to * get true exponent value */ return ((Float.floatToRawIntBits(f) & FloatConsts.EXP_BIT_MASK) >> (FloatConsts.SIGNIFICAND_WIDTH - 1)) - FloatConsts.EXP_BIAS; } /** * Returns a floating-point power of two in the normal range. */ static double powerOfTwoD(int n) { assert(n >= DoubleConsts.MIN_EXPONENT && n <= DoubleConsts.MAX_EXPONENT); return Double.longBitsToDouble((((long)n + (long)DoubleConsts.EXP_BIAS) << (DoubleConsts.SIGNIFICAND_WIDTH-1)) & DoubleConsts.EXP_BIT_MASK); } /** * Returns a floating-point power of two in the normal range. */ static float powerOfTwoF(int n) { assert(n >= FloatConsts.MIN_EXPONENT && n <= FloatConsts.MAX_EXPONENT); return Float.intBitsToFloat(((n + FloatConsts.EXP_BIAS) << (FloatConsts.SIGNIFICAND_WIDTH-1)) & FloatConsts.EXP_BIT_MASK); } /** * Returns the first floating-point argument with the sign of the * second floating-point argument. Note that unlike the {@link * FpUtils#copySign(double, double) copySign} method, this method * does not require NaN <code>sign</code> arguments to be treated * as positive values; implementations are permitted to treat some * NaN arguments as positive and other NaN arguments as negative * to allow greater performance. * * @param magnitude the parameter providing the magnitude of the result * @param sign the parameter providing the sign of the result * @return a value with the magnitude of <code>magnitude</code> * and the sign of <code>sign</code>. 
* @author Joseph D. Darcy */ public static double rawCopySign(double magnitude, double sign) { return Double.longBitsToDouble((Double.doubleToRawLongBits(sign) & (DoubleConsts.SIGN_BIT_MASK)) | (Double.doubleToRawLongBits(magnitude) & (DoubleConsts.EXP_BIT_MASK | DoubleConsts.SIGNIF_BIT_MASK))); } /** * Returns the first floating-point argument with the sign of the * second floating-point argument. Note that unlike the {@link * FpUtils#copySign(float, float) copySign} method, this method * does not require NaN <code>sign</code> arguments to be treated * as positive values; implementations are permitted to treat some * NaN arguments as positive and other NaN arguments as negative * to allow greater performance. * * @param magnitude the parameter providing the magnitude of the result * @param sign the parameter providing the sign of the result * @return a value with the magnitude of <code>magnitude</code> * and the sign of <code>sign</code>. * @author Joseph D. Darcy */ public static float rawCopySign(float magnitude, float sign) { return Float.intBitsToFloat((Float.floatToRawIntBits(sign) & (FloatConsts.SIGN_BIT_MASK)) | (Float.floatToRawIntBits(magnitude) & (FloatConsts.EXP_BIT_MASK | FloatConsts.SIGNIF_BIT_MASK))); } /* ***************************************************************** */ /** * Returns <code>true</code> if the argument is a finite * floating-point value; returns <code>false</code> otherwise (for * NaN and infinity arguments). * * @param d the <code>double</code> value to be tested * @return <code>true</code> if the argument is a finite * floating-point value, <code>false</code> otherwise. */ public static boolean isFinite(double d) { return Math.abs(d) <= DoubleConsts.MAX_VALUE; } /** * Returns <code>true</code> if the argument is a finite * floating-point value; returns <code>false</code> otherwise (for * NaN and infinity arguments). 
* * @param f the <code>float</code> value to be tested * @return <code>true</code> if the argument is a finite * floating-point value, <code>false</code> otherwise. */ public static boolean isFinite(float f) { return Math.abs(f) <= FloatConsts.MAX_VALUE; } /** * Returns <code>true</code> if the specified number is infinitely * large in magnitude, <code>false</code> otherwise. * * <p>Note that this method is equivalent to the {@link * Double#isInfinite(double) Double.isInfinite} method; the * functionality is included in this class for convenience. * * @param d the value to be tested. * @return <code>true</code> if the value of the argument is positive * infinity or negative infinity; <code>false</code> otherwise. */ public static boolean isInfinite(double d) { return Double.isInfinite(d); } /** * Returns <code>true</code> if the specified number is infinitely * large in magnitude, <code>false</code> otherwise. * * <p>Note that this method is equivalent to the {@link * Float#isInfinite(float) Float.isInfinite} method; the * functionality is included in this class for convenience. * * @param f the value to be tested. * @return <code>true</code> if the argument is positive infinity or * negative infinity; <code>false</code> otherwise. */ public static boolean isInfinite(float f) { return Float.isInfinite(f); } /** * Returns <code>true</code> if the specified number is a * Not-a-Number (NaN) value, <code>false</code> otherwise. * * <p>Note that this method is equivalent to the {@link * Double#isNaN(double) Double.isNaN} method; the functionality is * included in this class for convenience. * * @param d the value to be tested. * @return <code>true</code> if the value of the argument is NaN; * <code>false</code> otherwise. */ public static boolean isNaN(double d) { return Double.isNaN(d); } /** * Returns <code>true</code> if the specified number is a * Not-a-Number (NaN) value, <code>false</code> otherwise. 
* * <p>Note that this method is equivalent to the {@link * Float#isNaN(float) Float.isNaN} method; the functionality is * included in this class for convenience. * * @param f the value to be tested. * @return <code>true</code> if the argument is NaN; * <code>false</code> otherwise. */ public static boolean isNaN(float f) { return Float.isNaN(f); } /** * Returns <code>true</code> if the unordered relation holds * between the two arguments. When two floating-point values are * unordered, one value is neither less than, equal to, nor * greater than the other. For the unordered relation to be true, * at least one argument must be a <code>NaN</code>. * * @param arg1 the first argument * @param arg2 the second argument * @return <code>true</code> if at least one argument is a NaN, * <code>false</code> otherwise. */ public static boolean isUnordered(double arg1, double arg2) { return isNaN(arg1) || isNaN(arg2); } /** * Returns <code>true</code> if the unordered relation holds * between the two arguments. When two floating-point values are * unordered, one value is neither less than, equal to, nor * greater than the other. For the unordered relation to be true, * at least one argument must be a <code>NaN</code>. * * @param arg1 the first argument * @param arg2 the second argument * @return <code>true</code> if at least one argument is a NaN, * <code>false</code> otherwise. */ public static boolean isUnordered(float arg1, float arg2) { return isNaN(arg1) || isNaN(arg2); } /** * Returns unbiased exponent of a <code>double</code>; for * subnormal values, the number is treated as if it were * normalized. That is for all finite, non-zero, positive numbers * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is * always in the range [1, 2). * <p> * Special cases: * <ul> * <li> If the argument is NaN, then the result is 2<sup>30</sup>. * <li> If the argument is infinite, then the result is 2<sup>28</sup>. * <li> If the argument is zero, then the result is -(2<sup>28</sup>). 
* </ul> * * @param d floating-point number whose exponent is to be extracted * @return unbiased exponent of the argument. * @author Joseph D. Darcy */ public static int ilogb(double d) { int exponent = getExponent(d); switch (exponent) { case DoubleConsts.MAX_EXPONENT+1: // NaN or infinity if( isNaN(d) ) return (1<<30); // 2^30 else // infinite value return (1<<28); // 2^28 // break; case DoubleConsts.MIN_EXPONENT-1: // zero or subnormal if(d == 0.0) { return -(1<<28); // -(2^28) } else { long transducer = Double.doubleToRawLongBits(d); /* * To avoid causing slow arithmetic on subnormals, * the scaling to determine when d's significand * is normalized is done in integer arithmetic. * (there must be at least one "1" bit in the * significand since zero has been screened out. */ // isolate significand bits transducer &= DoubleConsts.SIGNIF_BIT_MASK; assert(transducer != 0L); // This loop is simple and functional. We might be // able to do something more clever that was faster; // e.g. number of leading zero detection on // (transducer << (# exponent and sign bits). while (transducer < (1L << (DoubleConsts.SIGNIFICAND_WIDTH - 1))) { transducer *= 2; exponent--; } exponent++; assert( exponent >= DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1) && exponent < DoubleConsts.MIN_EXPONENT); return exponent; } // break; default: assert( exponent >= DoubleConsts.MIN_EXPONENT && exponent <= DoubleConsts.MAX_EXPONENT); return exponent; // break; } } /** * Returns unbiased exponent of a <code>float</code>; for * subnormal values, the number is treated as if it were * normalized. That is for all finite, non-zero, positive numbers * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is * always in the range [1, 2). * <p> * Special cases: * <ul> * <li> If the argument is NaN, then the result is 2<sup>30</sup>. * <li> If the argument is infinite, then the result is 2<sup>28</sup>. * <li> If the argument is zero, then the result is -(2<sup>28</sup>). 
* </ul> * * @param f floating-point number whose exponent is to be extracted * @return unbiased exponent of the argument. * @author Joseph D. Darcy */ public static int ilogb(float f) { int exponent = getExponent(f); switch (exponent) { case FloatConsts.MAX_EXPONENT+1: // NaN or infinity if( isNaN(f) ) return (1<<30); // 2^30 else // infinite value return (1<<28); // 2^28 // break; case FloatConsts.MIN_EXPONENT-1: // zero or subnormal if(f == 0.0f) { return -(1<<28); // -(2^28) } else { int transducer = Float.floatToRawIntBits(f); /* * To avoid causing slow arithmetic on subnormals, * the scaling to determine when f's significand * is normalized is done in integer arithmetic. * (there must be at least one "1" bit in the * significand since zero has been screened out. */ // isolate significand bits transducer &= FloatConsts.SIGNIF_BIT_MASK; assert(transducer != 0); // This loop is simple and functional. We might be // able to do something more clever that was faster; // e.g. number of leading zero detection on // (transducer << (# exponent and sign bits). while (transducer < (1 << (FloatConsts.SIGNIFICAND_WIDTH - 1))) { transducer *= 2; exponent--; } exponent++; assert( exponent >= FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1) && exponent < FloatConsts.MIN_EXPONENT); return exponent; } // break; default: assert( exponent >= FloatConsts.MIN_EXPONENT && exponent <= FloatConsts.MAX_EXPONENT); return exponent; // break; } } /* * The scalb operation should be reasonably fast; however, there * are tradeoffs in writing a method to minimize the worst case * performance and writing a method to minimize the time for * expected common inputs. Some processors operate very slowly on * subnormal operands, taking hundreds or thousands of cycles for * one floating-point add or multiply as opposed to, say, four * cycles for normal operands. 
For processors with very slow * subnormal execution, scalb would be fastest if written entirely * with integer operations; in other words, scalb would need to * include the logic of performing correct rounding of subnormal * values. This could be reasonably done in at most a few hundred * cycles. However, this approach may penalize normal operations * since at least the exponent of the floating-point argument must * be examined. * * The approach taken in this implementation is a compromise. * Floating-point multiplication is used to do most of the work; * but knowingly multiplying by a subnormal scaling factor is * avoided. However, the floating-point argument is not examined * to see whether or not it is subnormal since subnormal inputs * are assumed to be rare. At most three multiplies are needed to * scale from the largest to smallest exponent ranges (scaling * down, at most two multiplies are needed if subnormal scaling * factors are allowed). However, in this implementation an * expensive integer remainder operation is avoided at the cost of * requiring five floating-point multiplies in the worst case, * which should still be a performance win. * * If scaling of entire arrays is a concern, it would probably be * more efficient to provide a double[] scalb(double[], int) * version of scalb to avoid having to recompute the needed * scaling factors for each floating-point value. */ /** * Return <code>d</code> × * 2<sup><code>scale_factor</code></sup> rounded as if performed * by a single correctly rounded floating-point multiply to a * member of the double value set. See <a * href="http://java.sun.com/docs/books/jls/second_edition/html/typesValues.doc.html#9208">§4.2.3</a> * of the <a href="http://java.sun.com/docs/books/jls/html/">Java * Language Specification</a> for a discussion of floating-point * value sets. If the exponent of the result is between the * <code>double</code>'s minimum exponent and maximum exponent, * the answer is calculated exactly. 
If the exponent of the * result would be larger than <code>doubles</code>'s maximum * exponent, an infinity is returned. Note that if the result is * subnormal, precision may be lost; that is, when <code>scalb(x, * n)</code> is subnormal, <code>scalb(scalb(x, n), -n)</code> may * not equal <i>x</i>. When the result is non-NaN, the result has * the same sign as <code>d</code>. * *<p> * Special cases: * <ul> * <li> If the first argument is NaN, NaN is returned. * <li> If the first argument is infinite, then an infinity of the * same sign is returned. * <li> If the first argument is zero, then a zero of the same * sign is returned. * </ul> * * @param d number to be scaled by a power of two. * @param scale_factor power of 2 used to scale <code>d</code> * @return <code>d * </code>2<sup><code>scale_factor</code></sup> * @author Joseph D. Darcy */ public static double scalb(double d, int scale_factor) { /* * This method does not need to be declared strictfp to * compute the same correct result on all platforms. When * scaling up, it does not matter what order the * multiply-store operations are done; the result will be * finite or overflow regardless of the operation ordering. * However, to get the correct result when scaling down, a * particular ordering must be used. * * When scaling down, the multiply-store operations are * sequenced so that it is not possible for two consecutive * multiply-stores to return subnormal results. If one * multiply-store result is subnormal, the next multiply will * round it away to zero. This is done by first multiplying * by 2 ^ (scale_factor % n) and then multiplying several * times by by 2^n as needed where n is the exponent of number * that is a covenient power of two. In this way, at most one * real rounding error occurs. If the double value set is * being used exclusively, the rounding will occur on a * multiply. 
If the double-extended-exponent value set is * being used, the products will (perhaps) be exact but the * stores to d are guaranteed to round to the double value * set. * * It is _not_ a valid implementation to first multiply d by * 2^MIN_EXPONENT and then by 2 ^ (scale_factor % * MIN_EXPONENT) since even in a strictfp program double * rounding on underflow could occur; e.g. if the scale_factor * argument was (MIN_EXPONENT - n) and the exponent of d was a * little less than -(MIN_EXPONENT - n), meaning the final * result would be subnormal. * * Since exact reproducibility of this method can be achieved * without any undue performance burden, there is no * compelling reason to allow double rounding on underflow in * scalb. */ // magnitude of a power of two so large that scaling a finite // nonzero value by it would be guaranteed to over or // underflow; due to rounding, scaling down takes takes an // additional power of two which is reflected here final int MAX_SCALE = DoubleConsts.MAX_EXPONENT + -DoubleConsts.MIN_EXPONENT + DoubleConsts.SIGNIFICAND_WIDTH + 1; int exp_adjust = 0; int scale_increment = 0; double exp_delta = Double.NaN; // Make sure scaling factor is in a reasonable range if(scale_factor < 0) { scale_factor = Math.max(scale_factor, -MAX_SCALE); scale_increment = -512; exp_delta = twoToTheDoubleScaleDown; } else { scale_factor = Math.min(scale_factor, MAX_SCALE); scale_increment = 512; exp_delta = twoToTheDoubleScaleUp; } // Calculate (scale_factor % +/-512), 512 = 2^9, using // technique from "Hacker's Delight" section 10-2. int t = (scale_factor >> 9-1) >>> 32 - 9; exp_adjust = ((scale_factor + t) & (512 -1)) - t; d *= powerOfTwoD(exp_adjust); scale_factor -= exp_adjust; while(scale_factor != 0) { d *= exp_delta; scale_factor -= scale_increment; } return d; } /** * Return <code>f </code>× * 2<sup><code>scale_factor</code></sup> rounded as if performed * by a single correctly rounded floating-point multiply to a * member of the float value set. 
See <a * href="http://java.sun.com/docs/books/jls/second_edition/html/typesValues.doc.html#9208">§4.2.3</a> * of the <a href="http://java.sun.com/docs/books/jls/html/">Java * Language Specification</a> for a discussion of floating-point * value set. If the exponent of the result is between the * <code>float</code>'s minimum exponent and maximum exponent, the * answer is calculated exactly. If the exponent of the result * would be larger than <code>float</code>'s maximum exponent, an * infinity is returned. Note that if the result is subnormal, * precision may be lost; that is, when <code>scalb(x, n)</code> * is subnormal, <code>scalb(scalb(x, n), -n)</code> may not equal * <i>x</i>. When the result is non-NaN, the result has the same * sign as <code>f</code>. * *<p> * Special cases: * <ul> * <li> If the first argument is NaN, NaN is returned. * <li> If the first argument is infinite, then an infinity of the * same sign is returned. * <li> If the first argument is zero, then a zero of the same * sign is returned. * </ul> * * @param f number to be scaled by a power of two. * @param scale_factor power of 2 used to scale <code>f</code> * @return <code>f * </code>2<sup><code>scale_factor</code></sup> * @author Joseph D. Darcy */ public static float scalb(float f, int scale_factor) { // magnitude of a power of two so large that scaling a finite // nonzero value by it would be guaranteed to over or // underflow; due to rounding, scaling down takes takes an // additional power of two which is reflected here final int MAX_SCALE = FloatConsts.MAX_EXPONENT + -FloatConsts.MIN_EXPONENT + FloatConsts.SIGNIFICAND_WIDTH + 1; // Make sure scaling factor is in a reasonable range scale_factor = Math.max(Math.min(scale_factor, MAX_SCALE), -MAX_SCALE); /* * Since + MAX_SCALE for float fits well within the double * exponent range and + float -> double conversion is exact * the multiplication below will be exact. 
Therefore, the * rounding that occurs when the double product is cast to * float will be the correctly rounded float result. Since * all operations other than the final multiply will be exact, * it is not necessary to declare this method strictfp. */ return (float)((double)f*powerOfTwoD(scale_factor)); } /** * Returns the floating-point number adjacent to the first * argument in the direction of the second argument. If both * arguments compare as equal the second argument is returned. * * <p> * Special cases: * <ul> * <li> If either argument is a NaN, then NaN is returned. * * <li> If both arguments are signed zeros, <code>direction</code> * is returned unchanged (as implied by the requirement of * returning the second argument if the arguments compare as * equal). * * <li> If <code>start</code> is * ±<code>Double.MIN_VALUE</code> and <code>direction</code> * has a value such that the result should have a smaller * magnitude, then a zero with the same sign as <code>start</code> * is returned. * * <li> If <code>start</code> is infinite and * <code>direction</code> has a value such that the result should * have a smaller magnitude, <code>Double.MAX_VALUE</code> with the * same sign as <code>start</code> is returned. * * <li> If <code>start</code> is equal to ± * <code>Double.MAX_VALUE</code> and <code>direction</code> has a * value such that the result should have a larger magnitude, an * infinity with same sign as <code>start</code> is returned. * </ul> * * @param start starting floating-point value * @param direction value indicating which of * <code>start</code>'s neighbors or <code>start</code> should * be returned * @return The floating-point number adjacent to <code>start</code> in the * direction of <code>direction</code>. * @author Joseph D. 
Darcy */ public static double nextAfter(double start, double direction) { /* * The cases: * * nextAfter(+infinity, 0) == MAX_VALUE * nextAfter(+infinity, +infinity) == +infinity * nextAfter(-infinity, 0) == -MAX_VALUE * nextAfter(-infinity, -infinity) == -infinity * * are naturally handled without any additional testing */ // First check for NaN values if (isNaN(start) || isNaN(direction)) { // return a NaN derived from the input NaN(s) return start + direction; } else if (start == direction) { return direction; } else { // start > direction or start < direction // Add +0.0 to get rid of a -0.0 (+0.0 + -0.0 => +0.0) // then bitwise convert start to integer. long transducer = Double.doubleToRawLongBits(start + 0.0d); /* * IEEE 754 floating-point numbers are lexicographically * ordered if treated as signed- magnitude integers . * Since Java's integers are two's complement, * incrementing" the two's complement representation of a * logically negative floating-point value *decrements* * the signed-magnitude representation. Therefore, when * the integer representation of a floating-point values * is less than zero, the adjustment to the representation * is in the opposite direction than would be expected at * first . */ if (direction > start) { // Calculate next greater value transducer = transducer + (transducer >= 0L ? 1L:-1L); } else { // Calculate next lesser value assert direction < start; if (transducer > 0L) --transducer; else if (transducer < 0L ) ++transducer; /* * transducer==0, the result is -MIN_VALUE * * The transition from zero (implicitly * positive) to the smallest negative * signed magnitude value must be done * explicitly. */ else transducer = DoubleConsts.SIGN_BIT_MASK | 1L; } return Double.longBitsToDouble(transducer); } } /** * Returns the floating-point number adjacent to the first * argument in the direction of the second argument. If both * arguments compare as equal, the second argument is returned. 
* * <p> * Special cases: * <ul> * <li> If either argument is a NaN, then NaN is returned. * * <li> If both arguments are signed zeros, a <code>float</code> * zero with the same sign as <code>direction</code> is returned * (as implied by the requirement of returning the second argument * if the arguments compare as equal). * * <li> If <code>start</code> is * ±<code>Float.MIN_VALUE</code> and <code>direction</code> * has a value such that the result should have a smaller * magnitude, then a zero with the same sign as <code>start</code> * is returned. * * <li> If <code>start</code> is infinite and * <code>direction</code> has a value such that the result should * have a smaller magnitude, <code>Float.MAX_VALUE</code> with the * same sign as <code>start</code> is returned. * * <li> If <code>start</code> is equal to ± * <code>Float.MAX_VALUE</code> and <code>direction</code> has a * value such that the result should have a larger magnitude, an * infinity with same sign as <code>start</code> is returned. * </ul> * * @param start starting floating-point value * @param direction value indicating which of * <code>start</code>'s neighbors or <code>start</code> should * be returned * @return The floating-point number adjacent to <code>start</code> in the * direction of <code>direction</code>. * @author Joseph D. 
Darcy */ public static float nextAfter(float start, double direction) { /* * The cases: * * nextAfter(+infinity, 0) == MAX_VALUE * nextAfter(+infinity, +infinity) == +infinity * nextAfter(-infinity, 0) == -MAX_VALUE * nextAfter(-infinity, -infinity) == -infinity * * are naturally handled without any additional testing */ // First check for NaN values if (isNaN(start) || isNaN(direction)) { // return a NaN derived from the input NaN(s) return start + (float)direction; } else if (start == direction) { return (float)direction; } else { // start > direction or start < direction // Add +0.0 to get rid of a -0.0 (+0.0 + -0.0 => +0.0) // then bitwise convert start to integer. int transducer = Float.floatToRawIntBits(start + 0.0f); /* * IEEE 754 floating-point numbers are lexicographically * ordered if treated as signed- magnitude integers . * Since Java's integers are two's complement, * incrementing" the two's complement representation of a * logically negative floating-point value *decrements* * the signed-magnitude representation. Therefore, when * the integer representation of a floating-point values * is less than zero, the adjustment to the representation * is in the opposite direction than would be expected at * first. */ if (direction > start) {// Calculate next greater value transducer = transducer + (transducer >= 0 ? 1:-1); } else { // Calculate next lesser value assert direction < start; if (transducer > 0) --transducer; else if (transducer < 0 ) ++transducer; /* * transducer==0, the result is -MIN_VALUE * * The transition from zero (implicitly * positive) to the smallest negative * signed magnitude value must be done * explicitly. */ else transducer = FloatConsts.SIGN_BIT_MASK | 1; } return Float.intBitsToFloat(transducer); } } /** * Returns the floating-point value adjacent to <code>d</code> in * the direction of positive infinity. 
This method is * semantically equivalent to <code>nextAfter(d, * Double.POSITIVE_INFINITY)</code>; however, a <code>nextUp</code> * implementation may run faster than its equivalent * <code>nextAfter</code> call. * * <p>Special Cases: * <ul> * <li> If the argument is NaN, the result is NaN. * * <li> If the argument is positive infinity, the result is * positive infinity. * * <li> If the argument is zero, the result is * <code>Double.MIN_VALUE</code> * * </ul> * * @param d starting floating-point value * @return The adjacent floating-point value closer to positive * infinity. * @author Joseph D. Darcy */ public static double nextUp(double d) { if( isNaN(d) || d == Double.POSITIVE_INFINITY) return d; else { d += 0.0d; return Double.longBitsToDouble(Double.doubleToRawLongBits(d) + ((d >= 0.0d)?+1L:-1L)); } } /** * Returns the floating-point value adjacent to <code>f</code> in * the direction of positive infinity. This method is * semantically equivalent to <code>nextAfter(f, * Double.POSITIVE_INFINITY)</code>; however, a <code>nextUp</code> * implementation may run faster than its equivalent * <code>nextAfter</code> call. * * <p>Special Cases: * <ul> * <li> If the argument is NaN, the result is NaN. * * <li> If the argument is positive infinity, the result is * positive infinity. * * <li> If the argument is zero, the result is * <code>Float.MIN_VALUE</code> * * </ul> * * @param f starting floating-point value * @return The adjacent floating-point value closer to positive * infinity. * @author Joseph D. Darcy */ public static float nextUp(float f) { if( isNaN(f) || f == FloatConsts.POSITIVE_INFINITY) return f; else { f += 0.0f; return Float.intBitsToFloat(Float.floatToRawIntBits(f) + ((f >= 0.0f)?+1:-1)); } } /** * Returns the floating-point value adjacent to <code>d</code> in * the direction of negative infinity. 
This method is * semantically equivalent to <code>nextAfter(d, * Double.NEGATIVE_INFINITY)</code>; however, a * <code>nextDown</code> implementation may run faster than its * equivalent <code>nextAfter</code> call. * * <p>Special Cases: * <ul> * <li> If the argument is NaN, the result is NaN. * * <li> If the argument is negative infinity, the result is * negative infinity. * * <li> If the argument is zero, the result is * <code>-Double.MIN_VALUE</code> * * </ul> * * @param d starting floating-point value * @return The adjacent floating-point value closer to negative * infinity. * @author Joseph D. Darcy */ public static double nextDown(double d) { if( isNaN(d) || d == Double.NEGATIVE_INFINITY) return d; else { if (d == 0.0) return -Double.MIN_VALUE; else return Double.longBitsToDouble(Double.doubleToRawLongBits(d) + ((d > 0.0d)?-1L:+1L)); } } /** * Returns the floating-point value adjacent to <code>f</code> in * the direction of negative infinity. This method is * semantically equivalent to <code>nextAfter(f, * Float.NEGATIVE_INFINITY)</code>; however, a * <code>nextDown</code> implementation may run faster than its * equivalent <code>nextAfter</code> call. * * <p>Special Cases: * <ul> * <li> If the argument is NaN, the result is NaN. * * <li> If the argument is negative infinity, the result is * negative infinity. * * <li> If the argument is zero, the result is * <code>-Float.MIN_VALUE</code> * * </ul> * * @param f starting floating-point value * @return The adjacent floating-point value closer to negative * infinity. * @author Joseph D. Darcy */ public static double nextDown(float f) { if( isNaN(f) || f == Float.NEGATIVE_INFINITY) return f; else { if (f == 0.0f) return -Float.MIN_VALUE; else return Float.intBitsToFloat(Float.floatToRawIntBits(f) + ((f > 0.0f)?-1:+1)); } } /** * Returns the first floating-point argument with the sign of the * second floating-point argument. 
For this method, a NaN * <code>sign</code> argument is always treated as if it were * positive. * * @param magnitude the parameter providing the magnitude of the result * @param sign the parameter providing the sign of the result * @return a value with the magnitude of <code>magnitude</code> * and the sign of <code>sign</code>. * @author Joseph D. Darcy * @since 1.5 */ public static double copySign(double magnitude, double sign) { return rawCopySign(magnitude, (isNaN(sign)?1.0d:sign)); } /** * Returns the first floating-point argument with the sign of the * second floating-point argument. For this method, a NaN * <code>sign</code> argument is always treated as if it were * positive. * * @param magnitude the parameter providing the magnitude of the result * @param sign the parameter providing the sign of the result * @return a value with the magnitude of <code>magnitude</code> * and the sign of <code>sign</code>. * @author Joseph D. Darcy */ public static float copySign(float magnitude, float sign) { return rawCopySign(magnitude, (isNaN(sign)?1.0f:sign)); } /** * Returns the size of an ulp of the argument. An ulp of a * <code>double</code> value is the positive distance between this * floating-point value and the <code>double</code> value next * larger in magnitude. Note that for non-NaN <i>x</i>, * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>. * * <p>Special Cases: * <ul> * <li> If the argument is NaN, then the result is NaN. * <li> If the argument is positive or negative infinity, then the * result is positive infinity. * <li> If the argument is positive or negative zero, then the result is * <code>Double.MIN_VALUE</code>. * <li> If the argument is ±<code>Double.MAX_VALUE</code>, then * the result is equal to 2<sup>971</sup>. * </ul> * * @param d the floating-point value whose ulp is to be returned * @return the size of an ulp of the argument * @author Joseph D. 
Darcy * @since 1.5 */ public static double ulp(double d) { int exp = getExponent(d); switch(exp) { case DoubleConsts.MAX_EXPONENT+1: // NaN or infinity return Math.abs(d); // break; case DoubleConsts.MIN_EXPONENT-1: // zero or subnormal return Double.MIN_VALUE; // break default: assert exp <= DoubleConsts.MAX_EXPONENT && exp >= DoubleConsts.MIN_EXPONENT; // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x)) exp = exp - (DoubleConsts.SIGNIFICAND_WIDTH-1); if (exp >= DoubleConsts.MIN_EXPONENT) { return powerOfTwoD(exp); } else { // return a subnormal result; left shift integer // representation of Double.MIN_VALUE appropriate // number of positions return Double.longBitsToDouble(1L << (exp - (DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1)) )); } // break } } /** * Returns the size of an ulp of the argument. An ulp of a * <code>float</code> value is the positive distance between this * floating-point value and the <code>float</code> value next * larger in magnitude. Note that for non-NaN <i>x</i>, * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>. * * <p>Special Cases: * <ul> * <li> If the argument is NaN, then the result is NaN. * <li> If the argument is positive or negative infinity, then the * result is positive infinity. * <li> If the argument is positive or negative zero, then the result is * <code>Float.MIN_VALUE</code>. * <li> If the argument is ±<code>Float.MAX_VALUE</code>, then * the result is equal to 2<sup>104</sup>. * </ul> * * @param f the floating-point value whose ulp is to be returned * @return the size of an ulp of the argument * @author Joseph D. 
Darcy * @since 1.5 */ public static float ulp(float f) { int exp = getExponent(f); switch(exp) { case FloatConsts.MAX_EXPONENT+1: // NaN or infinity return Math.abs(f); // break; case FloatConsts.MIN_EXPONENT-1: // zero or subnormal return FloatConsts.MIN_VALUE; // break default: assert exp <= FloatConsts.MAX_EXPONENT && exp >= FloatConsts.MIN_EXPONENT; // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x)) exp = exp - (FloatConsts.SIGNIFICAND_WIDTH-1); if (exp >= FloatConsts.MIN_EXPONENT) { return powerOfTwoF(exp); } else { // return a subnormal result; left shift integer // representation of FloatConsts.MIN_VALUE appropriate // number of positions return Float.intBitsToFloat(1 << (exp - (FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1)) )); } // break } } /** * Returns the signum function of the argument; zero if the argument * is zero, 1.0 if the argument is greater than zero, -1.0 if the * argument is less than zero. * * <p>Special Cases: * <ul> * <li> If the argument is NaN, then the result is NaN. * <li> If the argument is positive zero or negative zero, then the * result is the same as the argument. * </ul> * * @param d the floating-point value whose signum is to be returned * @return the signum function of the argument * @author Joseph D. Darcy * @since 1.5 */ public static double signum(double d) { return (d == 0.0 || isNaN(d))?d:copySign(1.0, d); } /** * Returns the signum function of the argument; zero if the argument * is zero, 1.0f if the argument is greater than zero, -1.0f if the * argument is less than zero. * * <p>Special Cases: * <ul> * <li> If the argument is NaN, then the result is NaN. * <li> If the argument is positive zero or negative zero, then the * result is the same as the argument. * </ul> * * @param f the floating-point value whose signum is to be returned * @return the signum function of the argument * @author Joseph D. 
Darcy
     * @since 1.5
     */
    public static float signum(float f) {
        // Zeros (of either sign) and NaN are handed back untouched.
        if (f == 0.0f || isNaN(f)) {
            return f;
        }
        // Anything else collapses to 1.0f carrying the argument's sign.
        return copySign(1.0f, f);
    }
}