StringToDouble.java example

Explorer
hive-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 *	Source code for the "strtod" library procedure.
 *
 * Copyright 1988-1992 Regents of the University of California
 * Permission to use, copy, modify, and distribute this
 * software and its documentation for any purpose and without
 * fee is hereby granted, provided that the above copyright
 * notice appear in all copies.  The University of California
 * makes no representations about the suitability of this
 * software for any purpose.  It is provided "as is" without
 * express or implied warranty.
 */
package org.apache.hadoop.hive.serde2.lazy.fast;

import java.nio.charset.StandardCharsets;

public class StringToDouble {
  static final int maxExponent = 511;	/* Largest possible base 10 exponent.  Any
				 * exponent larger than this will already
				 * produce underflow or overflow, so there's
				 * no need to worry about additional digits.
				 */
  static final double powersOf10[] = {	/* Table giving binary powers of 10.  Entry */
      10.,			/* is 10^2^i.  Used to convert decimal */
      100.,			/* exponents into floating-point numbers. */
      1.0e4,
      1.0e8,
      1.0e16,
      1.0e32,
      1.0e64,
      1.0e128,
      1.0e256
  };

  /*
   * Only for testing
   */
  static double strtod(String s) {
    final byte[] utf8 = s.getBytes(StandardCharsets.UTF_8);
    return strtod(utf8, 0, utf8.length);
  }

  public static double strtod(byte[] utf8, int offset, int length)
  {
    if (length == 0) {
      throw new NumberFormatException();
    }
    boolean signIsNegative = true;
    boolean expSignIsNegative = true;
    double fraction;
    int d;
    int p = offset;
    int end = offset + length;
    int c;
    int exp = 0;		/* Exponent read from "EX" field. */
    int fracExp = 0;		/* Exponent that derives from the fractional
				 * part.  Under normal circumstatnces, it is
				 * the negative of the number of digits in F.
				 * However, if I is very long, the last digits
				 * of I get dropped (otherwise a long I with a
				 * large negative exponent could cause an
				 * unnecessary overflow on I alone).  In this
				 * case, fracExp is incremented one for each
				 * dropped digit. */
    int mantSize;		/* Number of digits in mantissa. */
    int decPt;			/* Number of mantissa digits BEFORE decimal
				 * point. */
    int pExp;		/* Temporarily holds location of exponent
				 * in string. */

    /*
     * Strip off leading blanks and check for a sign.
     */

    while(p < end && Character.isWhitespace(utf8[p])) {
      p++;
    }
    while(end > p && Character.isWhitespace(utf8[end - 1])) {
      end--;
    }
    if (!testSimpleDecimal(utf8, p, end - p)) {
      return Double.parseDouble(new String(utf8, p, end-p, StandardCharsets.UTF_8));
    }

    if (utf8[p] == '-') {
      signIsNegative = true;
      p += 1;
    } else {
      if (utf8[p] == '+') {
        p += 1;
      }
      signIsNegative = false;
    }

    /*
     * Count the number of digits in the mantissa (including the decimal
     * point), and also locate the decimal point.
     */

    decPt = -1;
    int mantEnd = end - p;
    for (mantSize = 0; mantSize < mantEnd; mantSize += 1)
    {
      c = utf8[p];
      if (!isdigit(c)) {
        if ((c != '.') || (decPt >= 0)) {
          break;
        }
        decPt = mantSize;
      }
      p += 1;
    }

    /*
     * Now suck up the digits in the mantissa.  Use two integers to
     * collect 9 digits each (this is faster than using floating-point).
     * If the mantissa has more than 18 digits, ignore the extras, since
     * they can't affect the value anyway.
     */

    pExp  = p;
    p -= mantSize;
    if (decPt < 0) {
      decPt = mantSize;
    } else {
      mantSize -= 1;			/* One of the digits was the point. */
    }
    if (mantSize > 18) {
      fracExp = decPt - 18;
      mantSize = 18;
    } else {
      fracExp = decPt - mantSize;
    }
    if (mantSize == 0) {
      if (signIsNegative) {
        return -0.0d;
      }
      return 0.0d;
    } else {
      double frac1, frac2;
      frac1 = 0;
      for (; mantSize > 9; mantSize -= 1)
      {
        c = utf8[p];
        p += 1;
        if (c == '.') {
          c = utf8[p];
          p += 1;
        }
        frac1 = 10 * frac1 + (c - '0');
      }
      frac2 = 0;
      for (; mantSize > 0; mantSize -= 1)
      {
        c = utf8[p];
        p += 1;
        if (c == '.') {
          c = utf8[p];
          p += 1;
        }
        frac2 = 10 * frac2 + (c - '0');
      }
      fraction = (1e9d * frac1) + frac2;
    }

    /*
     * Skim off the exponent.
     */

    p = pExp;

    if (p < end) {
      if ((utf8[p] == 'E') || (utf8[p] == 'e')) {
        p += 1;
        if (p < end) {
          if (utf8[p] == '-') {
            expSignIsNegative = true;
            p += 1;
          } else {
            if (utf8[p] == '+') {
              p += 1;
            }
            expSignIsNegative = false;
          }
          while (p < end && isdigit(utf8[p])) {
            exp = exp * 10 + (utf8[p] - '0');
            p += 1;
          }
        }
      }
    }
    if (expSignIsNegative) {
      exp = fracExp - exp;
    } else {
      exp = fracExp + exp;
    }

    /*
     * Generate a floating-point number that represents the exponent.
     * Do this by processing the exponent one bit at a time to combine
     * many powers of 2 of 10. Then combine the exponent with the
     * fraction.
     */

    if (exp < 0) {
      expSignIsNegative = true;
      exp = -exp;
    } else {
      expSignIsNegative = false;
    }
    if (exp > maxExponent) {
      exp = maxExponent;
    }

    double dblExp = 1.0;
    for (d = 0; exp != 0; exp >>= 1, d += 1) {
      if ((exp & 1) == 1) {
        dblExp *= powersOf10[d];
      }
    }

    if (expSignIsNegative) {
      fraction /= dblExp;
    } else {
      fraction *= dblExp;
    }

    if (signIsNegative) {
      return -fraction;
    }
    return fraction;
  }

  private static boolean testSimpleDecimal(byte[] utf8, int off, int len) {
    if (len > 18) {
      return false;
    }
    int decimalPts = 0;
    int signs = 0;
    int nondigits = 0;
    int digits = 0;
    for (int i = off; i < len + off; i++) {
      final int c = utf8[i];
      if (c == '.') {
        decimalPts++;
      } else if (c == '-' || c == '+') {
        signs++;
      } else if (!isdigit(c)) {
        // could be exponential notations
        nondigits++;
      } else {
        digits++;
      }
    }
    // There can be up to 5e-16 error
    return (decimalPts <= 1 && signs <= 1 && nondigits == 0 && digits < 16);
  }

  private static boolean isdigit(int c) {
    return '0' <= c && c <= '9';
  }
}