// XSDPrimitiveTypeParser.java example
//
// Explorer
// siren-master
/**
 * Copyright 2014 National University of Ireland, Galway.
 *
 * This file is part of the SIREn project. Project and contact information:
 *
 *  https://github.com/rdelbru/SIREn
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.sindice.siren.util;

import java.io.IOException;
import java.io.Reader;

/**
 * This class provides utility methods to parse the content of a
 * {@link Reader} into primitive types, as specified in the xsd datatype
 * reference.
 *
 * <p> Methods from this class <b>do not create temporary objects</b> and
 *     are typically faster than standard library methods.</p>
 *
 * <p> Every method consumes the reader until the end of the stream is
 *     reached (or until an invalid character is found). No whitespace
 *     handling is performed: any character that is not part of the lexical
 *     form, including leading or trailing whitespace, triggers a
 *     {@link NumberFormatException}.</p>
 *
 * <p> For floating-point numbers, there is a loss of precision of
 * approximately 1e-15 %.</p>
 */
public final class XSDPrimitiveTypeParser {

    /**
     * Smallest negative accumulator that can still be multiplied by 10
     * without overflowing an {@code int}.
     */
    private static final int INT_MULTIPLY_LIMIT = Integer.MIN_VALUE / 10;

    /**
     * Smallest negative accumulator that can still be multiplied by 10
     * without overflowing a {@code long}.
     */
    private static final long LONG_MULTIPLY_LIMIT = Long.MIN_VALUE / 10;

    /**
     * Parses the specified character sequence as a signed 16 bit integer, as
     * specified in the xsd:short datatype reference.
     *
     * <p> The value space of xsd:short is the set of common short integers
     * (16 bits), the integers between -32768 and 32767. Its lexical space
     * allows any number of insignificant leading zeros.
     *
     * @param  reader A reader for reading the character streams.
     * @return the parsed value
     * @throws IOException If a problem occurs with the specified reader
     * @throws NumberFormatException if the specified character sequence
     *         does not contain a parsable signed 16 bit integer.
     */
    public static short parseShort(final Reader reader)
    throws IOException {
      final int i = parseInt(reader);
      if ((i < Short.MIN_VALUE) || (i > Short.MAX_VALUE)) {
        throw new NumberFormatException("Short overflow");
      }
      return (short) i;
    }

    /**
     * Parses the specified character sequence as a signed 32 bit integer, as
     * specified in the xsd:int datatype reference.
     *
     * <p> The value space of xsd:int is the set of common single-size integers
     * (32 bits), the integers between -2147483648 and 2147483647. Its lexical
     * space allows any number of insignificant leading zeros.
     *
     * @param  reader A reader for reading the character streams.
     * @return the parsed value
     * @throws IOException If a problem occurs with the specified reader
     * @throws NumberFormatException if the specified reader
     *         does not contain a parsable signed 32 bit integer (this
     *         includes an empty stream or a sign without any digit).
     */
    public static int parseInt(final Reader reader) throws IOException {
      boolean isNegative = false;
      int result = 0;

      // Check for sign
      int c = reader.read();
      if (isSign(c)) {
        isNegative = (c == '-');
        c = reader.read();
      }

      // At least one digit is required.
      if (c == -1) {
        throw new NumberFormatException("Invalid integer value");
      }

      while (c != -1) {
        result = accumulateIntegerDecimal(c, result);
        c = reader.read();
      }

      // Check MAX_VALUE overflow: -MIN_VALUE is not representable.
      if ((result == Integer.MIN_VALUE) && !isNegative) {
        throw new NumberFormatException("Integer overflow");
      }

      // Decimal has been accumulated negatively. We must switch sign if
      // the number is non-negative.
      return isNegative ? result : -result;
    }

    /**
     * Parses the specified character sequence as a signed 64 bit integer, as
     * specified in the xsd:long datatype reference.
     *
     * <p> The value space of xsd:long is the set of common double-size integers
     * (64 bits), the integers between -9223372036854775808 and
     * 9223372036854775807. Its lexical space allows any number of insignificant
     * leading zeros.
     *
     * @param  reader A reader for reading the character streams.
     * @return the parsed value
     * @throws IOException If a problem occurs with the specified reader
     * @throws NumberFormatException if the specified reader
     *         does not contain a parsable signed 64 bit integer (this
     *         includes an empty stream or a sign without any digit).
     */
    public static long parseLong(final Reader reader) throws IOException {
      boolean isNegative = false;
      long result = 0;

      // Check for sign
      int c = reader.read();
      if (isSign(c)) {
        isNegative = (c == '-');
        c = reader.read();
      }

      // At least one digit is required.
      if (c == -1) {
        throw new NumberFormatException("Invalid long value");
      }

      while (c != -1) {
        result = accumulateLongDecimal(c, result);
        c = reader.read();
      }

      // Check MAX_VALUE overflow: -MIN_VALUE is not representable.
      if ((result == Long.MIN_VALUE) && !isNegative) {
        throw new NumberFormatException("Long overflow");
      }

      // Decimal has been accumulated negatively. We must switch sign if
      // the number is non-negative.
      return isNegative ? result : -result;
    }

    /**
     * Parses the specified character sequence as a 32 bit floating-point
     * number, as specified in the xsd:float datatype reference.
     *
     * <p> The value space of xsd:float is "float," 32-bit floating-point
     * numbers as defined by the IEEE. The lexical space uses a decimal format
     * with optional scientific notation. The match between lexical (powers of
     * 10) and value (powers of 2) spaces is approximate and maps to the closest
     * value.
     *
     * <p> The value is parsed with {@link #parseDouble(Reader)} and then
     * narrowed to a {@code float}.
     *
     * @param  reader A reader for reading the character streams.
     * @return the parsed value
     * @throws IOException If a problem occurs with the specified reader
     * @throws NumberFormatException if the specified reader
     *         does not contain a parsable 32 bit floating-point number.
     */
    public static float parseFloat(final Reader reader) throws IOException {
      return (float) parseDouble(reader);
    }

    /**
     * Parses the specified character sequence as a 64 bit floating-point
     * number, as specified in the xsd:double datatype reference.
     *
     * <p> The value space of xsd:double is double (64 bits) floating-point
     * numbers as defined by the IEEE (Institute of Electrical and Electronic
     * Engineers). The lexical space uses a decimal format with optional
     * scientific notation. The match between lexical (powers of 10) and value
     * (powers of 2) spaces is approximate and done on the closest value.
     *
     * <p> Accepted forms are {@code NaN}, an optionally signed {@code INF},
     * and an optionally signed mantissa (digits with an optional fraction
     * part, at least one digit overall) followed by an optional exponent
     * ({@code e} or {@code E}, optional sign, at least one digit). The integer
     * part of the mantissa and the exponent are accumulated in a {@code long};
     * a {@link NumberFormatException} is thrown if either does not fit.
     *
     * @param  reader A reader for reading the character streams.
     * @return the parsed value
     * @throws IOException If a problem occurs with the specified reader
     * @throws NumberFormatException if the specified reader
     *         does not contain a parsable 64 bit floating-point number.
     */
    public static double parseDouble(final Reader reader) throws IOException {
      boolean isNegative = false;
      boolean isNegativeExp = false;
      boolean hasMantissaDigit = false;

      long decimal = 0;
      double fraction = 0;
      long exp = 0;

      int c = reader.read();

      // Checks for NaN.
      if (c == 'N') {
        return parseNaN(reader);
      }

      // Check for sign
      if (isSign(c)) {
        isNegative = (c == '-');
        c = reader.read();
      }

      // Checks for INF.
      if (c == 'I') {
        return parseINF(reader, isNegative);
      }

      // The mantissa must start with a digit or a '.'.
      if (((c < '0') || (c > '9')) && (c != '.')) {
        throw new NumberFormatException("Invalid double value");
      }

      // Reads the integer part (accumulated negatively).
      while (c != -1 && c != '.' && c != 'E' && c != 'e') {
        decimal = accumulateLongDecimal(c, decimal);
        hasMantissaDigit = true;
        c = reader.read();
      }

      // Reads fraction.
      if (c == '.') {
        // skip '.'
        c = reader.read();

        double base = 0.1;
        while (c != -1 && c != 'E' && c != 'e') {
          fraction = accumulateDoubleFraction(c, fraction, base);
          hasMantissaDigit = true;
          base *= 0.1;
          c = reader.read();
        }
      }

      // A lone '.' (with or without sign / exponent) is not a number.
      if (!hasMantissaDigit) {
        throw new NumberFormatException("Invalid double value");
      }

      // Unsigned magnitude of the mantissa. The integer part was accumulated
      // negatively, so it is subtracted; the promotion to double happens
      // before the sign switch, which keeps Long.MIN_VALUE safe.
      final double mantissa = fraction - decimal;

      // End - no exponent. The sign is applied last so that it covers the
      // fraction part as well as the integer part.
      if (c == -1) {
        return isNegative ? -mantissa : mantissa;
      }

      // skip 'e' or 'E'
      c = reader.read();

      // Check for sign
      if (isSign(c)) {
        isNegativeExp = (c == '-');
        c = reader.read();
      }

      // At least one exponent digit is required.
      if (c == -1) {
        throw new NumberFormatException("Invalid double value");
      }

      // Reads exponent (accumulated negatively).
      while (c != -1) {
        exp = accumulateLongDecimal(c, exp);
        c = reader.read();
      }

      // Switch sign in double space to avoid overflowing on Long.MIN_VALUE.
      final double exponent = isNegativeExp ? (double) exp : -(double) exp;

      final double magnitude = mantissa * Math.pow(10, exponent);
      return isNegative ? -magnitude : magnitude;
    }

    /**
     * Parses the remainder of a {@code NaN} literal. The leading 'N' must
     * already have been consumed, and nothing may follow the literal.
     *
     * @param  reader A reader positioned right after the leading 'N'.
     * @return {@link Double#NaN}
     * @throws IOException If a problem occurs with the specified reader
     * @throws NumberFormatException if the remaining characters are not
     *         exactly {@code aN}.
     */
    private static double parseNaN(final Reader reader)
    throws IOException {
      if (reader.read() == 'a' && reader.read() == 'N' && reader.read() == -1) {
        return Double.NaN;
      }
      throw new NumberFormatException("Invalid double value");
    }

    /**
     * Parses the remainder of an {@code INF} literal. The leading 'I' must
     * already have been consumed, and nothing may follow the literal. The
     * two remaining letters are matched case-insensitively.
     *
     * @param  reader A reader positioned right after the leading 'I'.
     * @param  isNegative whether a '-' sign preceded the literal.
     * @return the infinity of the requested sign
     * @throws IOException If a problem occurs with the specified reader
     * @throws NumberFormatException if the remaining characters do not
     *         complete the literal.
     */
    private static double parseINF(final Reader reader, final boolean isNegative)
    throws IOException {
      final int n = reader.read();
      final int f = reader.read();
      final int end = reader.read();

      if ((n == 'n' || n == 'N') && (f == 'f' || f == 'F') && (end == -1)) {
        return isNegative ? Double.NEGATIVE_INFINITY
                          : Double.POSITIVE_INFINITY;
      }
      throw new NumberFormatException("Invalid double value");
    }

    /**
     * Tells whether the character is a '+' or '-' sign.
     */
    private static boolean isSign(final int c) {
      return (c == '-') || (c == '+');
    }

    /**
     * Accumulates a digit negatively in order to be able to represent
     * {@link Integer#MIN_VALUE}, whose absolute value does not fit in an
     * {@code int}.
     *
     * @param c the character to accumulate; must be a digit '0'..'9'
     * @param decimal the value accumulated so far (zero or negative)
     * @return {@code decimal * 10 - digit}
     * @throws NumberFormatException if {@code c} is not a digit or if the
     *         new value does not fit in an {@code int}.
     */
    private static int accumulateIntegerDecimal(final int c, final int decimal) {
      if (c < '0' || c > '9') {
        throw new NumberFormatException("Invalid integer value");
      }
      final int digit = c - '0';

      // Bounds are checked BEFORE each arithmetic step: comparing the wrapped
      // result with the previous value misses overflows that wrap around to
      // an even more negative number.
      if (decimal < INT_MULTIPLY_LIMIT) {
        throw new NumberFormatException("Integer overflow");
      }
      final int shifted = decimal * 10;
      if (shifted < Integer.MIN_VALUE + digit) {
        throw new NumberFormatException("Integer overflow");
      }
      return shifted - digit;
    }

    /**
     * Accumulates a digit negatively in order to be able to represent
     * {@link Long#MIN_VALUE}, whose absolute value does not fit in a
     * {@code long}.
     *
     * @param c the character to accumulate; must be a digit '0'..'9'
     * @param decimal the value accumulated so far (zero or negative)
     * @return {@code decimal * 10 - digit}
     * @throws NumberFormatException if {@code c} is not a digit or if the
     *         new value does not fit in a {@code long}.
     */
    private static long accumulateLongDecimal(final int c, final long decimal) {
      if (c < '0' || c > '9') {
        throw new NumberFormatException("Invalid long value");
      }
      final int digit = c - '0';

      // See accumulateIntegerDecimal: bounds are checked before each step so
      // that no wrapped value can go unnoticed.
      if (decimal < LONG_MULTIPLY_LIMIT) {
        throw new NumberFormatException("Long overflow");
      }
      final long shifted = decimal * 10;
      if (shifted < Long.MIN_VALUE + digit) {
        throw new NumberFormatException("Long overflow");
      }
      return shifted - digit;
    }

    /**
     * Adds one fraction digit, weighted by {@code base}, to the fraction
     * accumulated so far.
     *
     * @param c the character to accumulate; must be a digit '0'..'9'
     * @param fraction the fraction accumulated so far
     * @param base the weight of the digit (0.1 for the first one, 0.01 for
     *        the second one, ...)
     * @return {@code fraction + digit * base}
     * @throws NumberFormatException if {@code c} is not a digit.
     */
    private static double accumulateDoubleFraction(final int c, final double fraction, final double base) {
      if (c < '0' || c > '9') {
        throw new NumberFormatException("Invalid double value");
      }
      final int digit = c - '0';
      return digit * base + fraction;
    }

}