/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.util;
import java.io.IOException;
import java.io.Reader;
/**
* This class provides utility methods to parse {@link Reader} into primitive
* types, as specified in the xsd datatype reference.
*
* <p> Methods from this class <b>do not create temporary objects</b> and
* are typically faster than standard library methods.</p>
*
* <p> For floating-point numbers, there is a loss of precision of
* approximately 1e-15 %.</p>
*/
public final class XSDPrimitiveTypeParser {

  /** Utility class holding only static methods; not meant to be instantiated. */
  private XSDPrimitiveTypeParser() {
  }

  /**
   * Parses the specified character sequence as a signed 16 bit integer, as
   * specified in the xsd:short datatype reference.
   *
   * <p> The value space of xsd:short is the set of common short integers
   * (16 bits), the integers between -32768 and 32767. Its lexical space allows
   * any number of insignificant leading zeros.
   *
   * @param reader A reader for reading the character streams.
   * @return the parsed value
   * @throws IOException If a problem occurs with the specified reader
   * @throws NumberFormatException if the specified character sequence
   * does not contain a parsable signed 16 bit integer.
   */
  public static short parseShort(final Reader reader)
  throws IOException {
    final int i = parseInt(reader);
    if ((i < Short.MIN_VALUE) || (i > Short.MAX_VALUE)) {
      throw new NumberFormatException("Short overflow");
    }
    return (short) i;
  }

  /**
   * Parses the specified character sequence as a signed 32 bit integer, as
   * specified in the xsd:int datatype reference.
   *
   * <p> The value space of xsd:int is the set of common single-size integers
   * (32 bits), the integers between -2147483648 and 2147483647. Its lexical
   * space allows any number of insignificant leading zeros.
   *
   * <p> The reader is consumed until its end; an optional leading sign must
   * be followed by at least one digit.
   *
   * @param reader A reader for reading the character streams.
   * @return the parsed value
   * @throws IOException If a problem occurs with the specified reader
   * @throws NumberFormatException if the specified reader
   * does not contain a parsable signed 32 bit integer.
   */
  public static int parseInt(final Reader reader) throws IOException {
    int c = reader.read();

    // Check for sign
    boolean isNegative = false;
    if (isSign(c)) {
      isNegative = (c == '-');
      c = reader.read();
    }

    // An empty input or a lone sign is not a number.
    if (c == -1) {
      throw new NumberFormatException("Invalid integer value");
    }

    int result = 0;
    while (c != -1) {
      result = accumulateIntegerDecimal(c, result);
      c = reader.read();
    }

    // Check MAX_VALUE overflow: -MIN_VALUE is not representable.
    if ((result == Integer.MIN_VALUE) && !isNegative) {
      throw new NumberFormatException("Integer overflow");
    }
    // Decimal has been accumulated negatively. We must switch sign if
    // the number is non-negative.
    return isNegative ? result : -result;
  }

  /**
   * Parses the specified character sequence as a signed 64 bit integer, as
   * specified in the xsd:long datatype reference.
   *
   * <p> The value space of xsd:long is the set of common double-size integers
   * (64 bits), the integers between -9223372036854775808 and
   * 9223372036854775807. Its lexical space allows any number of insignificant
   * leading zeros.
   *
   * <p> The reader is consumed until its end; an optional leading sign must
   * be followed by at least one digit.
   *
   * @param reader A reader for reading the character streams.
   * @return the parsed value
   * @throws IOException If a problem occurs with the specified reader
   * @throws NumberFormatException if the specified reader
   * does not contain a parsable signed 64 bit integer.
   */
  public static long parseLong(final Reader reader) throws IOException {
    int c = reader.read();

    // Check for sign
    boolean isNegative = false;
    if (isSign(c)) {
      isNegative = (c == '-');
      c = reader.read();
    }

    // An empty input or a lone sign is not a number.
    if (c == -1) {
      throw new NumberFormatException("Invalid long value");
    }

    long result = 0;
    while (c != -1) {
      result = accumulateLongDecimal(c, result);
      c = reader.read();
    }

    // Check MAX_VALUE overflow: -MIN_VALUE is not representable.
    if ((result == Long.MIN_VALUE) && !isNegative) {
      throw new NumberFormatException("Long overflow");
    }
    // Decimal has been accumulated negatively. We must switch sign if
    // the number is non-negative.
    return isNegative ? result : -result;
  }

  /**
   * Parses the specified character sequence as a 32 bit floating-point
   * number, as specified in the xsd:float datatype reference.
   *
   * <p> The value space of xsd:float is "float," 32-bit floating-point
   * numbers as defined by the IEEE. The lexical space uses a decimal format
   * with optional scientific notation. The match between lexical (powers of
   * 10) and value (powers of 2) spaces is approximate and maps to the closest
   * value.
   *
   * <p> The value is parsed with {@link #parseDouble(Reader)} and then
   * narrowed to a float.
   *
   * @param reader A reader for reading the character streams.
   * @return the parsed value
   * @throws IOException If a problem occurs with the specified reader
   * @throws NumberFormatException if the specified reader
   * does not contain a parsable signed 32 bit floating-point number.
   */
  public static float parseFloat(final Reader reader) throws IOException {
    return (float) parseDouble(reader);
  }

  /**
   * Parses the specified character sequence as a 64 bit floating-point
   * number, as specified in the xsd:double datatype reference.
   *
   * <p> The value space of xsd:double is double (64 bits) floating-point
   * numbers as defined by the IEEE (Institute of Electrical and Electronic
   * Engineers). The lexical space uses a decimal format with optional
   * scientific notation. The match between lexical (powers of 10) and value
   * (powers of 2) spaces is approximate and done on the closest value.
   *
   * <p> The mantissa must contain at least one digit (before or after the
   * decimal point) and, when an exponent marker is present, the exponent must
   * contain at least one digit. The integer part of the mantissa is
   * accumulated in a long and is therefore limited to the 64 bit range.
   *
   * @param reader A reader for reading the character streams.
   * @return the parsed value
   * @throws IOException If a problem occurs with the specified reader
   * @throws NumberFormatException if the specified reader
   * does not contain a parsable signed 64 bit floating-point number.
   */
  public static double parseDouble(final Reader reader) throws IOException {
    int c = reader.read();

    // Checks for NaN.
    if (c == 'N') {
      return parseNaN(reader);
    }

    // Check for sign
    boolean isNegative = false;
    if (isSign(c)) {
      isNegative = (c == '-');
      c = reader.read();
    }

    // Checks for INF.
    if (c == 'I') {
      return parseINF(reader, isNegative);
    }

    // At least one digit or a '.' required.
    if (((c < '0') || (c > '9')) && (c != '.')) {
      throw new NumberFormatException("Invalid double value");
    }

    // Reads the integer part (accumulated negatively).
    boolean hasDigits = false;
    long decimal = 0;
    while (c != -1 && c != '.' && c != 'E' && c != 'e') {
      decimal = accumulateLongDecimal(c, decimal);
      hasDigits = true;
      c = reader.read();
    }

    // Check MAX_VALUE overflow
    if ((decimal == Long.MIN_VALUE) && !isNegative) {
      throw new NumberFormatException("Double overflow");
    }

    // Reads fraction (accumulated positively).
    double fraction = 0;
    if (c == '.') {
      // skip '.'
      c = reader.read();
      double base = 0.1;
      while (c != -1 && c != 'E' && c != 'e') {
        fraction = accumulateDoubleFraction(c, fraction, base);
        hasDigits = true;
        base *= 0.1;
        c = reader.read();
      }
    }

    // A lone '.' (optionally followed by an exponent) is not a number.
    if (!hasDigits) {
      throw new NumberFormatException("Invalid double value");
    }

    // Build the absolute value first, then apply the sign to the whole
    // mantissa so that the fraction of a negative number is subtracted, not
    // added. 'decimal' is non-positive, hence the subtraction.
    final double magnitude = fraction - (double) decimal;
    final double mantissa = isNegative ? -magnitude : magnitude;

    // End - no exponent
    if (c == -1) {
      return mantissa;
    }

    // skip 'e' or 'E'
    c = reader.read();

    // Check for sign
    boolean isNegativeExp = false;
    if (isSign(c)) {
      isNegativeExp = (c == '-');
      c = reader.read();
    }

    // The exponent marker must be followed by at least one digit.
    if (c == -1) {
      throw new NumberFormatException("Invalid double value");
    }

    // Reads exponent (accumulated negatively).
    long exp = 0;
    while (c != -1) {
      exp = accumulateLongDecimal(c, exp);
      c = reader.read();
    }

    // Switch sign in double arithmetic so that negating Long.MIN_VALUE cannot
    // wrap around.
    final double exponent = isNegativeExp ? (double) exp : -(double) exp;
    return mantissa * Math.pow(10, exponent);
  }

  /**
   * Reads the two characters following an initial 'N' and checks that they
   * complete the literal "NaN". Characters after the literal are not read.
   *
   * @param reader A reader positioned just after the leading 'N'.
   * @return {@link Double#NaN}
   * @throws IOException If a problem occurs with the specified reader
   * @throws NumberFormatException if the next two characters are not "aN".
   */
  private static double parseNaN(final Reader reader)
  throws IOException {
    if (reader.read() == 'a' && (reader.read() == 'N')) {
      return Double.NaN;
    }
    throw new NumberFormatException("Invalid double value");
  }

  /**
   * Reads the two characters following an initial 'I' and checks that they
   * complete the literal "INF" (the 'N' and 'F' are matched case
   * insensitively). Characters after the literal are not read.
   *
   * @param reader A reader positioned just after the leading 'I'.
   * @param isNegative whether a '-' sign preceded the literal.
   * @return negative or positive infinity, depending on the sign.
   * @throws IOException If a problem occurs with the specified reader
   * @throws NumberFormatException if the next two characters do not match.
   */
  private static double parseINF(final Reader reader, final boolean isNegative)
  throws IOException {
    final int n = reader.read();
    final int f = reader.read();
    if ((n == 'n' || n == 'N') && (f == 'f' || f == 'F')) {
      return isNegative ? Double.NEGATIVE_INFINITY
                        : Double.POSITIVE_INFINITY;
    }
    throw new NumberFormatException("Invalid double value");
  }

  /**
   * Tells whether the given character is a '+' or '-' sign.
   *
   * @param c the character, as returned by {@link Reader#read()}.
   */
  private static boolean isSign(final int c) {
    return (c == '-') || (c == '+');
  }

  /**
   * Accumulate digit negatively in order to avoid {@link Integer#MIN_VALUE}
   * overflow.
   *
   * @param c the character to append; must be an ASCII digit.
   * @param decimal the (non-positive) value accumulated so far.
   * @return {@code decimal * 10 - digit}
   * @throws NumberFormatException if {@code c} is not a digit or if the result
   * does not fit in an int.
   */
  private static int accumulateIntegerDecimal(final int c, final int decimal) {
    if (c < '0' || c > '9') {
      throw new NumberFormatException("Invalid integer value");
    }
    final int digit = c - '0';
    // Test the bounds before computing: a wrapped product may still compare
    // lower than the previous value, so checking after the fact is unreliable.
    if (decimal < Integer.MIN_VALUE / 10) {
      throw new NumberFormatException("Integer overflow");
    }
    final int shifted = decimal * 10;
    if (shifted < Integer.MIN_VALUE + digit) {
      throw new NumberFormatException("Integer overflow");
    }
    return shifted - digit;
  }

  /**
   * Accumulate digit negatively in order to avoid {@link Long#MIN_VALUE}
   * overflow.
   *
   * @param c the character to append; must be an ASCII digit.
   * @param decimal the (non-positive) value accumulated so far.
   * @return {@code decimal * 10 - digit}
   * @throws NumberFormatException if {@code c} is not a digit or if the result
   * does not fit in a long.
   */
  private static long accumulateLongDecimal(final int c, final long decimal) {
    if (c < '0' || c > '9') {
      throw new NumberFormatException("Invalid long value");
    }
    final int digit = c - '0';
    // Test the bounds before computing: a wrapped product may still compare
    // lower than the previous value, so checking after the fact is unreliable.
    if (decimal < Long.MIN_VALUE / 10) {
      throw new NumberFormatException("Long overflow");
    }
    final long shifted = decimal * 10;
    if (shifted < Long.MIN_VALUE + digit) {
      throw new NumberFormatException("Long overflow");
    }
    return shifted - digit;
  }

  /**
   * Adds one fractional digit, weighted by {@code base}, to the fraction
   * accumulated so far.
   *
   * @param c the character to append; must be an ASCII digit.
   * @param fraction the (non-negative) fraction accumulated so far.
   * @param base the weight of the digit (0.1 for the first one, then 0.01...).
   * @return {@code fraction + digit * base}
   * @throws NumberFormatException if {@code c} is not a digit.
   */
  private static double accumulateDoubleFraction(final int c, final double fraction, final double base) {
    if (c < '0' || c > '9') {
      throw new NumberFormatException("Invalid double value");
    }
    final int digit = c - '0';
    return digit * base + fraction;
  }
}