/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A heuristic number parser that analyzes a given String in a more flexible way than the
 * standard Java number parsing:
 * <ol>
 * <li>All "," are replaced by ".".</li>
 * <li>The vulgar fractions one quarter, one half and three quarters (U+00BC, U+00BD, U+00BE) are
 * replaced by 0.25, 0.5 and 0.75 respectively. Directly after a digit only the decimal part is
 * inserted, so a "1" followed by the one-half sign is read as 1.5.</li>
 * <li>Ranges ([number]-[number]) are matched and the arithmetic average is used as result.</li>
 * <li>All prefixes and suffixes are ignored.</li>
 * </ol>
 *
 * Example: "Weight: 8,5 - 10,5 g" would result in 9.5
 *
 * @author Michael Wurst
 *
 */
public class NumberParser {

    /**
     * Matches "[prefix]number - number[suffix]"; groups 1 and 2 hold the two range bounds as runs
     * of digits and dots. Compiled once because {@link Pattern} instances are immutable.
     */
    private static final Pattern RANGE_PATTERN = Pattern
            .compile("[^0-9]*([0-9.]+)\\s?-\\s?([0-9.]+)(.*)");

    /**
     * Matches "[prefix]number[suffix]"; group 1 holds the first run of digits and dots. Note that
     * the prefix is greedy over non-digits, so a "." directly in front of the first digit belongs
     * to the ignored prefix.
     */
    private static final Pattern EMBEDDED_NUMBER_PATTERN = Pattern.compile("[^0-9]*([0-9.]+)(.*)");

    /**
     * Parse a number possibly surrounded by other information.
     *
     * The attempts are made in this order: plain {@link Double#parseDouble(String)}, the same
     * after normalizing commas and fraction characters, a range (averaged), and finally the first
     * number embedded in arbitrary prefix/suffix text.
     *
     * @param s
     *            the string, may be <code>null</code>
     * @return a double representation or NaN if it cannot be parsed (including <code>null</code>
     *         input)
     */
    public static double parse(String s) {
        if (s == null) {
            // Double.parseDouble(null) would throw a NullPointerException instead of yielding NaN
            return Double.NaN;
        }

        try {
            return Double.parseDouble(s);
        } catch (NumberFormatException ignored) {
            // Not a plain Java double literal; try the heuristics below
        }

        // Normalize decimal commas and fraction characters, then retry the plain parse
        String normalized = expandFractions(s.replace(',', '.'));
        try {
            return Double.parseDouble(normalized);
        } catch (NumberFormatException ignored) {
            // Still surrounded by other text; try the pattern based heuristics
        }

        // Try to resolve ranges: the result is the arithmetic mean of both bounds
        Matcher rangeMatcher = RANGE_PATTERN.matcher(normalized);
        if (rangeMatcher.matches()) {
            try {
                double lower = Double.parseDouble(rangeMatcher.group(1));
                double upper = Double.parseDouble(rangeMatcher.group(2));
                return (lower + upper) / 2;
            } catch (NumberFormatException ignored) {
                // A bound such as "1.2.3" is not a number; fall back to the single number case
            }
        }

        // Try to ignore all prefixes and suffixes around a single number
        Matcher numberMatcher = EMBEDDED_NUMBER_PATTERN.matcher(normalized);
        if (numberMatcher.matches()) {
            try {
                return Double.parseDouble(numberMatcher.group(1));
            } catch (NumberFormatException ignored) {
                // The run of digits and dots is not a valid number; give up
            }
        }
        return Double.NaN;
    }

    /**
     * Replaces the fraction characters U+00BC, U+00BD and U+00BE by their decimal notation.
     *
     * Directly behind an ASCII digit only ".25", ".5" or ".75" is inserted so that the fraction
     * extends the preceding integer. Everywhere else a leading zero is added ("0.5"); without it
     * the "." would be swallowed by the ignored prefix of the embedded number pattern and a lone
     * one-half sign would be read as 5 instead of 0.5.
     */
    private static String expandFractions(String text) {
        StringBuilder expanded = new StringBuilder(text.length() + 4);
        for (int i = 0; i < text.length(); i++) {
            char current = text.charAt(i);
            String decimals;
            switch (current) {
                case '\u00BC':
                    decimals = ".25";
                    break;
                case '\u00BD':
                    decimals = ".5";
                    break;
                case '\u00BE':
                    decimals = ".75";
                    break;
                default:
                    decimals = null;
                    break;
            }
            if (decimals == null) {
                expanded.append(current);
                continue;
            }
            boolean followsDigit = i > 0 && text.charAt(i - 1) >= '0' && text.charAt(i - 1) <= '9';
            if (!followsDigit) {
                expanded.append('0');
            }
            expanded.append(decimals);
        }
        return expanded.toString();
    }

    /**
     * This method parses the given string as double value. The missing value marker "?" yields
     * NaN; every other string is handed to {@link Double#parseDouble(String)}.
     *
     * @param s
     *            the string to parse
     * @return the parsed value, or NaN if the string is exactly "?"
     * @throws NumberFormatException
     *             if the string is neither "?" nor a parsable double
     * @throws NullPointerException
     *             if the string is <code>null</code> (raised by
     *             {@link Double#parseDouble(String)})
     */
    public static double parseDouble(String s) throws NumberFormatException {
        // "?" is never a valid double literal, so testing it first accepts the same inputs as
        // parsing first, without using an exception for control flow
        if ("?".equals(s)) {
            return Double.NaN;
        }
        return Double.parseDouble(s);
    }
}