/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A heuristic number parser that tries to analyze a given String in a more
* flexible way than the standard Java number parsing:
*
* 1. All "," are replaced by "."
* 2. ¼, ½, ¾ are replaced by 0.25, 0.5, 0.75 respectively
* 3. Ranges ([number]-[number]) are matched and the arithmetic average is used as the result
* 4. All prefixes and suffixes are ignored
*
* Example: "Weight: 8,5 - 10,5 g" would result in 9.5
*
* @author Michael Wurst
*
*/
public class NumberParser {

    /**
     * Matches "[prefix]number - number[suffix]". The prefix is every character
     * before the first digit; each number is a run of digits and dots.
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern RANGE_PATTERN =
            Pattern.compile("[^0-9]*([0-9.]+)\\s?-\\s?([0-9.]+)(.*)");

    /** Matches "[prefix]number[suffix]" with the same prefix/number rules as the range pattern. */
    private static final Pattern SINGLE_NUMBER_PATTERN =
            Pattern.compile("[^0-9]*([0-9.]+)(.*)");

    /**
     * Parse a number possibly surrounded by other information.
     *
     * The following attempts are made in order; the first one that succeeds wins:
     * <ol>
     * <li>the string is parsed as is with {@link Double#parseDouble(String)},</li>
     * <li>every ',' is replaced by '.' and the vulgar fractions 1/4, 1/2 and 3/4
     *     (U+00BC, U+00BD, U+00BE) are replaced by their decimal form, then the
     *     string is parsed again,</li>
     * <li>a range "a - b" is searched for and the arithmetic mean of a and b is returned,</li>
     * <li>the first run of digits and dots is parsed, ignoring any prefix and suffix.</li>
     * </ol>
     * Note that in steps 3 and 4 everything before the first digit counts as prefix,
     * so a leading sign or a leading '.' is dropped there.
     *
     * @param s the string, may be <code>null</code>
     * @return a double representation or NaN if it cannot be parsed (including <code>null</code>)
     */
    public static double parse(String s) {
        if (s == null) {
            return Double.NaN;
        }

        // A NaN result here is indistinguishable from "not parseable", which is fine:
        // a string that literally spells NaN contains no digits, so the heuristics
        // below end in NaN as well.
        double value = parseOrNaN(s);
        if (!Double.isNaN(value)) {
            return value;
        }

        String normalized = expandVulgarFractions(s.replace(',', '.'));
        value = parseOrNaN(normalized);
        if (!Double.isNaN(value)) {
            return value;
        }

        // Try to resolve ranges
        Matcher range = RANGE_PATTERN.matcher(normalized);
        if (range.matches()) {
            double lower = parseOrNaN(range.group(1));
            double upper = parseOrNaN(range.group(2));
            // Both bounds consist of digits and dots only, so NaN always means "malformed".
            if (!Double.isNaN(lower) && !Double.isNaN(upper)) {
                return (lower + upper) / 2;
            }
        }

        // Try to ignore all prefixes and suffixes
        Matcher single = SINGLE_NUMBER_PATTERN.matcher(normalized);
        if (single.matches()) {
            return parseOrNaN(single.group(1));
        }
        return Double.NaN;
    }

    /**
     * This method parses the given string as double value.
     * It first tries {@link Double#parseDouble(String)}. If that fails and the
     * string is exactly "?", NaN is returned. Otherwise the original
     * NumberFormatException is rethrown, so its message names the offending input.
     *
     * @param s the string to parse
     * @return the parsed value, or NaN for "?"
     * @throws NumberFormatException if s is neither a valid double nor "?"
     * @throws NullPointerException if s is <code>null</code> (raised by Double.parseDouble)
     */
    public static double parseDouble(String s) throws NumberFormatException {
        try {
            return Double.parseDouble(s);
        } catch (NumberFormatException e) {
            // "?" is accepted as a spelling of NaN
            if ("?".equals(s)) {
                return Double.NaN;
            }
            throw e;
        }
    }

    /** Parses text with Double.parseDouble and maps a NumberFormatException to NaN. */
    private static double parseOrNaN(String text) {
        try {
            return Double.parseDouble(text);
        } catch (NumberFormatException ignored) {
            // Expected for heuristic input; the caller falls back to the next strategy.
            return Double.NaN;
        }
    }

    /**
     * Replaces the characters U+00BC, U+00BD and U+00BE by ".25", ".5" and ".75".
     * A leading "0" is added unless the fraction directly follows a digit, because
     * the patterns above treat a dot before the first digit as part of the prefix
     * and would otherwise read a lone one-half as 5 instead of 0.5.
     */
    private static String expandVulgarFractions(String text) {
        StringBuilder out = new StringBuilder(text.length() + 4);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            String decimals;
            switch (c) {
                case '\u00BC':
                    decimals = ".25";
                    break;
                case '\u00BD':
                    decimals = ".5";
                    break;
                case '\u00BE':
                    decimals = ".75";
                    break;
                default:
                    out.append(c);
                    continue;
            }
            boolean followsDigit = i > 0 && text.charAt(i - 1) >= '0' && text.charAt(i - 1) <= '9';
            if (!followsDigit) {
                out.append('0');
            }
            out.append(decimals);
        }
        return out.toString();
    }
}